Skip to content

Commit 145c5e0

Browse files
author
Bangqi Zhu
committed
upgrade ragengine to v1beta1
Signed-off-by: Bangqi Zhu <[email protected]>
1 parent 47fcd2e commit 145c5e0

26 files changed

+1135
-298
lines changed

api/v1beta1/condition_types.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ type ConditionType string
1818

1919
const (
2020
// ConditionTypeNodeClaimStatus is the state when checking nodeClaim status.
21-
ConditionTypeNodeClaimStatus = ConditionType("NodeClaimsReady")
21+
ConditionTypeNodeClaimStatus = ConditionType("NodeClaimReady")
2222

2323
// ConditionTypeNodeStatus is the state when checking node status.
2424
ConditionTypeNodeStatus = ConditionType("NodesReady")
@@ -29,9 +29,18 @@ const (
2929
// WorkspaceConditionTypeInferenceStatus is the state when Inference service has been ready.
3030
WorkspaceConditionTypeInferenceStatus = ConditionType("InferenceReady")
3131

32+
// RAGEneineConditionTypeServiceStatus is the state when service has been ready.
33+
RAGEneineConditionTypeServiceStatus = ConditionType("ServiceReady")
34+
35+
// RAGConditionTypeServiceStatus is the state when RAG Engine service has been ready.
36+
RAGConditionTypeServiceStatus = ConditionType("RAGEngineServiceReady")
37+
3238
// WorkspaceConditionTypeTuningJobStatus is the state when the tuning job starts normally.
3339
WorkspaceConditionTypeTuningJobStatus ConditionType = ConditionType("JobStarted")
3440

41+
// RAGEngineConditionTypeDeleting is the RAGEngine state when it starts to get deleted.
42+
RAGEngineConditionTypeDeleting = ConditionType("RAGEngineDeleting")
43+
3544
// WorkspaceConditionTypeDeleting is the Workspace state when it starts to get deleted.
3645
WorkspaceConditionTypeDeleting = ConditionType("WorkspaceDeleting")
3746

@@ -40,6 +49,8 @@ const (
4049
//For fine tuning, the "True" condition means the tuning job completes successfully.
4150
WorkspaceConditionTypeSucceeded ConditionType = ConditionType("WorkspaceSucceeded")
4251

52+
RAGEngineConditionTypeSucceeded ConditionType = ConditionType("RAGEngineSucceeded")
53+
4354
// ConditionTypeScalingDownStatus is the state when scaling down nodeClaim.
4455
ConditionTypeScalingDownStatus = ConditionType("ScalingDownCompleted")
4556
)

api/v1beta1/labels.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,21 @@ const (
3232
// LabelWorkspaceName is the label for workspace name.
3333
LabelWorkspaceName = KAITOPrefix + "workspace"
3434

35+
// LabelRAGEngineName is the label for ragengine name.
36+
LabelRAGEngineName = KAITOPrefix + "ragengine"
37+
3538
// LabelWorkspaceNamespace is the label for workspace namespace.
3639
LabelWorkspaceNamespace = KAITOPrefix + "workspacenamespace"
3740

41+
// LabelRAGEngineNamespace is the label for ragengine namespace.
42+
LabelRAGEngineNamespace = KAITOPrefix + "ragenginenamespace"
43+
3844
// WorkspaceRevisionAnnotation is the annotation that holds the Workspace revision number.
3945
WorkspaceRevisionAnnotation = "workspace.kaito.io/revision"
4046

47+
// RAGEngineRevisionAnnotation is the annotation that holds the RAGEngine revision number.
48+
RAGEngineRevisionAnnotation = "ragengine.kaito.io/revision"
49+
4150
// AnnotationWorkspaceRuntime is the annotation for runtime selection.
4251
AnnotationWorkspaceRuntime = KAITOPrefix + "runtime"
4352

api/v1beta1/ragengine_default.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright (c) KAITO authors.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package v1beta1
15+
16+
import (
17+
"context"
18+
)
19+
20+
// SetDefaults applies default values to the RAGEngine.
// It is currently a no-op: the body is empty, so no field is defaulted and the context is ignored.
21+
func (w *RAGEngine) SetDefaults(_ context.Context) {
22+
}

api/v1beta1/ragengine_types.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Copyright (c) KAITO authors.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package v1beta1
15+
16+
import (
17+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
)
19+
20+
// StorageSpec describes the volume used to persist the vector database data.
type StorageSpec struct {
21+
// PersistentVolumeClaim specifies the PVC to use for persisting vector database data.
22+
// If not specified, an emptyDir will be used (data will be lost on pod restart).
23+
// +optional
24+
PersistentVolumeClaim string `json:"persistentVolumeClaim,omitempty"`
25+
// MountPath specifies where the volume should be mounted in the container.
26+
// Defaults to /mnt/data if not specified.
27+
// +optional
28+
MountPath string `json:"mountPath,omitempty"`
29+
}
30+
31+
// RemoteEmbeddingSpec describes a remote embedding service: its URL and an optional access secret.
type RemoteEmbeddingSpec struct {
32+
// URL points to a publicly available embedding service, such as OpenAI.
33+
URL string `json:"url"`
34+
// AccessSecret is the name of the secret that contains the service access token.
35+
// +optional
36+
AccessSecret string `json:"accessSecret,omitempty"`
37+
}
38+
39+
// LocalEmbeddingSpec describes an embedding model that runs locally, identified either by a
// container image or by a huggingface model ID.
type LocalEmbeddingSpec struct {
40+
// Image is the name of the containerized embedding model image.
41+
// +optional
42+
Image string `json:"image,omitempty"`
// ImagePullSecret is presumably the name of the secret used to pull Image.
// NOTE(review): inferred from the field name only; confirm against the controller.
43+
// +optional
44+
ImagePullSecret string `json:"imagePullSecret,omitempty"`
45+
// ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5.
46+
// When this field is specified, the RAG engine will download the embedding model
47+
// from huggingface repository during startup. The embedding model will not persist in local storage.
48+
// Note that if Image is specified, ModelID should not be specified and vice versa.
49+
// +optional
50+
ModelID string `json:"modelID,omitempty"`
51+
// ModelAccessSecret is the name of the secret that contains the huggingface access token.
52+
// +optional
53+
ModelAccessSecret string `json:"modelAccessSecret,omitempty"`
54+
}
55+
56+
// EmbeddingSpec selects how embeddings are generated: by a remote service or by a local model.
// Exactly one of Remote and Local is expected to be set.
type EmbeddingSpec struct {
57+
// Remote specifies how to generate embeddings for index data using a remote service.
58+
// Note that either Remote or Local needs to be specified, not both.
59+
// +optional
60+
Remote *RemoteEmbeddingSpec `json:"remote,omitempty"`
61+
// Local specifies how to generate embeddings for index data using a model run locally.
62+
// +optional
63+
Local *LocalEmbeddingSpec `json:"local,omitempty"`
64+
}
65+
66+
// InferenceServiceSpec describes the inference service endpoint: its URL, an optional access
// secret, and the context window size of the LLM behind it.
type InferenceServiceSpec struct {
67+
// URL points to a running inference service endpoint which accepts http(s) payload.
68+
URL string `json:"url"`
69+
// AccessSecret is the name of the secret that contains the service access token.
70+
// +optional
71+
AccessSecret string `json:"accessSecret,omitempty"`
72+
// ContextWindowSize defines the combined maximum of input and output tokens that can be handled by the LLM in a single request.
73+
// This value is critical for accurately managing how much of the original query and supporting documents
74+
// (retrieved via RAG) can be included in the prompt without exceeding the model's input limit.
75+
//
76+
// It is used to determine how much space is available for retrieved documents after accounting for the query,
77+
// system prompts, formatting tokens, and any other fixed prompt components.
78+
//
79+
// Setting this value correctly is essential for ensuring that the RAG system does not truncate important
80+
// context or exceed model limits, which can lead to degraded response quality or inference errors.
81+
//
82+
// Must match the token limit of the LLM backend being used (e.g., 8096, 16384, 32768 tokens).
83+
ContextWindowSize int `json:"contextWindowSize"`
84+
}
85+
86+
// RAGEngineSpec defines the desired state of RAGEngine: compute, storage, embedding source,
// inference service endpoint and the names of the exposed services.
type RAGEngineSpec struct {
87+
// Compute specifies the dedicated GPU resource used by an embedding model running locally if required.
88+
// +optional
89+
Compute *ResourceSpec `json:"compute,omitempty"`
90+
// Storage specifies how to access the vector database used to save the embedding vectors.
91+
// If this field is not specified, by default, an in-memory vector DB will be used.
92+
// The data will not be persisted.
93+
// +optional
94+
Storage *StorageSpec `json:"storage,omitempty"`
95+
// Embedding specifies whether the RAG engine generates embedding vectors using a remote service
96+
// or using an embedding model running locally.
97+
Embedding *EmbeddingSpec `json:"embedding"`
98+
// InferenceService specifies the inference service endpoint (URL, optional access secret and
// context window size).
InferenceService *InferenceServiceSpec `json:"inferenceService"`
99+
// QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the
100+
// inference service. If not specified, a default service name will be created by the RAG engine.
101+
// +optional
102+
QueryServiceName string `json:"queryServiceName,omitempty"`
103+
// IndexServiceName is the name of the service which exposes the endpoint for user to input the index data
104+
// to generate embeddings. If not specified, a default service name will be created by the RAG engine.
105+
// +optional
106+
IndexServiceName string `json:"indexServiceName,omitempty"`
107+
}
108+
109+
// RAGEngineStatus defines the observed state of RAGEngine.
110+
type RAGEngineStatus struct {
111+
// WorkerNodes is the list of nodes chosen to run the workload based on the RAGEngine resource requirement.
112+
// +optional
113+
WorkerNodes []string `json:"workerNodes,omitempty"`
114+
115+
// Conditions holds the status conditions reported for the RAGEngine; it is omitted from JSON when empty.
Conditions []metav1.Condition `json:"conditions,omitempty"`
116+
}
117+
118+
// RAGEngine is the Schema for the ragengine API.
119+
// +kubebuilder:object:root=true
120+
// +kubebuilder:subresource:status
121+
// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine,shortName=rag
122+
// +kubebuilder:storageversion
123+
// +kubebuilder:printcolumn:name="Instance",type="string",JSONPath=".spec.compute.instanceType",description=""
124+
// +kubebuilder:printcolumn:name="ResourceReady",type="string",JSONPath=".status.conditions[?(@.type==\"ResourceReady\")].status",description=""
125+
// +kubebuilder:printcolumn:name="ServiceReady",type="string",JSONPath=".status.conditions[?(@.type==\"ServiceReady\")].status",description=""
126+
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description=""
127+
type RAGEngine struct {
128+
metav1.TypeMeta `json:",inline"`
129+
metav1.ObjectMeta `json:"metadata,omitempty"`
130+
131+
// Spec is the desired state. It is a pointer and is omitted from JSON when nil,
// so code that reads it has to handle a nil Spec.
Spec *RAGEngineSpec `json:"spec,omitempty"`
132+
133+
// Status is the observed state of the RAGEngine.
Status RAGEngineStatus `json:"status,omitempty"`
134+
}
135+
136+
// RAGEngineList contains a list of RAGEngine.
137+
// +kubebuilder:object:root=true
138+
type RAGEngineList struct {
139+
metav1.TypeMeta `json:",inline"`
140+
metav1.ListMeta `json:"metadata,omitempty"`
141+
// Items is the list of RAGEngine objects.
Items []RAGEngine `json:"items"`
142+
}
143+
144+
func init() {
145+
SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{})
146+
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright (c) KAITO authors.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package v1beta1
15+
16+
import (
17+
"context"
18+
"fmt"
19+
"net/url"
20+
"os"
21+
"regexp"
22+
"strings"
23+
24+
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/klog/v2"
27+
"knative.dev/pkg/apis"
28+
29+
"github.com/kaito-project/kaito/pkg/utils"
30+
"github.com/kaito-project/kaito/pkg/utils/consts"
31+
)
32+
33+
func (w *RAGEngine) SupportedVerbs() []admissionregistrationv1.OperationType {
34+
return []admissionregistrationv1.OperationType{
35+
admissionregistrationv1.Create,
36+
admissionregistrationv1.Update,
37+
}
38+
}
39+
40+
func (w *RAGEngine) Validate(ctx context.Context) (errs *apis.FieldError) {
41+
base := apis.GetBaseline(ctx)
42+
if base == nil {
43+
klog.InfoS("Validate creation", "ragengine", fmt.Sprintf("%s/%s", w.Namespace, w.Name))
44+
errs = errs.Also(w.validateCreate().ViaField("spec"))
45+
} else {
46+
klog.InfoS("Validate update", "ragengine", fmt.Sprintf("%s/%s", w.Namespace, w.Name))
47+
old := base.(*RAGEngine)
48+
errs = errs.Also(
49+
w.validateCreate().ViaField("spec"),
50+
w.Spec.Compute.validateUpdate(old.Spec.Compute).ViaField("resource"),
51+
)
52+
}
53+
return errs
54+
}
55+
56+
func (w *RAGEngine) validateCreate() (errs *apis.FieldError) {
57+
if w.Spec.InferenceService == nil {
58+
errs = errs.Also(apis.ErrGeneric("InferenceService must be specified", ""))
59+
}
60+
errs = errs.Also(w.Spec.InferenceService.validateCreate())
61+
if w.Spec.Embedding == nil {
62+
errs = errs.Also(apis.ErrGeneric("Embedding must be specified", ""))
63+
return errs
64+
}
65+
if w.Spec.Embedding.Local == nil && w.Spec.Embedding.Remote == nil {
66+
errs = errs.Also(apis.ErrGeneric("Either remote embedding or local embedding must be specified, not neither", ""))
67+
}
68+
if w.Spec.Embedding.Local != nil && w.Spec.Embedding.Remote != nil {
69+
errs = errs.Also(apis.ErrGeneric("Either remote embedding or local embedding must be specified, but not both", ""))
70+
}
71+
errs = errs.Also(w.Spec.Compute.validateRAGCreate())
72+
if w.Spec.Embedding.Local != nil {
73+
w.Spec.Embedding.Local.validateCreate().ViaField("embedding")
74+
}
75+
if w.Spec.Embedding.Remote != nil {
76+
w.Spec.Embedding.Remote.validateCreate().ViaField("embedding")
77+
}
78+
79+
return errs
80+
}
81+
82+
func (r *ResourceSpec) validateRAGCreate() (errs *apis.FieldError) {
83+
instanceType := string(r.InstanceType)
84+
85+
skuHandler, err := utils.GetSKUHandler()
86+
if err != nil {
87+
errs = errs.Also(apis.ErrGeneric(fmt.Sprintf("Failed to get SKU handler: %v", err), "instanceType"))
88+
return errs
89+
}
90+
91+
if skuConfig := skuHandler.GetGPUConfigBySKU(instanceType); skuConfig == nil {
92+
provider := os.Getenv("CLOUD_PROVIDER")
93+
// Check for other instance types pattern matches if cloud provider is Azure
94+
if provider != consts.AzureCloudName || (!strings.HasPrefix(instanceType, N_SERIES_PREFIX) && !strings.HasPrefix(instanceType, D_SERIES_PREFIX)) {
95+
errs = errs.Also(apis.ErrInvalidValue(fmt.Sprintf("Unsupported instance type %s. Supported SKUs: %s", instanceType, skuHandler.GetSupportedSKUs()), "instanceType"))
96+
}
97+
}
98+
99+
// Validate labelSelector
100+
if _, err := metav1.LabelSelectorAsMap(r.LabelSelector); err != nil {
101+
errs = errs.Also(apis.ErrInvalidValue(err.Error(), "labelSelector"))
102+
}
103+
104+
return errs
105+
}
106+
107+
func (e *LocalEmbeddingSpec) validateCreate() (errs *apis.FieldError) {
108+
if e.Image == "" && e.ModelID == "" {
109+
errs = errs.Also(apis.ErrGeneric("Either image or modelID must be specified, not neither", ""))
110+
}
111+
if e.Image != "" && e.ModelID != "" {
112+
errs = errs.Also(apis.ErrGeneric("Either image or modelID must be specified, but not both", ""))
113+
}
114+
if e.Image != "" {
115+
re := regexp.MustCompile(`^(.+/[^:/]+):([^:/]+)$`)
116+
if !re.MatchString(e.Image) {
117+
errs = errs.Also(apis.ErrInvalidValue("Invalid image format, require full input image URL", "Image"))
118+
} else {
119+
// Executes if image is of correct format
120+
err := utils.ExtractAndValidateRepoName(e.Image)
121+
if err != nil {
122+
errs = errs.Also(apis.ErrInvalidValue(err.Error(), "Image"))
123+
}
124+
}
125+
}
126+
return errs
127+
}
128+
129+
func (e *RemoteEmbeddingSpec) validateCreate() (errs *apis.FieldError) {
130+
_, err := url.ParseRequestURI(e.URL)
131+
if err != nil {
132+
errs = errs.Also(apis.ErrGeneric(fmt.Sprintf("URL input error: %v", err), "remote url"))
133+
}
134+
return errs
135+
}
136+
137+
func (e *InferenceServiceSpec) validateCreate() (errs *apis.FieldError) {
138+
_, err := url.ParseRequestURI(e.URL)
139+
if err != nil {
140+
errs = errs.Also(apis.ErrGeneric(fmt.Sprintf("URL input error: %v", err), "remote url"))
141+
}
142+
143+
if e.ContextWindowSize <= 0 {
144+
errs = errs.Also(apis.ErrInvalidValue("ContextWindowSize must be a positive integer", "contextWindowSize"))
145+
}
146+
147+
return errs
148+
}

0 commit comments

Comments
 (0)