kaito-project
diff --git a/‎api/v1beta1/condition_types.go‎
Lines changed: 12 additions & 1 deletion b/‎api/v1beta1/condition_types.go‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎api/v1beta1/labels.go‎
Lines changed: 9 additions & 0 deletions b/‎api/v1beta1/labels.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎api/v1beta1/ragengine_default.go‎
Lines changed: 22 additions & 0 deletions b/‎api/v1beta1/ragengine_default.go‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎api/v1beta1/ragengine_types.go‎
Lines changed: 146 additions & 0 deletions b/‎api/v1beta1/ragengine_types.go‎
Lines changed: 146 additions & 0 deletions
diff --git a/‎api/v1beta1/ragengine_validation.go‎
Lines changed: 148 additions & 0 deletions b/‎api/v1beta1/ragengine_validation.go‎
Lines changed: 148 additions & 0 deletions
@@ -18,7 +18,7 @@ type ConditionType string
 
 const (
 	// ConditionTypeNodeClaimStatus is the state when checking nodeClaim status.
-	ConditionTypeNodeClaimStatus = ConditionType("NodeClaimsReady")
+	ConditionTypeNodeClaimStatus = ConditionType("NodeClaimReady")
 
 	// ConditionTypeNodeStatus is the state when checking node status.
 	ConditionTypeNodeStatus = ConditionType("NodesReady")
@@ -29,9 +29,18 @@ const (
 	// WorkspaceConditionTypeInferenceStatus is the state when Inference service has been ready.
 	WorkspaceConditionTypeInferenceStatus = ConditionType("InferenceReady")
 
+	// RAGEneineConditionTypeServiceStatus is the state when service has been ready.
+	RAGEneineConditionTypeServiceStatus = ConditionType("ServiceReady")
+
+	// RAGConditionTypeServiceStatus is the state when RAG Engine service has been ready.
+	RAGConditionTypeServiceStatus = ConditionType("RAGEngineServiceReady")
+
 	// WorkspaceConditionTypeTuningJobStatus is the state when the tuning job starts normally.
 	WorkspaceConditionTypeTuningJobStatus ConditionType = ConditionType("JobStarted")
 
+	// RAGEngineConditionTypeDeleting is the RAGEngine state when it starts to get deleted.
+	RAGEngineConditionTypeDeleting = ConditionType("RAGEngineDeleting")
+
 	//WorkspaceConditionTypeDeleting is the Workspace state when starts to get deleted.
 	WorkspaceConditionTypeDeleting = ConditionType("WorkspaceDeleting")
 
@@ -40,6 +49,8 @@ const (
 	//For fine tuning, the "True" condition means the tuning job completes successfully.
 	WorkspaceConditionTypeSucceeded ConditionType = ConditionType("WorkspaceSucceeded")
 
+	RAGEngineConditionTypeSucceeded ConditionType = ConditionType("RAGEngineSucceeded")
+
 	// ConditionTypeScalingDownStatus is the state when scaling down nodeClaim.
 	ConditionTypeScalingDownStatus = ConditionType("ScalingDownCompleted")
 )
@@ -32,12 +32,21 @@ const (
 	// LabelWorkspaceName is the label for workspace name.
 	LabelWorkspaceName = KAITOPrefix + "workspace"
 
+	// LabelRAGEngineName is the label for ragengine name.
+	LabelRAGEngineName = KAITOPrefix + "ragengine"
+
 	// LabelWorkspaceName is the label for workspace namespace.
 	LabelWorkspaceNamespace = KAITOPrefix + "workspacenamespace"
 
+	// LabelRAGEngineNamespace is the label for ragengine namespace.
+	LabelRAGEngineNamespace = KAITOPrefix + "ragenginenamespace"
+
 	// WorkspaceRevisionAnnotation is the Annotations for revision number
 	WorkspaceRevisionAnnotation = "workspace.kaito.io/revision"
 
+	// RAGEngineRevisionAnnotation is the annotation for the revision number.
+	RAGEngineRevisionAnnotation = "ragengine.kaito.io/revision"
+
 	// AnnotationWorkspaceRuntime is the annotation for runtime selection.
 	AnnotationWorkspaceRuntime = KAITOPrefix + "runtime"
 
 
@@ -0,0 +1,22 @@
+// Copyright (c) KAITO authors.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1beta1
+
+import (
+	"context"
+)
+
+// SetDefaults applies default values to the RAGEngine.
+// It is currently a no-op: the body is empty and the context is ignored.
+func (w *RAGEngine) SetDefaults(_ context.Context) {
+}
@@ -0,0 +1,146 @@
+// Copyright (c) KAITO authors.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1beta1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// StorageSpec describes the volume used to persist vector database data
+// and where that volume is mounted in the container.
+type StorageSpec struct {
+	// PersistentVolumeClaim specifies the PVC to use for persisting vector database data.
+	// If not specified, an emptyDir will be used (data will be lost on pod restart).
+	// +optional
+	PersistentVolumeClaim string `json:"persistentVolumeClaim,omitempty"`
+	// MountPath specifies where the volume should be mounted in the container.
+	// Defaults to /mnt/data if not specified.
+	// +optional
+	MountPath string `json:"mountPath,omitempty"`
+}
+
+// RemoteEmbeddingSpec identifies a remote embedding service by URL, together
+// with the optional secret that holds its access token.
+type RemoteEmbeddingSpec struct {
+	// URL points to a publicly available embedding service, such as OpenAI.
+	URL string `json:"url"`
+	// AccessSecret is the name of the secret that contains the service access token.
+	// +optional
+	AccessSecret string `json:"accessSecret,omitempty"`
+}
+
+// LocalEmbeddingSpec describes an embedding model that is run locally. The model
+// comes either from a container image (Image) or from a huggingface model ID
+// (ModelID); the two fields are mutually exclusive.
+type LocalEmbeddingSpec struct {
+	// Image is the name of the containerized embedding model image.
+	// +optional
+	Image string `json:"image,omitempty"`
+	// +optional
+	ImagePullSecret string `json:"imagePullSecret,omitempty"`
+	// ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5.
+	// When this field is specified, the RAG engine will download the embedding model
+	// from huggingface repository during startup. The embedding model will not persist in local storage.
+	// Note that if Image is specified, ModelID should not be specified and vice versa.
+	// +optional
+	ModelID string `json:"modelID,omitempty"`
+	// ModelAccessSecret is the name of the secret that contains the huggingface access token.
+	// +optional
+	ModelAccessSecret string `json:"modelAccessSecret,omitempty"`
+}
+
+// EmbeddingSpec selects how embeddings for index data are generated: through a
+// remote service (Remote) or through a model run locally (Local).
+type EmbeddingSpec struct {
+	// Remote specifies how to generate embeddings for index data using a remote service.
+	// Note that either Remote or Local needs to be specified, not both.
+	// +optional
+	Remote *RemoteEmbeddingSpec `json:"remote,omitempty"`
+	// Local specifies how to generate embeddings for index data using a model run locally.
+	// Note that either Remote or Local needs to be specified, not both.
+	// +optional
+	Local *LocalEmbeddingSpec `json:"local,omitempty"`
+}
+
+// InferenceServiceSpec describes the inference service endpoint: its URL, the
+// optional secret holding its access token, and the context window size of the
+// LLM behind it.
+type InferenceServiceSpec struct {
+	// URL points to a running inference service endpoint which accepts http(s) payload.
+	URL string `json:"url"`
+	// AccessSecret is the name of the secret that contains the service access token.
+	// +optional
+	AccessSecret string `json:"accessSecret,omitempty"`
+	// ContextWindowSize defines the combined maximum of input and output tokens that can be handled by the LLM in a single request.
+	// This value is critical for accurately managing how much of the original query and supporting documents
+	// (retrieved via RAG) can be included in the prompt without exceeding the model's input limit.
+	//
+	// It is used to determine how much space is available for retrieved documents after accounting for the query,
+	// system prompts, formatting tokens, and any other fixed prompt components.
+	//
+	// Setting this value correctly is essential for ensuring that the RAG system does not truncate important
+	// context or exceed model limits, which can lead to degraded response quality or inference errors.
+	//
+	// Must match the token limit of the LLM backend being used (e.g., 8096, 16384, 32768 tokens).
+	ContextWindowSize int `json:"contextWindowSize"`
+}
+
+// RAGEngineSpec is the specification of a RAGEngine: optional compute and
+// storage settings, the embedding source, the inference service, and optional
+// names for the query and index services.
+type RAGEngineSpec struct {
+	// Compute specifies the dedicated GPU resource used by an embedding model running locally if required.
+	// +optional
+	Compute *ResourceSpec `json:"compute,omitempty"`
+	// Storage specifies how to access the vector database used to save the embedding vectors.
+	// If this field is not specified, by default, an in-memory vector DB will be used.
+	// The data will not be persisted.
+	// +optional
+	Storage *StorageSpec `json:"storage,omitempty"`
+	// Embedding specifies whether the RAG engine generates embedding vectors using a remote service
+	// or using an embedding model running locally.
+	Embedding        *EmbeddingSpec        `json:"embedding"`
+	InferenceService *InferenceServiceSpec `json:"inferenceService"`
+	// QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the
+	// inference service. If not specified, a default service name will be created by the RAG engine.
+	// +optional
+	QueryServiceName string `json:"queryServiceName,omitempty"`
+	// IndexServiceName is the name of the service which exposes the endpoint for user to input the index data
+	// to generate embeddings. If not specified, a default service name will be created by the RAG engine.
+	// +optional
+	IndexServiceName string `json:"indexServiceName,omitempty"`
+}
+
+// RAGEngineStatus defines the observed state of RAGEngine.
+type RAGEngineStatus struct {
+	// WorkerNodes is the list of nodes chosen to run the workload based on the RAGEngine resource requirement.
+	// +optional
+	WorkerNodes []string `json:"workerNodes,omitempty"`
+
+	// Conditions holds the conditions recorded for the RAGEngine.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// RAGEngine is the Schema for the ragengine API
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine,shortName=rag
+// +kubebuilder:storageversion
+// +kubebuilder:printcolumn:name="Instance",type="string",JSONPath=".spec.compute.instanceType",description=""
+// +kubebuilder:printcolumn:name="ResourceReady",type="string",JSONPath=".status.conditions[?(@.type==\"ResourceReady\")].status",description=""
+// +kubebuilder:printcolumn:name="ServiceReady",type="string",JSONPath=".status.conditions[?(@.type==\"ServiceReady\")].status",description=""
+// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description=""
+type RAGEngine struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	// Spec holds the RAGEngine specification. It is a pointer and may be nil
+	// when the field is omitted.
+	Spec *RAGEngineSpec `json:"spec,omitempty"`
+
+	// Status holds the observed state of the RAGEngine.
+	Status RAGEngineStatus `json:"status,omitempty"`
+}
+
+// RAGEngineList contains a list of RAGEngine objects together with the
+// list metadata.
+// +kubebuilder:object:root=true
+type RAGEngineList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []RAGEngine `json:"items"`
+}
+
+// init registers the RAGEngine and RAGEngineList types with the package's
+// SchemeBuilder.
+func init() {
+	SchemeBuilder.Register(
+		new(RAGEngine),
+		new(RAGEngineList),
+	)
+}
@@ -0,0 +1,148 @@
+// Copyright (c) KAITO authors.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1beta1
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"os"
+	"regexp"
+	"strings"
+
+	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+	"knative.dev/pkg/apis"
+
+	"github.com/kaito-project/kaito/pkg/utils"
+	"github.com/kaito-project/kaito/pkg/utils/consts"
+)
+
+// SupportedVerbs returns the admission operations reported for RAGEngine:
+// Create and Update, in that order.
+func (w *RAGEngine) SupportedVerbs() []admissionregistrationv1.OperationType {
+	verbs := make([]admissionregistrationv1.OperationType, 0, 2)
+	verbs = append(verbs, admissionregistrationv1.Create, admissionregistrationv1.Update)
+	return verbs
+}
+
+// Validate checks the RAGEngine on creation and on update.
+//
+// When the context carries no baseline object the request is treated as a
+// creation and only the create-time checks run. Otherwise the create-time
+// checks run again on the new object and, when both the new and the old object
+// specify Compute, the compute update checks run as well.
+//
+// A missing spec, or a baseline that is not a *RAGEngine, is reported as a
+// validation error instead of causing a nil dereference or a failed type
+// assertion.
+func (w *RAGEngine) Validate(ctx context.Context) (errs *apis.FieldError) {
+	// Spec is an optional pointer; everything below dereferences it.
+	if w.Spec == nil {
+		return apis.ErrMissingField("spec")
+	}
+
+	name := fmt.Sprintf("%s/%s", w.Namespace, w.Name)
+	base := apis.GetBaseline(ctx)
+	if base == nil {
+		klog.InfoS("Validate creation", "ragengine", name)
+		return errs.Also(w.validateCreate().ViaField("spec"))
+	}
+
+	klog.InfoS("Validate update", "ragengine", name)
+	errs = errs.Also(w.validateCreate().ViaField("spec"))
+
+	old, ok := base.(*RAGEngine)
+	if !ok {
+		return errs.Also(apis.ErrGeneric(fmt.Sprintf("unexpected baseline type %T, expected *RAGEngine", base), ""))
+	}
+
+	// Compute is optional on both objects. validateUpdate is defined elsewhere
+	// and is only invoked when both sides are present, since it is not known to
+	// accept a nil receiver or argument.
+	// NOTE(review): the "resource" field path is kept as-is to preserve the
+	// existing error output, but the RAGEngine field is spec.compute — confirm
+	// whether the path should be corrected.
+	if w.Spec.Compute != nil && old.Spec != nil && old.Spec.Compute != nil {
+		errs = errs.Also(w.Spec.Compute.validateUpdate(old.Spec.Compute).ViaField("resource"))
+	}
+	return errs
+}
+
+// validateCreate runs the create-time checks on the RAGEngine spec and returns
+// the accumulated field errors (nil when the spec is valid).
+//
+// It requires InferenceService and Embedding to be set, requires exactly one of
+// Embedding.Local and Embedding.Remote, and delegates to the per-section
+// validators. Optional sections that are nil are skipped rather than
+// dereferenced, and the results of the embedding validators are accumulated
+// into the returned error.
+func (w *RAGEngine) validateCreate() (errs *apis.FieldError) {
+	if w.Spec == nil {
+		return apis.ErrGeneric("Spec must be specified", "")
+	}
+	spec := w.Spec
+
+	// Only validate the inference service when it is present; calling its
+	// validator with a nil pointer would dereference nil.
+	if spec.InferenceService == nil {
+		errs = errs.Also(apis.ErrGeneric("InferenceService must be specified", ""))
+	} else {
+		errs = errs.Also(spec.InferenceService.validateCreate())
+	}
+
+	if spec.Embedding == nil {
+		errs = errs.Also(apis.ErrGeneric("Embedding must be specified", ""))
+		return errs
+	}
+	if spec.Embedding.Local == nil && spec.Embedding.Remote == nil {
+		errs = errs.Also(apis.ErrGeneric("Either remote embedding or local embedding must be specified, not neither", ""))
+	}
+	if spec.Embedding.Local != nil && spec.Embedding.Remote != nil {
+		errs = errs.Also(apis.ErrGeneric("Either remote embedding or local embedding must be specified, but not both", ""))
+	}
+
+	// Compute is marked optional, so it is validated only when provided.
+	if spec.Compute != nil {
+		errs = errs.Also(spec.Compute.validateRAGCreate())
+	}
+
+	// Accumulate the embedding validation results so they reach the caller.
+	if spec.Embedding.Local != nil {
+		errs = errs.Also(spec.Embedding.Local.validateCreate().ViaField("embedding"))
+	}
+	if spec.Embedding.Remote != nil {
+		errs = errs.Also(spec.Embedding.Remote.validateCreate().ViaField("embedding"))
+	}
+
+	return errs
+}
+
+// validateRAGCreate validates the compute resource of a RAGEngine at creation.
+//
+// A nil receiver is treated as "no compute specified" and yields no error.
+// Otherwise the instance type must have a GPU config known to the SKU handler;
+// as an exception, when CLOUD_PROVIDER is consts.AzureCloudName, instance types
+// starting with N_SERIES_PREFIX or D_SERIES_PREFIX are accepted even without a
+// known GPU config. The label selector must be convertible to a map.
+func (r *ResourceSpec) validateRAGCreate() (errs *apis.FieldError) {
+	// The compute section is optional, so there may be nothing to validate.
+	if r == nil {
+		return nil
+	}
+	instanceType := string(r.InstanceType)
+
+	skuHandler, err := utils.GetSKUHandler()
+	if err != nil {
+		return errs.Also(apis.ErrGeneric(fmt.Sprintf("Failed to get SKU handler: %v", err), "instanceType"))
+	}
+
+	if skuConfig := skuHandler.GetGPUConfigBySKU(instanceType); skuConfig == nil {
+		provider := os.Getenv("CLOUD_PROVIDER")
+		// Unknown SKUs are tolerated only on Azure, and only for the N- and D-series prefixes.
+		if provider != consts.AzureCloudName || (!strings.HasPrefix(instanceType, N_SERIES_PREFIX) && !strings.HasPrefix(instanceType, D_SERIES_PREFIX)) {
+			errs = errs.Also(apis.ErrInvalidValue(fmt.Sprintf("Unsupported instance type %s. Supported SKUs: %s", instanceType, skuHandler.GetSupportedSKUs()), "instanceType"))
+		}
+	}
+
+	// Validate labelSelector
+	if _, err := metav1.LabelSelectorAsMap(r.LabelSelector); err != nil {
+		errs = errs.Also(apis.ErrInvalidValue(err.Error(), "labelSelector"))
+	}
+
+	return errs
+}
+
+// validateCreate checks a local embedding spec: exactly one of Image and
+// ModelID must be set, and a non-empty Image must match the
+// "<repo path>/<name>:<tag>" pattern and pass utils.ExtractAndValidateRepoName.
+func (e *LocalEmbeddingSpec) validateCreate() (errs *apis.FieldError) {
+	hasImage := e.Image != ""
+	hasModelID := e.ModelID != ""
+
+	switch {
+	case !hasImage && !hasModelID:
+		errs = errs.Also(apis.ErrGeneric("Either image or modelID must be specified, not neither", ""))
+	case hasImage && hasModelID:
+		errs = errs.Also(apis.ErrGeneric("Either image or modelID must be specified, but not both", ""))
+	}
+
+	// The remaining checks only concern the image reference.
+	if !hasImage {
+		return errs
+	}
+
+	imagePattern := regexp.MustCompile(`^(.+/[^:/]+):([^:/]+)$`)
+	if !imagePattern.MatchString(e.Image) {
+		return errs.Also(apis.ErrInvalidValue("Invalid image format, require full input image URL", "Image"))
+	}
+
+	// The image has the expected shape; validate its repository name.
+	if repoErr := utils.ExtractAndValidateRepoName(e.Image); repoErr != nil {
+		errs = errs.Also(apis.ErrInvalidValue(repoErr.Error(), "Image"))
+	}
+	return errs
+}
+
+// validateCreate checks that the remote embedding URL parses as a request URI
+// and returns a field error describing the parse failure otherwise.
+func (e *RemoteEmbeddingSpec) validateCreate() (errs *apis.FieldError) {
+	if _, parseErr := url.ParseRequestURI(e.URL); parseErr != nil {
+		msg := fmt.Sprintf("URL input error: %v", parseErr)
+		return errs.Also(apis.ErrGeneric(msg, "remote url"))
+	}
+	return errs
+}
+
+// validateCreate checks an inference service spec: the URL must parse as a
+// request URI and ContextWindowSize must be a positive integer.
+//
+// A nil receiver yields no error; reporting a missing inference service is left
+// to the caller, which lets the caller invoke this method without first
+// checking for nil.
+func (e *InferenceServiceSpec) validateCreate() (errs *apis.FieldError) {
+	if e == nil {
+		return nil
+	}
+
+	if _, err := url.ParseRequestURI(e.URL); err != nil {
+		errs = errs.Also(apis.ErrGeneric(fmt.Sprintf("URL input error: %v", err), "remote url"))
+	}
+
+	if e.ContextWindowSize <= 0 {
+		errs = errs.Also(apis.ErrInvalidValue("ContextWindowSize must be a positive integer", "contextWindowSize"))
+	}
+
+	return errs
+}