diff --git a/.github/actions/e2e-base-setup/action.yaml b/.github/actions/e2e-base-setup/action.yaml index 05599710d5..559814e1ed 100644 --- a/.github/actions/e2e-base-setup/action.yaml +++ b/.github/actions/e2e-base-setup/action.yaml @@ -231,7 +231,7 @@ runs: REGISTRY: ${{ env.REGISTRY }} VERSION: ${{ env.VERSION }} TEST_SUITE: ${{ inputs.node_provisioner }} - HELM_INSTALL_EXTRA_ARGS: "--set featureGates.gatewayAPIInferenceExtension=true" + HELM_INSTALL_EXTRA_ARGS: "--set featureGates.gatewayAPIInferenceExtension=true --set featureGates.enableInferenceSetController=true" - name: Set up E2E ACR Credentials and Secret shell: bash diff --git a/.github/workflows/aikit.yaml b/.github/workflows/aikit.yaml index d745a4d2a8..db33449fa3 100644 --- a/.github/workflows/aikit.yaml +++ b/.github/workflows/aikit.yaml @@ -84,7 +84,7 @@ jobs: # Install via Helm # CSI Local Node cannot be configured to not deploy and it crashes on kind on github runners for unknown reasons # so we need to filter it out its components - helm template kaito-workspace ./charts/kaito/workspace --namespace ${{ env.KAITO_NAMESPACE }} --set featureGates.disableNodeAutoProvisioning="true" --include-crds --debug --wait | awk ' + helm template kaito-workspace ./charts/kaito/workspace --namespace ${{ env.KAITO_NAMESPACE }} --set featureGates.disableNodeAutoProvisioning="true" --set featureGates.enableInferenceSetController="true" --include-crds --debug --wait | awk ' BEGIN { RS="---\n"; ORS="---\n"; csi=0 } /kind: ServiceAccount/ && /name: csi-local-node/ { csi=1 } /kind: StorageClass/ && /name: kaito-local-nvme-disk/ { csi=1 } @@ -121,11 +121,41 @@ jobs: EOF kubectl apply -f aikit-workspace.yaml + - name: Create AIKit InferenceSet + run: | + cat << EOF > aikit-inferenceset.yaml + apiVersion: kaito.sh/v1alpha1 + kind: InferenceSet + metadata: + name: inferenceset-aikit-test + spec: + labelSelector: + matchLabels: + apps: aikit-test + replicas: 1 + template: + inference: + template: + spec: + containers: + - name: aikit-llama + image: ${{ env.AIKIT_IMAGE }} + args: + - "run" + - "--address=:5000" + EOF + kubectl apply -f aikit-inferenceset.yaml + - name: Wait for inference to be ready run: | echo "Waiting for inference to be ready..." kubectl wait --for='jsonpath={.status.conditions[?(@.type=="InferenceReady")].status}=True' workspace/workspace-aikit-test --timeout 300s + - name: Wait for inferenceset to be ready + run: | + echo "Waiting for inferenceset to be ready..." + kubectl wait --for='jsonpath={.status.readyReplicas}=1' inferenceset/inferenceset-aikit-test --timeout 300s + - name: Validate response content run: | # Set up port forwarding to the workspace service @@ -180,6 +210,9 @@ jobs: - name: Debug info if: always() run: | + echo "=== InferenceSet Status ===" + kubectl get inferenceset inferenceset-aikit-test -o yaml + echo "=== Workspace Status ===" kubectl get workspace workspace-aikit-test -o yaml diff --git a/api/v1alpha1/inferenceset_types.go b/api/v1alpha1/inferenceset_types.go index b69db71e2c..5bd369826d 100644 --- a/api/v1alpha1/inferenceset_types.go +++ b/api/v1alpha1/inferenceset_types.go @@ -28,6 +28,7 @@ type InferenceSetResourceSpec struct { // InferenceSetTemplate defines the template for creating InferenceSet instances. type InferenceSetTemplate struct { + // +optional Resource InferenceSetResourceSpec `json:"resource"` Inference kaitov1beta1.InferenceSpec `json:"inference"` } diff --git a/charts/kaito/workspace/crds/kaito.sh_inferencesets.yaml b/charts/kaito/workspace/crds/kaito.sh_inferencesets.yaml index f55c192c0d..a247ce11bd 100644 --- a/charts/kaito/workspace/crds/kaito.sh_inferencesets.yaml +++ b/charts/kaito/workspace/crds/kaito.sh_inferencesets.yaml @@ -222,7 +222,6 @@ spec: type: object required: - inference - - resource type: object updateStrategy: default: diff --git a/charts/kaito/workspace/templates/inferenceset_clusterrole.yaml b/charts/kaito/workspace/templates/inferenceset_clusterrole.yaml index 241000cf8f..c45dbb8c25 100644 --- a/charts/kaito/workspace/templates/inferenceset_clusterrole.yaml +++ b/charts/kaito/workspace/templates/inferenceset_clusterrole.yaml @@ -1,3 +1,4 @@ +{{- if .Values.featureGates.enableInferenceSetController -}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -31,3 +32,4 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["create", "patch", "update"] +{{- end -}} diff --git a/charts/kaito/workspace/templates/inferenceset_clusterrole_binding.yaml b/charts/kaito/workspace/templates/inferenceset_clusterrole_binding.yaml index 8e6c2772bc..5b487ccb0e 100644 --- a/charts/kaito/workspace/templates/inferenceset_clusterrole_binding.yaml +++ b/charts/kaito/workspace/templates/inferenceset_clusterrole_binding.yaml @@ -1,3 +1,4 @@ +{{- if .Values.featureGates.enableInferenceSetController -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -12,3 +13,4 @@ subjects: - kind: ServiceAccount name: {{ include "kaito.fullname" . }}-sa namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/charts/kaito/workspace/templates/webhooks.yaml b/charts/kaito/workspace/templates/webhooks.yaml index 3e2b9097ac..5c2c80285d 100644 --- a/charts/kaito/workspace/templates/webhooks.yaml +++ b/charts/kaito/workspace/templates/webhooks.yaml @@ -25,6 +25,7 @@ webhooks: operations: - CREATE - UPDATE +{{- if .Values.featureGates.enableInferenceSetController }} --- apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration @@ -52,3 +53,4 @@ webhooks: operations: - CREATE - UPDATE +{{- end -}} diff --git a/charts/kaito/workspace/values.yaml b/charts/kaito/workspace/values.yaml index cdb3729800..86f3a1d6d1 100644 --- a/charts/kaito/workspace/values.yaml +++ b/charts/kaito/workspace/values.yaml @@ -19,6 +19,7 @@ featureGates: vLLM: true disableNodeAutoProvisioning: false gatewayAPIInferenceExtension: false + enableInferenceSetController: false localCSIDriver: useLocalCSIDriver: true webhook: diff --git a/config/crd/bases/kaito.sh_inferencesets.yaml b/config/crd/bases/kaito.sh_inferencesets.yaml index f55c192c0d..a247ce11bd 100644 --- a/config/crd/bases/kaito.sh_inferencesets.yaml +++ b/config/crd/bases/kaito.sh_inferencesets.yaml @@ -222,7 +222,6 @@ spec: type: object required: - inference - - resource type: object updateStrategy: default: