Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/e2e-base-setup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ runs:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}
TEST_SUITE: ${{ inputs.node_provisioner }}
HELM_INSTALL_EXTRA_ARGS: "--set featureGates.gatewayAPIInferenceExtension=true"
HELM_INSTALL_EXTRA_ARGS: "--set featureGates.gatewayAPIInferenceExtension=true --set featureGates.enableInferenceSetController=true"

- name: Set up E2E ACR Credentials and Secret
shell: bash
Expand Down
35 changes: 34 additions & 1 deletion .github/workflows/aikit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
# Install via Helm
# CSI Local Node cannot be configured to not deploy, and it crashes on kind on GitHub runners for unknown reasons,
# so we need to filter out its components
helm template kaito-workspace ./charts/kaito/workspace --namespace ${{ env.KAITO_NAMESPACE }} --set featureGates.disableNodeAutoProvisioning="true" --include-crds --debug --wait | awk '
helm template kaito-workspace ./charts/kaito/workspace --namespace ${{ env.KAITO_NAMESPACE }} --set featureGates.disableNodeAutoProvisioning="true" --set featureGates.enableInferenceSetController="true" --include-crds --debug --wait | awk '
BEGIN { RS="---\n"; ORS="---\n"; csi=0 }
/kind: ServiceAccount/ && /name: csi-local-node/ { csi=1 }
/kind: StorageClass/ && /name: kaito-local-nvme-disk/ { csi=1 }
Expand Down Expand Up @@ -121,11 +121,41 @@ jobs:
EOF
kubectl apply -f aikit-workspace.yaml

# Creates an InferenceSet (kaito.sh/v1alpha1) named inferenceset-aikit-test:
# the manifest is written to aikit-inferenceset.yaml via a heredoc and then
# applied with kubectl. The template sets only `inference` (no `resource`
# section) and runs a single container from the image in env.AIKIT_IMAGE.
- name: Create AIKit InferenceSet
run: |
cat << EOF > aikit-inferenceset.yaml
apiVersion: kaito.sh/v1alpha1
kind: InferenceSet
metadata:
name: inferenceset-aikit-test
spec:
labelSelector:
matchLabels:
apps: aikit-test
replicas: 1
template:
inference:
template:
spec:
containers:
- name: aikit-llama
image: ${{ env.AIKIT_IMAGE }}
args:
- "run"
- "--address=:5000"
EOF
kubectl apply -f aikit-inferenceset.yaml

# Blocks until workspace/workspace-aikit-test reports its InferenceReady
# condition with status True; kubectl wait gives up after 300s.
- name: Wait for inference to be ready
run: |
echo "Waiting for inference to be ready..."
kubectl wait --for='jsonpath={.status.conditions[?(@.type=="InferenceReady")].status}=True' workspace/workspace-aikit-test --timeout 300s

# Blocks until inferenceset/inferenceset-aikit-test reports
# .status.readyReplicas equal to 1; kubectl wait gives up after 300s.
- name: Wait for inferenceset to be ready
run: |
echo "Waiting for inferenceset to be ready..."
kubectl wait --for='jsonpath={.status.readyReplicas}=1' inferenceset/inferenceset-aikit-test --timeout 300s

- name: Validate response content
run: |
# Set up port forwarding to the workspace service
Expand Down Expand Up @@ -180,6 +210,9 @@ jobs:
- name: Debug info
if: always()
run: |
echo "=== InferenceSet Status ==="
kubectl get inferenceset inferenceset-aikit-test -o yaml

echo "=== Workspace Status ==="
kubectl get workspace workspace-aikit-test -o yaml

Expand Down
1 change: 1 addition & 0 deletions api/v1alpha1/inferenceset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type InferenceSetResourceSpec struct {

// InferenceSetTemplate defines the template for creating InferenceSet instances.
type InferenceSetTemplate struct {
	// Resource holds the resource settings for the template. It may be omitted.
	// +optional
	Resource InferenceSetResourceSpec `json:"resource,omitempty"`
	// Inference holds the inference specification for the template. It is required.
	Inference kaitov1beta1.InferenceSpec `json:"inference"`
}
Expand Down
1 change: 0 additions & 1 deletion charts/kaito/workspace/crds/kaito.sh_inferencesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ spec:
type: object
required:
- inference
- resource
type: object
updateStrategy:
default:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if .Values.featureGates.enableInferenceSetController -}}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down Expand Up @@ -31,3 +32,4 @@ rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "patch", "update"]
{{- end -}}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if .Values.featureGates.enableInferenceSetController -}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
Expand All @@ -12,3 +13,4 @@ subjects:
- kind: ServiceAccount
name: {{ include "kaito.fullname" . }}-sa
namespace: {{ .Release.Namespace }}
{{- end -}}
2 changes: 2 additions & 0 deletions charts/kaito/workspace/templates/webhooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ webhooks:
operations:
- CREATE
- UPDATE
{{- if .Values.featureGates.enableInferenceSetController }}
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
Expand Down Expand Up @@ -52,3 +53,4 @@ webhooks:
operations:
- CREATE
- UPDATE
{{- end -}}
1 change: 1 addition & 0 deletions charts/kaito/workspace/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ featureGates:
vLLM: true
disableNodeAutoProvisioning: false
gatewayAPIInferenceExtension: false
enableInferenceSetController: false
localCSIDriver:
useLocalCSIDriver: true
webhook:
Expand Down
1 change: 0 additions & 1 deletion config/crd/bases/kaito.sh_inferencesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ spec:
type: object
required:
- inference
- resource
type: object
updateStrategy:
default:
Expand Down
Loading