@@ -27,15 +27,18 @@ import (
2727 "github.com/onsi/gomega"
2828 corev1 "k8s.io/api/core/v1"
2929 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30+ "k8s.io/apimachinery/pkg/types"
3031 "k8s.io/client-go/tools/clientcmd"
3132 "sigs.k8s.io/controller-runtime/pkg/client"
3233 "sigs.k8s.io/controller-runtime/pkg/manager"
3334
3435 config "sigs.k8s.io/kueue/apis/config/v1beta2"
3536 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta2"
37+ workloadjob "sigs.k8s.io/kueue/pkg/controller/jobs/job"
3638 "sigs.k8s.io/kueue/pkg/features"
3739 utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
3840 utiltestingapi "sigs.k8s.io/kueue/pkg/util/testing/v1beta2"
41+ testingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job"
3942 "sigs.k8s.io/kueue/test/util"
4043)
4144
@@ -722,4 +725,168 @@ var _ = ginkgo.Describe("MultiKueue", ginkgo.Ordered, ginkgo.ContinueOnFailure,
722725 })
723726 })
724727 })
728+
729+ ginkgo .It ("Should properly detect insecure kubeconfig of MultiKueueClusters and remove remote client" , func () {
730+ var w1KubeconfigInvalidBytes []byte
731+ ginkgo .By ("Create a kubeconfig with an invalid certificate authority path" , func () {
732+ cfg , err := worker1TestCluster .kubeConfigBytes ()
733+ gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
734+
735+ w1KubeconfigInvalid , err := clientcmd .Load (cfg )
736+ gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
737+ gomega .Expect (w1KubeconfigInvalid ).NotTo (gomega .BeNil ())
738+
739+ w1KubeconfigInvalid .Clusters ["default-cluster" ].CertificateAuthority = "/some/random/path"
740+ w1KubeconfigInvalidBytes , err = clientcmd .Write (* w1KubeconfigInvalid )
741+ gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
742+ })
743+
744+ secret := & corev1.Secret {
745+ ObjectMeta : metav1.ObjectMeta {
746+ Name : "testing-secret" ,
747+ Namespace : managersConfigNamespace .Name ,
748+ },
749+ Data : map [string ][]byte {
750+ kueue .MultiKueueConfigSecretKey : w1KubeconfigInvalidBytes ,
751+ },
752+ }
753+
754+ ginkgo .By ("creating the secret, with insecure kubeconfig" , func () {
755+ gomega .Expect (managerTestCluster .client .Create (managerTestCluster .ctx , secret )).Should (gomega .Succeed ())
756+ ginkgo .DeferCleanup (func () error { return managerTestCluster .client .Delete (managerTestCluster .ctx , secret ) })
757+ })
758+
759+ secretKey := client .ObjectKeyFromObject (secret )
760+ clusterKey := client .ObjectKeyFromObject (workerCluster1 )
761+ acKey := client .ObjectKeyFromObject (multiKueueAC )
762+ cqKey := client .ObjectKeyFromObject (managerCq )
763+
764+ ginkgo .By ("updating the cluster, the worker1 cluster becomes inactive" , func () {
765+ updatedCluster := kueue.MultiKueueCluster {}
766+ ginkgo .By ("updating the cluster spec" , func () {
767+ gomega .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , clusterKey , & updatedCluster )).To (gomega .Succeed ())
768+ updatedCluster .Spec .KubeConfig .LocationType = kueue .SecretLocationType
769+ updatedCluster .Spec .KubeConfig .Location = secret .Name
770+ gomega .Expect (managerTestCluster .client .Update (managerTestCluster .ctx , & updatedCluster )).To (gomega .Succeed ())
771+ })
772+
773+ ginkgo .By ("wait for the status update" , func () {
774+ gomega .Eventually (func (g gomega.Gomega ) {
775+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , clusterKey , & updatedCluster )).To (gomega .Succeed ())
776+ g .Expect (updatedCluster .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
777+ Type : kueue .MultiKueueClusterActive ,
778+ Status : metav1 .ConditionFalse ,
779+ Reason : "InsecureKubeConfig" ,
780+ Message : "insecure kubeconfig: certificate-authority file paths are not allowed, use certificate-authority-data for cluster default-cluster" ,
781+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
782+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
783+
784+ gomega .Eventually (func (g gomega.Gomega ) {
785+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , acKey , multiKueueAC )).To (gomega .Succeed ())
786+ g .Expect (multiKueueAC .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
787+ Type : kueue .AdmissionCheckActive ,
788+ Status : metav1 .ConditionTrue ,
789+ Reason : "SomeActiveClusters" ,
790+ Message : "Inactive clusters: [worker1]" ,
791+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
792+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
793+
794+ gomega .Eventually (func (g gomega.Gomega ) {
795+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , cqKey , managerCq )).To (gomega .Succeed ())
796+ g .Expect (managerCq .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
797+ Type : kueue .ClusterQueueActive ,
798+ Status : metav1 .ConditionTrue ,
799+ Reason : "Ready" ,
800+ Message : "Can admit new workloads" ,
801+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
802+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
803+ })
804+ })
805+
806+ job := testingjob .MakeJob ("job1" , managerNs .Name ).
807+ Queue (kueue .LocalQueueName (managerLq .Name )).
808+ Obj ()
809+ ginkgo .By ("create a job and reserve quota, only existing remoteClients are part of the NominatedClusterNames" , func () {
810+ util .MustCreate (managerTestCluster .ctx , managerTestCluster .client , job )
811+ wlLookupKey := types.NamespacedName {Name : workloadjob .GetWorkloadNameForJob (job .Name , job .UID ), Namespace : managerNs .Name }
812+
813+ ginkgo .By ("setting workload reservation in the management cluster" , func () {
814+ admission := utiltestingapi .MakeAdmission (managerCq .Name ).Obj ()
815+ util .SetQuotaReservation (managerTestCluster .ctx , managerTestCluster .client , wlLookupKey , admission )
816+ })
817+
818+ ginkgo .By ("verify remote clients are managed correctly: worker1 was removed and worker2 is still active" , func () {
819+ managerWl := & kueue.Workload {}
820+ gomega .Eventually (func (g gomega.Gomega ) {
821+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , wlLookupKey , managerWl )).To (gomega .Succeed ())
822+ g .Expect (managerWl .Status .NominatedClusterNames ).NotTo (gomega .ContainElements (workerCluster1 .Name ))
823+ g .Expect (managerWl .Status .NominatedClusterNames ).To (gomega .ContainElements (workerCluster2 .Name ))
824+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
825+ })
826+
827+ ginkgo .By ("unsetting workload reservation in the management cluster to cleanup workloads" , func () {
828+ util .SetQuotaReservation (managerTestCluster .ctx , managerTestCluster .client , wlLookupKey , nil )
829+ })
830+ })
831+
832+ ginkgo .By ("updating the secret with a valid kubeconfig" , func () {
833+ w1Kubeconfig , err := worker1TestCluster .kubeConfigBytes ()
834+ gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
835+ updatedSecret := & corev1.Secret {}
836+ gomega .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , secretKey , updatedSecret )).To (gomega .Succeed ())
837+ updatedSecret .Data [kueue .MultiKueueConfigSecretKey ] = w1Kubeconfig
838+ gomega .Expect (managerTestCluster .client .Update (managerTestCluster .ctx , updatedSecret )).To (gomega .Succeed ())
839+ })
840+
841+ ginkgo .By ("the worker1 cluster becomes active" , func () {
842+ cluster := kueue.MultiKueueCluster {}
843+ gomega .EventuallyWithOffset (1 , func (g gomega.Gomega ) {
844+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , clusterKey , & cluster )).To (gomega .Succeed ())
845+ g .Expect (cluster .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
846+ Type : kueue .MultiKueueClusterActive ,
847+ Status : metav1 .ConditionTrue ,
848+ Reason : "Active" ,
849+ Message : "Connected" ,
850+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
851+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
852+
853+ ac := kueue.AdmissionCheck {}
854+ gomega .EventuallyWithOffset (1 , func (g gomega.Gomega ) {
855+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , acKey , & ac )).To (gomega .Succeed ())
856+ g .Expect (ac .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
857+ Type : kueue .AdmissionCheckActive ,
858+ Status : metav1 .ConditionTrue ,
859+ Reason : "Active" ,
860+ Message : "The admission check is active" ,
861+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
862+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
863+
864+ cq := & kueue.ClusterQueue {}
865+ gomega .EventuallyWithOffset (1 , func (g gomega.Gomega ) {
866+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , cqKey , cq )).To (gomega .Succeed ())
867+ g .Expect (cq .Status .Conditions ).To (gomega .ContainElement (gomega .BeComparableTo (metav1.Condition {
868+ Type : kueue .ClusterQueueActive ,
869+ Status : metav1 .ConditionTrue ,
870+ Reason : "Ready" ,
871+ Message : "Can admit new workloads" ,
872+ }, util .IgnoreConditionTimestampsAndObservedGeneration )))
873+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
874+ })
875+
876+ ginkgo .By ("reserve quota again and verify that existing remoteClients are part of the NominatedClusterNames" , func () {
877+ wlLookupKey := types.NamespacedName {Name : workloadjob .GetWorkloadNameForJob (job .Name , job .UID ), Namespace : managerNs .Name }
878+ ginkgo .By ("setting workload reservation in the management cluster" , func () {
879+ admission := utiltestingapi .MakeAdmission (managerCq .Name ).Obj ()
880+ util .SetQuotaReservation (managerTestCluster .ctx , managerTestCluster .client , wlLookupKey , admission )
881+ })
882+
883+ ginkgo .By ("verify remote clients are managed correctly: both worker1 and worker2 are active" , func () {
884+ managerWl := & kueue.Workload {}
885+ gomega .Eventually (func (g gomega.Gomega ) {
886+ g .Expect (managerTestCluster .client .Get (managerTestCluster .ctx , wlLookupKey , managerWl )).To (gomega .Succeed ())
887+ g .Expect (managerWl .Status .NominatedClusterNames ).To (gomega .ContainElements (workerCluster1 .Name , workerCluster2 .Name ))
888+ }, util .Timeout , util .Interval ).Should (gomega .Succeed ())
889+ })
890+ })
891+ })
725892})
0 commit comments