Skip to content

Commit 877d9b1

Browse files
authored
pod anti-affinity check among nodes (kubernetes-sigs#1033)
* pod anti-affinity check among nodes * avoid pod equality check with UID field also add node equality check with Name for short-cut * add test case for anti-affinity violation among different node * reduce ListPodsOnANode call * fix old code * apply gofumpt -w -extra move klog/v2 import entry to bottom according to master code
1 parent b58ec3b commit 877d9b1

File tree

2 files changed

+127
-15
lines changed

2 files changed

+127
-15
lines changed

pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,36 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Name() string {
7878
}
7979

8080
func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
81+
podsList, err := d.handle.ClientSet().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
82+
if err != nil {
83+
return &frameworktypes.Status{
84+
Err: fmt.Errorf("error listing all pods: %v", err),
85+
}
86+
}
87+
88+
podsInANamespace := groupByNamespace(podsList)
89+
90+
runningPodFilter := func(pod *v1.Pod) bool {
91+
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
92+
}
93+
podsOnANode := groupByNodeName(podsList, podutil.WrapFilterFuncs(runningPodFilter, d.podFilter))
94+
95+
nodeMap := createNodeMap(nodes)
96+
8197
loop:
8298
for _, node := range nodes {
8399
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
84-
pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter)
85-
if err != nil {
86-
return &frameworktypes.Status{
87-
Err: fmt.Errorf("error listing pods: %v", err),
88-
}
89-
}
90-
// sort the evictable Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
100+
pods := podsOnANode[node.Name]
101+
// sort the evict-able Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
91102
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
92103
totalPods := len(pods)
93104
for i := 0; i < totalPods; i++ {
94-
if checkPodsWithAntiAffinityExist(pods[i], pods) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
105+
if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
95106
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{}) {
96107
// Since the current pod is evicted all other pods which have anti-affinity with this
97108
// pod need not be evicted.
98-
// Update pods.
109+
// Update allPods.
110+
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
99111
pods = append(pods[:i], pods[i+1:]...)
100112
i--
101113
totalPods--
@@ -109,8 +121,54 @@ loop:
109121
return nil
110122
}
111123

124+
func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod) map[string][]*v1.Pod {
125+
podList, ok := podMap[podToRemove.Namespace]
126+
if !ok {
127+
return podMap
128+
}
129+
for i := 0; i < len(podList); i++ {
130+
podToCheck := podList[i]
131+
if podToRemove.Name == podToCheck.Name {
132+
podMap[podToRemove.Namespace] = append(podList[:i], podList[i+1:]...)
133+
return podMap
134+
}
135+
}
136+
return podMap
137+
}
138+
139+
func groupByNamespace(pods *v1.PodList) map[string][]*v1.Pod {
140+
m := make(map[string][]*v1.Pod)
141+
for i := 0; i < len(pods.Items); i++ {
142+
pod := &(pods.Items[i])
143+
m[pod.Namespace] = append(m[pod.Namespace], pod)
144+
}
145+
return m
146+
}
147+
148+
func groupByNodeName(pods *v1.PodList, filter podutil.FilterFunc) map[string][]*v1.Pod {
149+
m := make(map[string][]*v1.Pod)
150+
if filter == nil {
151+
filter = func(p *v1.Pod) bool { return true }
152+
}
153+
for i := 0; i < len(pods.Items); i++ {
154+
pod := &(pods.Items[i])
155+
if filter(pod) {
156+
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
157+
}
158+
}
159+
return m
160+
}
161+
162+
func createNodeMap(nodes []*v1.Node) map[string]*v1.Node {
163+
m := make(map[string]*v1.Node, len(nodes))
164+
for _, node := range nodes {
165+
m[node.GetName()] = node
166+
}
167+
return m
168+
}
169+
112170
// checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
113-
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
171+
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
114172
affinity := pod.Spec.Affinity
115173
if affinity != nil && affinity.PodAntiAffinity != nil {
116174
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
@@ -120,16 +178,52 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
120178
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
121179
return false
122180
}
123-
for _, existingPod := range pods {
124-
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
125-
return true
181+
for namespace := range namespaces {
182+
for _, existingPod := range pods[namespace] {
183+
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
184+
node, ok := nodeMap[pod.Spec.NodeName]
185+
if !ok {
186+
continue
187+
}
188+
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
189+
if !ok {
190+
continue
191+
}
192+
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
193+
klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod))
194+
return true
195+
}
196+
}
126197
}
127198
}
128199
}
129200
}
130201
return false
131202
}
132203

204+
func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
205+
if node1.Name == node2.Name {
206+
return true
207+
}
208+
node1Labels := node1.Labels
209+
if node1Labels == nil {
210+
return false
211+
}
212+
node2Labels := node2.Labels
213+
if node2Labels == nil {
214+
return false
215+
}
216+
value1, ok := node1Labels[key]
217+
if !ok {
218+
return false
219+
}
220+
value2, ok := node2Labels[key]
221+
if !ok {
222+
return false
223+
}
224+
return value1 == value2
225+
}
226+
133227
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
134228
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
135229
if podAntiAffinity != nil {

pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,27 @@ import (
3838
)
3939

4040
func TestPodAntiAffinity(t *testing.T) {
41-
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
41+
node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
42+
node.ObjectMeta.Labels = map[string]string{
43+
"region": "main-region",
44+
}
45+
})
4246
node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) {
4347
node.ObjectMeta.Labels = map[string]string{
4448
"datacenter": "east",
4549
}
4650
})
47-
4851
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
4952
node.Spec = v1.NodeSpec{
5053
Unschedulable: true,
5154
}
5255
})
5356
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
57+
node5 := test.BuildTestNode("n5", 200, 3000, 10, func(node *v1.Node) {
58+
node.ObjectMeta.Labels = map[string]string{
59+
"region": "main-region",
60+
}
61+
})
5462

5563
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
5664
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
@@ -62,6 +70,7 @@ func TestPodAntiAffinity(t *testing.T) {
6270
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
6371
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
6472
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
73+
p11 := test.BuildTestPod("p11", 100, 0, node5.Name, nil)
6574
p9.DeletionTimestamp = &metav1.Time{}
6675
p10.DeletionTimestamp = &metav1.Time{}
6776

@@ -73,6 +82,7 @@ func TestPodAntiAffinity(t *testing.T) {
7382
p5.Labels = map[string]string{"foo": "bar"}
7483
p6.Labels = map[string]string{"foo": "bar"}
7584
p7.Labels = map[string]string{"foo1": "bar1"}
85+
p11.Labels = map[string]string{"foo": "bar"}
7686
nonEvictablePod.Labels = map[string]string{"foo": "bar"}
7787
test.SetNormalOwnerRef(p1)
7888
test.SetNormalOwnerRef(p2)
@@ -83,6 +93,7 @@ func TestPodAntiAffinity(t *testing.T) {
8393
test.SetNormalOwnerRef(p7)
8494
test.SetNormalOwnerRef(p9)
8595
test.SetNormalOwnerRef(p10)
96+
test.SetNormalOwnerRef(p11)
8697

8798
// set pod anti affinity
8899
setPodAntiAffinity(p1, "foo", "bar")
@@ -186,6 +197,13 @@ func TestPodAntiAffinity(t *testing.T) {
186197
expectedEvictedPodCount: 0,
187198
nodeFit: true,
188199
},
200+
{
201+
description: "Evict pod violating anti-affinity among different node (all pods have anti-affinity)",
202+
pods: []*v1.Pod{p1, p11},
203+
nodes: []*v1.Node{node1, node5},
204+
expectedEvictedPodCount: 1,
205+
nodeFit: false,
206+
},
189207
}
190208

191209
for _, test := range tests {

0 commit comments

Comments
 (0)