# SPIKE: Operator to evict tainted pods - issue 18 #47

Status: Closed · Changes from all commits (3 commits)
`.gitignore` (inferred from content):

```diff
@@ -1,2 +1,3 @@
-k-rail
-vendor
+/k-rail
+/evicter
+vendor
```
README (new file):

```diff
@@ -0,0 +1,5 @@
+# Evict tainted pods after period
+Operator that runs within k8s to find and evict tainted pods. It relies on these pod annotations:
+
+* `k-rails/tainted-timestamp`
+* `k-rails/tainted-prevent-eviction`
```
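For context, a pod becomes an eviction candidate once it carries these annotations (the code below also reads a `k-rails/tainted-reason` annotation). A minimal sketch of building them; the `taintAnnotations` helper and the `"exec"` reason value are illustrative, not part of the diff:

```go
package main

import (
	"fmt"
	"strconv"
	"time"
)

// taintAnnotations builds the annotation map the evicter inspects:
// a Unix-seconds timestamp plus a human-readable reason.
// The helper name is hypothetical; only the annotation keys come from this PR.
func taintAnnotations(reason string) map[string]string {
	return map[string]string{
		"k-rails/tainted-timestamp": strconv.FormatInt(time.Now().Unix(), 10),
		"k-rails/tainted-reason":    reason,
	}
}

func main() {
	fmt.Println(taintAnnotations("exec"))
}
```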
The controller implementation (new file, 186 added lines):

```go
package main

import (
	"fmt"
	"strconv"
	"time"

	"github.com/pkg/errors"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/klog"
)

type podProvisioner interface {
	Evict(pod *v1.Pod, reason string) error
}

type Controller struct {
	podStore                cache.Indexer
	queue                   workqueue.RateLimitingInterface
	informer                cache.Controller
	podProvisioner          podProvisioner
	incubationPeriodSeconds time.Duration
	started                 time.Time
}

func NewController(queue workqueue.RateLimitingInterface, indexer cache.Indexer, informer cache.Controller, podProvisioner podProvisioner, incubationPeriodSeconds int64) *Controller {
	return &Controller{
		informer:                informer,
		podStore:                indexer,
		queue:                   queue,
		podProvisioner:          podProvisioner,
		incubationPeriodSeconds: time.Duration(incubationPeriodSeconds) * time.Second,
		started:                 time.Now(), // record startup so Run can honor the grace period
	}
}

func (c *Controller) processNextItem() bool {
	key, quit := c.queue.Get()
	if quit {
		return false
	}
	defer c.queue.Done(key)

	err := c.evictPod(key.(string))
	c.handleErr(err, key)
	return true
}

const (
	annotationPreventEviction = "k-rails/tainted-prevent-eviction"
	annotationTimestamp       = "k-rails/tainted-timestamp"
	annotationReason          = "k-rails/tainted-reason"
)

const defaultEvictionReason = "exec"

// evictPod is the business logic of the controller. It checks the eviction rules and conditions before calling the pod provisioner.
func (c *Controller) evictPod(key string) error {
	obj, exists, err := c.podStore.GetByKey(key)
	switch {
	case err != nil:
		return err
	case !exists:
		return nil
	}
	pod, ok := obj.(*v1.Pod)
	if !ok {
		return fmt.Errorf("unsupported type: %T", obj)
	}
	if !canEvict(pod, c.incubationPeriodSeconds) {
		return nil
	}

	reason, ok := pod.Annotations[annotationReason]
	if !ok || reason == "" {
		reason = defaultEvictionReason
	}

	return c.podProvisioner.Evict(pod, reason)
}

func canEvict(pod *v1.Pod, incubationPeriod time.Duration) bool {
	if pod == nil {
		return false
	}
	val, ok := pod.Annotations[annotationPreventEviction]
	if ok {
		if val == "yes" || val == "true" {
			return false
		}
	}

	val, ok = pod.Annotations[annotationTimestamp]
	if ok {
		i, err := strconv.ParseInt(val, 10, 64)
		if err != nil {
			// An unparsable timestamp is treated as immediately evictable.
			klog.Warningf("invalid %s annotation on pod %s/%s: %v", annotationTimestamp, pod.Namespace, pod.Name, err)
			return true
		}
		timestamp := time.Unix(i, 0)
		if time.Since(timestamp) < incubationPeriod {
			return false
		}
	}
	return true
}

const maxWorkerRetries = 5

// handleErr checks if an error happened and makes sure we will retry later.
func (c *Controller) handleErr(err error, key interface{}) {
	if err == nil {
		// Forget about the #AddRateLimited history of the key on every successful synchronization.
		// This ensures that future processing of updates for this key is not delayed because of
		// an outdated error history.
		c.queue.Forget(key)
		return
	}

	// This controller retries 5 times if something goes wrong. After that, it stops trying.
	if c.queue.NumRequeues(key) < maxWorkerRetries {
		klog.Infof("Error syncing pod %v: %v", key, err)

		// Re-enqueue the key rate limited. Based on the rate limiter on the
		// queue and the re-enqueue history, the key will be processed later again.
		c.queue.AddRateLimited(key)
		return
	}

	c.queue.Forget(key)
	// Report to an external entity that, even after several retries, we could not successfully process this key.
	runtime.HandleError(err)
	klog.Infof("Dropping pod %q out of the queue: %v", key, err)
}

const reconciliationTick = 30 * time.Second
const startupGracePeriod = 90 * time.Second

func (c *Controller) Run(threadiness int, stopCh chan struct{}) {
	defer runtime.HandleCrash()

	// Let the workers stop when we are done.
	defer c.queue.ShutDown()
	klog.Info("Starting Pod controller")

	go c.informer.Run(stopCh)

	// Wait for all involved caches to be synced before processing items from the queue is started.
	if !cache.WaitForCacheSync(stopCh, c.informer.HasSynced) {
		runtime.HandleError(fmt.Errorf("timed out waiting for caches to sync"))
		return
	}

	for i := 0; i < threadiness; i++ {
		go wait.Until(c.runWorker, time.Second, stopCh)
	}

	wait.Until(func() {
		if time.Since(c.started) < startupGracePeriod {
			return
		}
		if err := c.doReconciliation(); err != nil {
			klog.Errorf("Reconciliation failed: %s", err)
		}
	}, reconciliationTick, stopCh)

	klog.Info("Stopping Pod controller")
}

func (c *Controller) runWorker() {
	for c.processNextItem() {
	}
}

func (c *Controller) doReconciliation() error {
	klog.Info("Reconciliation started")
	for _, key := range c.podStore.ListKeys() {
		if err := c.evictPod(key); err != nil {
			return errors.Wrapf(err, "pod %q", key)
		}
	}
	klog.Info("Reconciliation completed")
	return nil
}
```
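Not part of the diff: a minimal test sketch exercising the eviction rules above, assuming it sits in the same package as `canEvict` and its annotation constants:

```go
package main

import (
	"strconv"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestCanEvict(t *testing.T) {
	// A pod tainted two hours ago with a one-hour incubation period is evictable.
	past := strconv.FormatInt(time.Now().Add(-2*time.Hour).Unix(), 10)
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Annotations: map[string]string{annotationTimestamp: past},
	}}
	if !canEvict(pod, time.Hour) {
		t.Fatal("expected eviction once the incubation period has passed")
	}

	// The prevent-eviction annotation overrides the timestamp check.
	pod.Annotations[annotationPreventEviction] = "true"
	if canEvict(pod, time.Hour) {
		t.Fatal("prevent-eviction annotation must block eviction")
	}
}
```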
The command entrypoint wiring flags, informer, workqueue, event broadcaster, and the evicter (new file, 118 added lines):

```go
package main

import (
	"flag"

	"github.com/pkg/errors"
	v1 "k8s.io/api/core/v1"
	policy "k8s.io/api/policy/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/kubernetes/typed/policy/v1beta1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/tools/events"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/klog"
)

func main() {
	var (
		kubeconfig                     = flag.String("kubeconfig", "", "absolute path to the kubeconfig file: `<home>/.kube/config`")
		master                         = flag.String("master", "", "master url")
		labelSelector                  = flag.String("label-selector", "tainted=true", "label selector to discover tainted pods")
		terminationGracePeriodSeconds  = flag.Int64("termination-grace-period", 30, "pod termination grace period in seconds")
		taintedIncubationPeriodSeconds = flag.Int64("incubation-period", 24*60*60, "time in seconds a tainted pod can run before eviction")
	)
	klog.InitFlags(nil) // register klog flags (e.g. logtostderr) on the default flagset
	flag.Parse()
	flag.Set("logtostderr", "true") // log to stderr rather than to files

	config, err := clientcmd.BuildConfigFromFlags(*master, *kubeconfig)
	if err != nil {
		klog.Fatal(err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		klog.Fatal(err)
	}
	podListWatcher := cache.NewFilteredListWatchFromClient(clientset.CoreV1().RESTClient(), "pods", metav1.NamespaceDefault,
		func(options *metav1.ListOptions) {
			options.LabelSelector = *labelSelector
		})

	queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
	// Bind the workqueue to a cache with the help of an informer. This way we make sure that
	// whenever the cache is updated, the pod key is added to the workqueue.
	// Note that when we finally process the item from the workqueue, we might see a newer version
	// of the Pod than the version which was responsible for triggering the update.
	indexer, informer := cache.NewIndexerInformer(podListWatcher, &v1.Pod{}, 0, cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			if key, err := cache.MetaNamespaceKeyFunc(obj); err == nil {
				queue.Add(key)
			}
		},
		UpdateFunc: func(old interface{}, new interface{}) {
			if key, err := cache.MetaNamespaceKeyFunc(new); err == nil {
				queue.Add(key)
			}
		},
		DeleteFunc: func(obj interface{}) {
			// IndexerInformer uses a delta queue, therefore for deletes we have to use this key function.
			if key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj); err == nil {
				queue.Add(key)
			}
		},
	}, cache.Indexers{})

	stop := make(chan struct{})
	defer close(stop)

	eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: clientset.EventsV1beta1().Events("")})
	eventBroadcaster.StartRecordingToSink(stop)
	defer eventBroadcaster.Shutdown()

	evicter := newPodEvicter(clientset.PolicyV1beta1(), eventBroadcaster.NewRecorder(scheme.Scheme, "k-rail-evicter"), *terminationGracePeriodSeconds)
	controller := NewController(queue, indexer, informer, evicter, *taintedIncubationPeriodSeconds)

	go controller.Run(1, stop)

	// todo: watch sigterm
	// todo: recover panic to log
	select {}
}

type podEvicter struct {
	client               v1beta1.PolicyV1beta1Interface
	eventRecorder        events.EventRecorder
	defaultDeleteOptions *metav1.DeleteOptions
}

func newPodEvicter(client v1beta1.PolicyV1beta1Interface, recorder events.EventRecorder, gracePeriodSeconds int64) *podEvicter {
	return &podEvicter{
		client:               client,
		eventRecorder:        recorder,
		defaultDeleteOptions: &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriodSeconds},
	}
}

func (p *podEvicter) Evict(pod *v1.Pod, reason string) error {
	err := p.client.Evictions(pod.Namespace).Evict(newEviction(pod, p.defaultDeleteOptions))
	if err != nil {
		return errors.Wrap(err, "eviction")
	}
	p.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Eviction", "")
	return nil
}

func newEviction(pod *v1.Pod, deleteOption *metav1.DeleteOptions) *policy.Eviction {
	return &policy.Eviction{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "policy/v1beta1", // API group names are lower-case
			Kind:       "Eviction",
		},
		ObjectMeta:    pod.ObjectMeta,
		DeleteOptions: deleteOption,
	}
}
```
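The `// todo: watch sigterm` above could be resolved with a standard signal trap. A sketch under that assumption; the `waitForShutdown` helper is hypothetical and not in the PR:

```go
package main

import (
	"os"
	"os/signal"
	"syscall"
)

// waitForShutdown blocks until SIGTERM or SIGINT arrives, then closes the stop
// channel so the informer, workers, and reconciliation loop wind down.
func waitForShutdown(stop chan<- struct{}) {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
	<-sigs
	close(stop)
}

func main() {
	stop := make(chan struct{})
	go func() { <-stop }() // stand-in for controller.Run(1, stop)
	waitForShutdown(stop)
}
```

In the PR's `main`, this would replace the final `select {}` and the `defer close(stop)` (otherwise the channel would be closed twice).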
File renamed without changes.