
Commit 8f0ea93
Author: Mengqi Yu

Revision to the pod evaluator cache (#3056)

Address some remaining comments from earlier PR

1 parent: bd3c05f

1 file changed: 62 additions, 49 deletions

porch/func/internal/podevaluator.go

@@ -31,6 +31,7 @@ import (
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -52,7 +53,7 @@ const (
 )
 
 type podEvaluator struct {
-	requestCh chan *clientConnRequest
+	requestCh chan<- *clientConnRequest
 
 	podCacheManager *podCacheManager
 }
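
A note on the `chan<-` change above: narrowing a struct field to a directional channel type lets the compiler enforce that the holder can only send (or only receive) on it. A minimal, self-contained sketch of the idea (the names here are illustrative, not from this file):

package main

import "fmt"

type request struct{ image string }

// producer holds the send-only end; it can only enqueue requests.
type producer struct {
	requestCh chan<- *request
}

// consumer holds the receive-only end; it can only dequeue requests.
type consumer struct {
	requestCh <-chan *request
}

func main() {
	ch := make(chan *request, 1) // one bidirectional channel...
	p := producer{requestCh: ch} // ...narrowed to send-only here
	c := consumer{requestCh: ch} // ...and to receive-only here

	p.requestCh <- &request{image: "example.com/fn:v1"}
	fmt.Println((<-c.requestCh).image)
	// Receiving from p.requestCh or sending on c.requestCh would be compile errors.
}
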
@@ -80,7 +81,7 @@ func NewPodEvaluator(namespace, wrapperServerImage string, interval, ttl time.Du
 		requestCh:  reqCh,
 		podReadyCh: readyCh,
 		cache:      map[string]*podAndGRPCClient{},
-		waitlists:  map[string][]chan *clientConnAndError{},
+		waitlists:  map[string][]chan<- *clientConnAndError{},
 
 		podManager: &podManager{
 			kubeClient: cl,
@@ -103,8 +104,7 @@ func (pe *podEvaluator) EvaluateFunction(ctx context.Context, req *evaluator.Eva
 	// make a buffer for the channel to prevent unnecessary blocking when the pod cache manager sends it to multiple waiting goroutines in batch.
 	ccChan := make(chan *clientConnAndError, 1)
 	// Send a request to request a grpc client.
-	pe.podCacheManager.requestCh <- &clientConnRequest{
-		ctx:          ctx,
+	pe.requestCh <- &clientConnRequest{
 		image:        req.Image,
 		grpcClientCh: ccChan,
 	}
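
The buffer of size 1 on ccChan is what makes the batch send in the cache manager non-blocking: the responder can complete its send even before this goroutine reaches the receive. A standalone sketch of the request/reply pattern (simplified stand-in types, not the actual porch types):

package main

import "fmt"

type reply struct {
	value string
	err   error
}

type request struct {
	replyCh chan<- reply // the responder sends exactly one reply here
}

func main() {
	requestCh := make(chan request)

	// Responder: its sends never block because every replyCh has capacity 1.
	go func() {
		for req := range requestCh {
			req.replyCh <- reply{value: "grpc-client-for-image"}
		}
	}()

	// Requester: a buffered reply channel lets the responder move on
	// even if this goroutine is slow to read the result.
	replyCh := make(chan reply, 1)
	requestCh <- request{replyCh: replyCh}
	fmt.Println(<-replyCh)
}
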
@@ -126,26 +126,25 @@ type podCacheManager struct {
 	gcScanInternal time.Duration
 	podTTL         time.Duration
 
-	// requestCh is a channel to send the request to cache manager. The cache manager will send back the grpc client in the embedded channel.
-	requestCh chan *clientConnRequest
+	// requestCh is a receive-only channel to receive grpc client requests.
+	requestCh <-chan *clientConnRequest
 	// podReadyCh is a channel to receive the information when a pod is ready.
-	podReadyCh chan *imagePodAndGRPCClient
+	podReadyCh <-chan *imagePodAndGRPCClient
 
 	cache     map[string]*podAndGRPCClient
-	waitlists map[string][]chan *clientConnAndError
+	waitlists map[string][]chan<- *clientConnAndError
 
 	podManager *podManager
 }
 
 type clientConnRequest struct {
-	ctx   context.Context
 	image string
 
 	unavavilable     bool
 	currClientTarget string
 
 	// grpcClientCh is a channel to which the grpc client should be sent back.
-	grpcClientCh chan *clientConnAndError
+	grpcClientCh chan<- *clientConnAndError
 }
 
 type clientConnAndError struct {
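
The waitlists map above drives a batch fan-out: reply channels of callers waiting on the same image are queued until the pod is ready, then one event answers all of them. A self-contained sketch of that bookkeeping (types simplified, names illustrative):

package main

import "fmt"

type result struct {
	client string
	err    error
}

func main() {
	waitlists := map[string][]chan<- result{}

	// Two callers ask for the same image; each brings a 1-buffered reply channel.
	a := make(chan result, 1)
	b := make(chan result, 1)
	image := "example.com/fn:v1"
	waitlists[image] = append(waitlists[image], a, b)

	// When the pod is ready, broadcast to everyone on the waitlist and clear it.
	channels := waitlists[image]
	delete(waitlists, image)
	for _, ch := range channels {
		ch <- result{client: "client-for-" + image} // buffered: never blocks
	}

	fmt.Println(<-a, <-b)
}
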
@@ -172,37 +171,49 @@ func (pcm *podCacheManager) podCacheManager() {
 			podAndCl, found := pcm.cache[req.image]
 			if found && podAndCl != nil {
 				// Ensure the pod still exists and is not being deleted before sending the grpc client back to the channel.
-				// We can't simplly return grpc client from the cache and let evaluator try to connect to the pod.
+				// We can't simply return grpc client from the cache and let evaluator try to connect to the pod.
 				// If the pod is deleted by others, it will take ~10 seconds for the evaluator to fail.
 				// Wasting 10 seconds is too much, so we check if the pod still exists first.
 				pod := &corev1.Pod{}
-				err := pcm.podManager.kubeClient.Get(req.ctx, podAndCl.pod, pod)
-				if err == nil && pod.DeletionTimestamp == nil {
-					klog.Infof("reusing the connection to pod %v/%v to evaluate %v", pod.Namespace, pod.Name, req.image)
-					req.grpcClientCh <- &clientConnAndError{grpcClient: podAndCl.grpcClient}
-					go patchPodWithUnixTimeAnnotation(pcm.podManager.kubeClient, podAndCl.pod)
-					break
+				err := pcm.podManager.kubeClient.Get(context.Background(), podAndCl.pod, pod)
+				deleteCacheEntry := false
+				if err == nil {
+					if pod.DeletionTimestamp == nil {
+						klog.Infof("reusing the connection to pod %v/%v to evaluate %v", pod.Namespace, pod.Name, req.image)
+						req.grpcClientCh <- &clientConnAndError{grpcClient: podAndCl.grpcClient}
+						go patchPodWithUnixTimeAnnotation(pcm.podManager.kubeClient, podAndCl.pod)
+						break
+					} else {
+						deleteCacheEntry = true
+					}
+				} else if errors.IsNotFound(err) {
+					deleteCacheEntry = true
+				}
+				// We delete the cache entry if the pod has been deleted or is being deleted.
+				if deleteCacheEntry {
+					delete(pcm.cache, req.image)
 				}
 			}
 			_, found = pcm.waitlists[req.image]
 			if !found {
-				pcm.waitlists[req.image] = []chan *clientConnAndError{}
+				pcm.waitlists[req.image] = []chan<- *clientConnAndError{}
 			}
 			list := pcm.waitlists[req.image]
-			list = append(list, req.grpcClientCh)
-			pcm.waitlists[req.image] = list
-			go pcm.podManager.getFuncEvalPodClient(req.ctx, req.image)
+			pcm.waitlists[req.image] = append(list, req.grpcClientCh)
+			go pcm.podManager.getFuncEvalPodClient(context.Background(), req.image)
 		case resp := <-pcm.podReadyCh:
-			if resp.err == nil {
+			if resp.err != nil {
+				klog.Warningf("received error from the pod manager: %v", resp.err)
 				pcm.cache[resp.image] = resp.podAndGRPCClient
-				channels := pcm.waitlists[resp.image]
-				delete(pcm.waitlists, resp.image)
-				for i := range channels {
-					// The channel has one buffer size, nothing will be blocking.
-					channels[i] <- &clientConnAndError{grpcClient: resp.grpcClient}
+			}
+			channels := pcm.waitlists[resp.image]
+			delete(pcm.waitlists, resp.image)
+			for i := range channels {
+				// The channel has one buffer size, nothing will be blocking.
+				channels[i] <- &clientConnAndError{
+					grpcClient: resp.grpcClient,
+					err:        resp.err,
 				}
-			} else {
-				klog.Warningf("received error from the pod manager: %v", resp.err)
 			}
 		case <-tick:
 			// synchronous GC
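
The new errors.IsNotFound check (from the k8s.io/apimachinery/pkg/api/errors import added above) is what distinguishes "pod confirmed gone" from transient lookup errors, so a cache entry survives flaky API calls. A sketch of the eviction decision in isolation (the helper name shouldEvict is hypothetical, not from this file):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// shouldEvict reports whether a cached connection's pod should be evicted,
// given the result of fetching the pod from the API server.
func shouldEvict(pod *corev1.Pod, err error) bool {
	if err == nil {
		return pod.DeletionTimestamp != nil // pod exists but is terminating
	}
	return errors.IsNotFound(err) // evict only on a confirmed 404, not on transient errors
}

func main() {
	gr := schema.GroupResource{Resource: "pods"}
	fmt.Println(shouldEvict(nil, errors.NewNotFound(gr, "fn-pod"))) // true: confirmed gone
	fmt.Println(shouldEvict(&corev1.Pod{}, nil))                    // false: alive and not deleting
}
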
@@ -227,40 +238,44 @@ func (pcm *podCacheManager) garbageCollector() {
 				continue
 			}
 			lastUse, found := pod.Annotations[lastUseTimeAnnotation]
-			// If a pod doesn't have a last-use annotation, we patch it.
+			// If a pod doesn't have a last-use annotation, we patch it. This should not happen, but if it happens,
+			// we give it another TTL before deleting it.
 			if !found {
 				go patchPodWithUnixTimeAnnotation(pcm.podManager.kubeClient, client.ObjectKeyFromObject(&pod))
 				continue
 			} else {
 				lu, err := strconv.ParseInt(lastUse, 10, 64)
 				// If the annotation is ill-formatted, we patch it with the current time and will try to GC it later.
+				// This should not happen, but if it happens, we give it another TTL before deleting it.
 				if err != nil {
 					klog.Warningf("unable to convert the Unix time string to int64: %w", err)
 					go patchPodWithUnixTimeAnnotation(pcm.podManager.kubeClient, client.ObjectKeyFromObject(&pod))
 					continue
 				}
 				if time.Unix(lu, 0).Add(pcm.podTTL).Before(time.Now()) {
+					podIP := pod.Status.PodIP
+					go func(po corev1.Pod) {
+						klog.Infof("deleting pod %v/%v", po.Namespace, po.Name)
+						err := pcm.podManager.kubeClient.Delete(context.Background(), &po)
+						if err != nil {
+							klog.Warningf("unable to delete pod %v/%v: %w", po.Namespace, po.Name, err)
+						}
+					}(podList.Items[i])
+
 					image := pod.Spec.Containers[0].Image
 					podAndCl, found := pcm.cache[image]
 					if found {
-						// We delete the cache entry when its grpc client points to the old pod IP.
 						host, _, err := net.SplitHostPort(podAndCl.grpcClient.Target())
-						if err != nil {
-							klog.Warningf("unable to split the GRPC dialer target to host and port : %w", err)
+						// If the client target in the cache points to a different pod IP, the matching pod is not the current pod.
+						// We will keep this cache entry.
+						if err == nil && host != podIP {
 							continue
 						}
-						if host == pod.Status.PodIP {
-							delete(pcm.cache, image)
-						}
+						// We delete the cache entry when the IP of the old pod matches the client target in the cache,
+						// or when we can't split the host and port in the client target.
+						delete(pcm.cache, image)
 					}
 
-					go func(po corev1.Pod) {
-						klog.Infof("deleting pod %v/%v", po.Namespace, po.Name)
-						err := pcm.podManager.kubeClient.Delete(context.Background(), &po)
-						if err != nil {
-							klog.Warningf("unable to delete pod %v/%v: %w", po.Namespace, po.Name, err)
-						}
-					}(podList.Items[i])
 				}
 			}
 		}
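
The GC predicate above boils down to: parse the last-use annotation as a Unix timestamp and collect the pod once lastUse + TTL has passed. A small sketch of just that check (the helper name expired is illustrative, not from this file):

package main

import (
	"fmt"
	"strconv"
	"time"
)

// expired reports whether a pod whose last-use annotation holds a Unix
// timestamp (seconds, base-10) has outlived the given TTL at time now.
func expired(lastUseAnnotation string, ttl time.Duration, now time.Time) (bool, error) {
	lu, err := strconv.ParseInt(lastUseAnnotation, 10, 64)
	if err != nil {
		return false, fmt.Errorf("ill-formatted last-use annotation: %w", err)
	}
	return time.Unix(lu, 0).Add(ttl).Before(now), nil
}

func main() {
	now := time.Now()
	stale := strconv.FormatInt(now.Add(-2*time.Minute).Unix(), 10)
	fresh := strconv.FormatInt(now.Unix(), 10)

	fmt.Println(expired(stale, time.Minute, now)) // true <nil>: past its TTL
	fmt.Println(expired(fresh, time.Minute, now)) // false <nil>: still within TTL
}
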
@@ -274,9 +289,8 @@ type podManager struct {
 	// wrapperServerImage is the image name of the wrapper server
 	wrapperServerImage string
 
-	// podReadyCh is a channel to send the grpc client information.
-	// podCacheManager receives from this channel.
-	podReadyCh chan *imagePodAndGRPCClient
+	// podReadyCh is a send-only channel through which the pod manager sends the grpc client information once a pod is ready.
+	podReadyCh chan<- *imagePodAndGRPCClient
 
 	// entrypointCache is a cache of image name to entrypoint.
 	// Only podManager is allowed to touch this cache.
@@ -314,8 +328,8 @@ func (pm *podManager) getFuncEvalPodClient(ctx context.Context, image string) {
 	}
 }
 
+// imageEntrypoint gets the entrypoint of a container image by looking at its metadata.
 func (pm *podManager) imageEntrypoint(image string) ([]string, error) {
-	// Create pod otherwise.
 	var entrypoint []string
 	ref, err := name.ParseReference(image)
 	if err != nil {
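
name.ParseReference comes from go-containerregistry; resolving an image's entrypoint from registry metadata is typically paired with its remote package. A hedged sketch of how such a lookup can look (an illustration under assumed usage, not the actual body of imageEntrypoint; the image reference in main is just an example):

package main

import (
	"fmt"

	"github.com/google/go-containerregistry/pkg/name"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)

// entrypointOf fetches an image's config from its registry and returns the
// declared ENTRYPOINT, falling back to CMD when no entrypoint is set.
func entrypointOf(image string) ([]string, error) {
	ref, err := name.ParseReference(image)
	if err != nil {
		return nil, err
	}
	img, err := remote.Image(ref) // pulls manifest + config, not layers
	if err != nil {
		return nil, err
	}
	cfg, err := img.ConfigFile()
	if err != nil {
		return nil, err
	}
	if len(cfg.Config.Entrypoint) > 0 {
		return cfg.Config.Entrypoint, nil
	}
	return cfg.Config.Cmd, nil
}

func main() {
	ep, err := entrypointOf("gcr.io/kpt-fn/set-labels:v0.1")
	fmt.Println(ep, err)
}
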
@@ -342,8 +356,7 @@
 }
 
 func (pm *podManager) retrieveOrCreatePod(ctx context.Context, image string) (client.ObjectKey, error) {
-	// Try to retrieve the pod first.
-	// Lookup the pod by label to see if there is a pod that can be reused.
+	// Try to retrieve the pod. Lookup the pod by label to see if there is a pod that can be reused.
 	// Looking it up locally may not work if there is more than one instance of the function runner,
 	// since the pod may be created by another instance and the current instance is not aware of it.
 	// TODO: It's possible to set up a Watch in the fn runner namespace, and always try to maintain an up-to-date local cache.
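
The label-based lookup this comment describes is a standard controller-runtime list call. A sketch under assumed names (the label key fn.kpt.dev/image and the helper findReusablePod are placeholders, not from this commit; it needs a live client.Client to run):

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// findReusablePod lists pods in ns carrying an image-identifying label and
// returns the first one that is running and not terminating.
func findReusablePod(ctx context.Context, cl client.Client, ns, labelVal string) (*corev1.Pod, error) {
	podList := &corev1.PodList{}
	err := cl.List(ctx, podList,
		client.InNamespace(ns),
		client.MatchingLabels{"fn.kpt.dev/image": labelVal}, // placeholder label key
	)
	if err != nil {
		return nil, err
	}
	for i := range podList.Items {
		pod := &podList.Items[i]
		if pod.DeletionTimestamp == nil && pod.Status.Phase == corev1.PodRunning {
			return pod, nil
		}
	}
	return nil, fmt.Errorf("no reusable pod for %q in %q", labelVal, ns)
}
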
