Skip to content

Commit 6008784

Browse files
authored
feat: cache upstream during update (#3154)
1 parent 7a89a83 commit 6008784

File tree

5 files changed

+259
-76
lines changed

5 files changed

+259
-76
lines changed

internal/gitutil/gitutil.go

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,26 @@ func (g *GitLocalRunner) run(ctx context.Context, verbose bool, command string,
137137
}, nil
138138
}
139139

140+
type NewGitUpstreamRepoOption func(*GitUpstreamRepo)
141+
142+
func WithFetchedRefs(a map[string]bool) NewGitUpstreamRepoOption {
143+
return func(g *GitUpstreamRepo) {
144+
g.fetchedRefs = a
145+
}
146+
}
147+
140148
// NewGitUpstreamRepo returns a new GitUpstreamRepo for an upstream package.
141-
func NewGitUpstreamRepo(ctx context.Context, uri string) (*GitUpstreamRepo, error) {
149+
func NewGitUpstreamRepo(ctx context.Context, uri string, opts ...NewGitUpstreamRepoOption) (*GitUpstreamRepo, error) {
142150
const op errors.Op = "gitutil.NewGitUpstreamRepo"
143-
144151
g := &GitUpstreamRepo{
145152
URI: uri,
146153
}
154+
for _, opt := range opts {
155+
opt(g)
156+
}
157+
if g.fetchedRefs == nil {
158+
g.fetchedRefs = map[string]bool{}
159+
}
147160
if err := g.updateRefs(ctx); err != nil {
148161
return nil, errors.E(op, errors.Repo(uri), err)
149162
}
@@ -161,6 +174,17 @@ type GitUpstreamRepo struct {
161174
// Tags contains all tag refs in the upstream repo as well as the
162175
// each of the are referencing.
163176
Tags map[string]string
177+
178+
// fetchedRefs keeps track of refs already fetched from remote
179+
fetchedRefs map[string]bool
180+
}
181+
182+
func (gur *GitUpstreamRepo) GetFetchedRefs() []string {
183+
fetchedRefs := make([]string, 0, len(gur.fetchedRefs))
184+
for ref := range gur.fetchedRefs {
185+
fetchedRefs = append(fetchedRefs, ref)
186+
}
187+
return fetchedRefs
164188
}
165189

166190
// updateRefs fetches all refs from the upstream git repo, parses the results
@@ -374,20 +398,26 @@ loop:
374398
// commit sha.
375399
validFullSha := s == strings.TrimSpace(rr.Stdout)
376400
_, resolved := gur.ResolveRef(s)
377-
401+
// check if ref was previously fetched
402+
// we use the ref s as the cache key
403+
_, fetched := gur.fetchedRefs[s]
378404
switch {
405+
case fetched:
406+
// skip refetching if previously fetched
407+
break
379408
case resolved || validFullSha:
380409
// If the ref references a branch or a tag, or is a valid commit
381-
// sha, we can fetch just a single commit.
410+
// sha and has not already been fetched, we can fetch just a single commit.
382411
if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin", "--depth=1", s); err != nil {
383412
AmendGitExecError(err, func(e *GitExecError) {
384413
e.Repo = uri
385-
e.Command = "origin"
414+
e.Command = "fetch"
386415
e.Ref = s
387416
})
388417
return "", errors.E(op, errors.Git, fmt.Errorf(
389418
"error running `git fetch` for ref %q: %w", s, err))
390419
}
420+
gur.fetchedRefs[s] = true
391421
default:
392422
// In other situations (like a short commit sha), we have to do
393423
// a full fetch from the remote.
@@ -407,6 +437,7 @@ loop:
407437
return "", errors.E(op, errors.Git, fmt.Errorf(
408438
"error verifying results from fetch: %w", err))
409439
}
440+
gur.fetchedRefs[s] = true
410441
// If we did a full fetch, we already have all refs, so we can just
411442
// exit the loop.
412443
break loop

internal/util/fetch/fetch.go

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func (c Command) Run(ctx context.Context) error {
6262
Path: g.Directory,
6363
Ref: g.Ref,
6464
}
65-
err = cloneAndCopy(ctx, repoSpec, c.Pkg.UniquePath.String())
65+
err = NewCloner(repoSpec).cloneAndCopy(ctx, c.Pkg.UniquePath.String())
6666
if err != nil {
6767
return errors.E(op, c.Pkg.UniquePath, err)
6868
}
@@ -94,21 +94,55 @@ func (c Command) validate(kf *kptfilev1.KptFile) error {
9494
return nil
9595
}
9696

97+
// Cloner clones an upstream repo defined by a repoSpec.
98+
// Optionally, previously cloned repos can be cached
99+
// rather than recloning them each time.
100+
type Cloner struct {
101+
// repoSpec spec to clone
102+
repoSpec *git.RepoSpec
103+
104+
// cachedRepos
105+
cachedRepo map[string]*gitutil.GitUpstreamRepo
106+
}
107+
108+
type NewClonerOption func(*Cloner)
109+
110+
func WithCachedRepo(r map[string]*gitutil.GitUpstreamRepo) NewClonerOption {
111+
return func(c *Cloner) {
112+
c.cachedRepo = r
113+
}
114+
}
115+
116+
func NewCloner(r *git.RepoSpec, opts ...NewClonerOption) *Cloner {
117+
c := &Cloner{
118+
repoSpec: r,
119+
}
120+
for _, opt := range opts {
121+
opt(c)
122+
}
123+
if c.cachedRepo == nil {
124+
c.cachedRepo = make(map[string]*gitutil.GitUpstreamRepo)
125+
}
126+
return c
127+
}
128+
97129
// cloneAndCopy fetches the provided repo and copies the content into the
98130
// directory specified by dest. The provided name is set as `metadata.name`
99131
// of the Kptfile of the package.
100-
func cloneAndCopy(ctx context.Context, r *git.RepoSpec, dest string) error {
132+
func (c *Cloner) cloneAndCopy(ctx context.Context, dest string) error {
101133
const op errors.Op = "fetch.cloneAndCopy"
102134
pr := printer.FromContextOrDie(ctx)
103135

104-
err := ClonerUsingGitExec(ctx, r)
136+
err := c.ClonerUsingGitExec(ctx)
105137
if err != nil {
106138
return errors.E(op, errors.Git, types.UniquePath(dest), err)
107139
}
108-
defer os.RemoveAll(r.Dir)
140+
defer os.RemoveAll(c.repoSpec.Dir)
141+
// update cache before removing clone dir
142+
defer delete(c.cachedRepo, c.repoSpec.CloneSpec())
109143

110-
sourcePath := filepath.Join(r.Dir, r.Path)
111-
pr.Printf("Adding package %q.\n", strings.TrimPrefix(r.Path, "/"))
144+
sourcePath := filepath.Join(c.repoSpec.Dir, c.repoSpec.Path)
145+
pr.Printf("Adding package %q.\n", strings.TrimPrefix(c.repoSpec.Path, "/"))
112146
if err := pkgutil.CopyPackage(sourcePath, dest, true, pkg.All); err != nil {
113147
return errors.E(op, types.UniquePath(dest), err)
114148
}
@@ -117,7 +151,7 @@ func cloneAndCopy(ctx context.Context, r *git.RepoSpec, dest string) error {
117151
return errors.E(op, types.UniquePath(dest), err)
118152
}
119153

120-
if err := kptfileutil.UpdateUpstreamLockFromGit(dest, r); err != nil {
154+
if err := kptfileutil.UpdateUpstreamLockFromGit(dest, c.repoSpec); err != nil {
121155
return errors.E(op, errors.Git, types.UniquePath(dest), err)
122156
}
123157
return nil
@@ -129,46 +163,51 @@ func cloneAndCopy(ctx context.Context, r *git.RepoSpec, dest string) error {
129163
// for versioning multiple kpt packages in a single repo independently. It
130164
// relies on the private clonerUsingGitExec function to try fetching different
131165
// refs.
132-
func ClonerUsingGitExec(ctx context.Context, repoSpec *git.RepoSpec) error {
166+
func (c *Cloner) ClonerUsingGitExec(ctx context.Context) error {
133167
const op errors.Op = "fetch.ClonerUsingGitExec"
134168

135169
// Create a local representation of the upstream repo. This will initialize
136170
// the cache for the specified repo uri if it isn't already there. It also
137171
// fetches and caches all tag and branch refs from the upstream repo.
138-
upstreamRepo, err := gitutil.NewGitUpstreamRepo(ctx, repoSpec.CloneSpec())
139-
if err != nil {
140-
return errors.E(op, errors.Git, errors.Repo(repoSpec.CloneSpec()), err)
172+
upstreamRepo, exists := c.cachedRepo[c.repoSpec.CloneSpec()]
173+
if !exists {
174+
newUpstreamRemp, err := gitutil.NewGitUpstreamRepo(ctx, c.repoSpec.CloneSpec())
175+
if err != nil {
176+
return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err)
177+
}
178+
upstreamRepo = newUpstreamRemp
179+
c.cachedRepo[c.repoSpec.CloneSpec()] = upstreamRepo
141180
}
142181

143182
// Check if we have a ref in the upstream that matches the package-specific
144183
// reference. If we do, we use that reference.
145-
ps := strings.Split(repoSpec.Path, "/")
184+
ps := strings.Split(c.repoSpec.Path, "/")
146185
for len(ps) != 0 {
147186
p := path.Join(ps...)
148-
packageRef := path.Join(strings.TrimLeft(p, "/"), repoSpec.Ref)
187+
packageRef := path.Join(strings.TrimLeft(p, "/"), c.repoSpec.Ref)
149188
if _, found := upstreamRepo.ResolveTag(packageRef); found {
150-
repoSpec.Ref = packageRef
189+
c.repoSpec.Ref = packageRef
151190
break
152191
}
153192
ps = ps[:len(ps)-1]
154193
}
155194

156195
// Pull the required ref into the repo git cache.
157-
dir, err := upstreamRepo.GetRepo(ctx, []string{repoSpec.Ref})
196+
dir, err := upstreamRepo.GetRepo(ctx, []string{c.repoSpec.Ref})
158197
if err != nil {
159-
return errors.E(op, errors.Git, errors.Repo(repoSpec.CloneSpec()), err)
198+
return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err)
160199
}
161200

162201
gitRunner, err := gitutil.NewLocalGitRunner(dir)
163202
if err != nil {
164-
return errors.E(op, errors.Git, errors.Repo(repoSpec.CloneSpec()), err)
203+
return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err)
165204
}
166205

167206
// Find the commit SHA for the ref that was just fetched. We need the SHA
168207
// rather than the ref to be able to do a hard reset of the cache repo.
169-
commit, found := upstreamRepo.ResolveRef(repoSpec.Ref)
208+
commit, found := upstreamRepo.ResolveRef(c.repoSpec.Ref)
170209
if !found {
171-
commit = repoSpec.Ref
210+
commit = c.repoSpec.Ref
172211
}
173212

174213
// Reset the local repo to the commit we need. Doing a hard reset instead of
@@ -178,34 +217,34 @@ func ClonerUsingGitExec(ctx context.Context, repoSpec *git.RepoSpec) error {
178217
_, err = gitRunner.Run(ctx, "reset", "--hard", commit)
179218
if err != nil {
180219
gitutil.AmendGitExecError(err, func(e *gitutil.GitExecError) {
181-
e.Repo = repoSpec.CloneSpec()
220+
e.Repo = c.repoSpec.CloneSpec()
182221
e.Ref = commit
183222
})
184-
return errors.E(op, errors.Git, errors.Repo(repoSpec.CloneSpec()), err)
223+
return errors.E(op, errors.Git, errors.Repo(c.repoSpec.CloneSpec()), err)
185224
}
186225

187226
// We need to create a temp directory where we can copy the content of the repo.
188227
// During update, we need to checkout multiple versions of the same repo, so
189228
// we can't do merges directly from the cache.
190-
repoSpec.Dir, err = ioutil.TempDir("", "kpt-get-")
229+
c.repoSpec.Dir, err = ioutil.TempDir("", "kpt-get-")
191230
if err != nil {
192231
return errors.E(op, errors.Internal, fmt.Errorf("error creating temp directory: %w", err))
193232
}
194-
repoSpec.Commit = commit
233+
c.repoSpec.Commit = commit
195234

196-
pkgPath := filepath.Join(dir, repoSpec.Path)
235+
pkgPath := filepath.Join(dir, c.repoSpec.Path)
197236
// Verify that the requested path exists in the repo.
198237
_, err = os.Stat(pkgPath)
199238
if os.IsNotExist(err) {
200239
return errors.E(op,
201240
errors.Internal,
202241
err,
203-
fmt.Errorf("path %q does not exist in repo %q", repoSpec.Path, repoSpec.OrgRepo))
242+
fmt.Errorf("path %q does not exist in repo %q", c.repoSpec.Path, c.repoSpec.OrgRepo))
204243
}
205244

206245
// Copy the content of the pkg into the temp directory.
207246
// Note that we skip the content outside the package directory.
208-
err = copyDir(ctx, pkgPath, repoSpec.AbsPath())
247+
err = copyDir(ctx, pkgPath, c.repoSpec.AbsPath())
209248
if err != nil {
210249
return errors.E(op, errors.Internal, fmt.Errorf("error copying package: %w", err))
211250
}
@@ -226,7 +265,7 @@ func ClonerUsingGitExec(ctx context.Context, repoSpec *git.RepoSpec) error {
226265
var kfError *pkg.KptfileError
227266
if errors.As(err, &kfError) {
228267
return &pkg.RemoteKptfileError{
229-
RepoSpec: repoSpec,
268+
RepoSpec: c.repoSpec,
230269
Err: kfError.Err,
231270
}
232271
}

internal/util/update/update.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"strings"
2626

2727
"github.com/GoogleContainerTools/kpt/internal/errors"
28+
"github.com/GoogleContainerTools/kpt/internal/gitutil"
2829
"github.com/GoogleContainerTools/kpt/internal/pkg"
2930
"github.com/GoogleContainerTools/kpt/internal/printer"
3031
"github.com/GoogleContainerTools/kpt/internal/types"
@@ -102,10 +103,13 @@ type Command struct {
102103

103104
// Strategy is the update strategy to use
104105
Strategy kptfilev1.UpdateStrategyType
106+
107+
// cachedUpstreamRepos is an upstream repo already fetched for a given repoSpec CloneRef
108+
cachedUpstreamRepos map[string]*gitutil.GitUpstreamRepo
105109
}
106110

107111
// Run runs the Command.
108-
func (u Command) Run(ctx context.Context) error {
112+
func (u *Command) Run(ctx context.Context) error {
109113
const op errors.Op = "update.Run"
110114
pr := printer.FromContextOrDie(ctx)
111115

@@ -133,7 +137,9 @@ func (u Command) Run(ctx context.Context) error {
133137
if err != nil {
134138
return errors.E(op, u.Pkg.UniquePath, err)
135139
}
136-
140+
if u.cachedUpstreamRepos == nil {
141+
u.cachedUpstreamRepos = make(map[string]*gitutil.GitUpstreamRepo)
142+
}
137143
packageCount := 0
138144

139145
// Use stack to keep track of paths with a Kptfile that might contain
@@ -182,6 +188,11 @@ func (u Command) Run(ctx context.Context) error {
182188
return nil
183189
}
184190

191+
// GetCachedUpstreamRepos returns repos cached during update
192+
func (u Command) GetCachedUpstreamRepos() map[string]*gitutil.GitUpstreamRepo {
193+
return u.cachedUpstreamRepos
194+
}
195+
185196
// updateSubKf updates subpackage with given ref and update strategy
186197
func updateSubKf(subKf *kptfilev1.KptFile, ref string, strategy kptfilev1.UpdateStrategyType) {
187198
// check if explicit ref provided
@@ -250,7 +261,8 @@ func (u Command) updateRootPackage(ctx context.Context, p *pkg.Pkg) error {
250261
g := kf.Upstream.Git
251262
updated := &git.RepoSpec{OrgRepo: g.Repo, Path: g.Directory, Ref: g.Ref}
252263
pr.Printf("Fetching upstream from %s@%s\n", kf.Upstream.Git.Repo, kf.Upstream.Git.Ref)
253-
if err := fetch.ClonerUsingGitExec(ctx, updated); err != nil {
264+
cloner := fetch.NewCloner(updated, fetch.WithCachedRepo(u.cachedUpstreamRepos))
265+
if err := cloner.ClonerUsingGitExec(ctx); err != nil {
254266
return errors.E(op, p.UniquePath, err)
255267
}
256268
defer os.RemoveAll(updated.AbsPath())
@@ -260,7 +272,7 @@ func (u Command) updateRootPackage(ctx context.Context, p *pkg.Pkg) error {
260272
gLock := kf.UpstreamLock.Git
261273
originRepoSpec := &git.RepoSpec{OrgRepo: gLock.Repo, Path: gLock.Directory, Ref: gLock.Commit}
262274
pr.Printf("Fetching origin from %s@%s\n", kf.Upstream.Git.Repo, kf.Upstream.Git.Ref)
263-
if err := fetch.ClonerUsingGitExec(ctx, originRepoSpec); err != nil {
275+
if err := fetch.NewCloner(originRepoSpec, fetch.WithCachedRepo(u.cachedUpstreamRepos)).ClonerUsingGitExec(ctx); err != nil {
264276
return errors.E(op, p.UniquePath, err)
265277
}
266278
origin = originRepoSpec

0 commit comments

Comments
 (0)