Skip to content

Commit 5fa69b5

Browse files
authored
Optimize commit iterator (#9751)
1 parent c4c8b19 commit 5fa69b5

File tree

20 files changed

+236
-102
lines changed

20 files changed

+236
-102
lines changed

go/cmd/dolt/commands/archive.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ func historicalFuzzyMatching(ctx context.Context, heads hash.HashSet, groupings
250250
return err
251251
}
252252
for {
253-
h, _, err := iterator.Next(ctx)
253+
h, _, _, _, err := iterator.Next(ctx)
254254
if err != nil {
255255
if err == io.EOF {
256256
break

go/libraries/doltcore/doltdb/commit_itr.go

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ import (
2828

2929
// CommitItr is an interface for iterating over a set of unique commits
3030
type CommitItr[C Context] interface {
31-
// Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle
32-
// making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF
33-
Next(ctx C) (hash.Hash, *OptionalCommit, error)
31+
// Next returns the next commit's hash, pointer, metadata, and height.
32+
// Implementations of Next must handle making sure the list of commits returned are unique.
33+
// When complete Next will return hash.Hash{}, nil, nil, 0, io.EOF
34+
// Note: Some implementations may always return nil for the metadata and height return values.
35+
Next(ctx C) (hash.Hash, *OptionalCommit, *datas.CommitMeta, uint64, error)
3436

3537
// Reset the commit iterator back to the start
3638
Reset(ctx context.Context) error
@@ -88,25 +90,27 @@ func (cmItr *commitItr[C]) Reset(ctx context.Context) error {
8890
return nil
8991
}
9092

91-
// Next returns the hash of the next commit, and a pointer to that commit. It handles making sure the list of commits
92-
// returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF
93-
func (cmItr *commitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, error) {
93+
// Next returns the hash of the next commit, and a pointer to that commit.
94+
// It handles making sure the list of commits returned are unique.
95+
// When complete Next will return hash.Hash{}, nil, nil, 0, io.EOF.
96+
// Note: This implementation always returns nil for the metadata and height return values.
97+
func (cmItr *commitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, *datas.CommitMeta, uint64, error) {
9498
for cmItr.curr == nil {
9599
if cmItr.currentRoot >= len(cmItr.rootCommits) {
96-
return hash.Hash{}, nil, io.EOF
100+
return hash.Hash{}, nil, nil, 0, io.EOF
97101
}
98102

99103
cm := cmItr.rootCommits[cmItr.currentRoot]
100104
h, err := cm.HashOf()
101105

102106
if err != nil {
103-
return hash.Hash{}, nil, err
107+
return hash.Hash{}, nil, nil, 0, err
104108
}
105109

106110
if !cmItr.added[h] {
107111
cmItr.added[h] = true
108112
cmItr.curr = cm
109-
return h, &OptionalCommit{cmItr.curr, h}, nil
113+
return h, &OptionalCommit{cmItr.curr, h}, nil, 0, nil
110114
}
111115

112116
cmItr.currentRoot++
@@ -115,7 +119,7 @@ func (cmItr *commitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, error) {
115119
parents, err := cmItr.curr.ParentHashes(ctx)
116120

117121
if err != nil {
118-
return hash.Hash{}, nil, err
122+
return hash.Hash{}, nil, nil, 0, err
119123
}
120124

121125
for _, h := range parents {
@@ -137,13 +141,13 @@ func (cmItr *commitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, error) {
137141
cmItr.unprocessed = cmItr.unprocessed[:numUnprocessed-1]
138142
cmItr.curr, err = HashToCommit(ctx, cmItr.ddb.ValueReadWriter(), cmItr.ddb.ns, next)
139143
if err != nil && err != ErrGhostCommitEncountered {
140-
return hash.Hash{}, nil, err
144+
return hash.Hash{}, nil, nil, 0, err
141145
}
142146
if err == ErrGhostCommitEncountered {
143147
cmItr.curr = nil
144148
}
145149

146-
return next, &OptionalCommit{cmItr.curr, next}, nil
150+
return next, &OptionalCommit{cmItr.curr, next}, nil, 0, nil
147151
}
148152

149153
func HashToCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, h hash.Hash) (*Commit, error) {
@@ -183,19 +187,18 @@ func NewFilteringCommitItr[C Context](itr CommitItr[C], filter CommitFilter[C])
183187

184188
// Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle
185189
// making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF
186-
func (itr FilteringCommitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, error) {
190+
func (itr FilteringCommitItr[C]) Next(ctx C) (hash.Hash, *OptionalCommit, *datas.CommitMeta, uint64, error) {
187191
// iteration will terminate on io.EOF or a commit that is !filteredOut
188192
for {
189-
h, cm, err := itr.itr.Next(ctx)
190-
193+
h, cm, meta, height, err := itr.itr.Next(ctx)
191194
if err != nil {
192-
return hash.Hash{}, nil, err
195+
return hash.Hash{}, nil, nil, 0, err
193196
}
194197

195198
if filterOut, err := itr.filter(ctx, h, cm); err != nil {
196-
return hash.Hash{}, nil, err
199+
return hash.Hash{}, nil, nil, 0, err
197200
} else if !filterOut {
198-
return h, cm, nil
201+
return h, cm, meta, height, nil
199202
}
200203
}
201204
}
@@ -217,12 +220,14 @@ type CommitSliceIter[C Context] struct {
217220

218221
var _ CommitItr[context.Context] = (*CommitSliceIter[context.Context])(nil)
219222

220-
func (i *CommitSliceIter[C]) Next(ctx C) (hash.Hash, *OptionalCommit, error) {
223+
// Next implements the CommitItr interface.
224+
// Note: This implementation always returns nil for the metadata and height return values.
225+
func (i *CommitSliceIter[C]) Next(ctx C) (hash.Hash, *OptionalCommit, *datas.CommitMeta, uint64, error) {
221226
if i.i >= len(i.h) {
222-
return hash.Hash{}, nil, io.EOF
227+
return hash.Hash{}, nil, nil, 0, io.EOF
223228
}
224229
i.i++
225-
return i.h[i.i-1], &OptionalCommit{i.cm[i.i-1], i.h[i.i-1]}, nil
230+
return i.h[i.i-1], &OptionalCommit{i.cm[i.i-1], i.h[i.i-1]}, nil, 0, nil
226231

227232
}
228233

@@ -232,7 +237,7 @@ func (i *CommitSliceIter[C]) Reset(ctx context.Context) error {
232237
}
233238

234239
func NewOneCommitIter[C Context](cm *Commit, h hash.Hash, meta *datas.CommitMeta) *OneCommitIter[C] {
235-
return &OneCommitIter[C]{cm: &OptionalCommit{cm, h}, h: h}
240+
return &OneCommitIter[C]{cm: &OptionalCommit{cm, h}, h: h, m: meta}
236241
}
237242

238243
type OneCommitIter[C Context] struct {
@@ -244,13 +249,14 @@ type OneCommitIter[C Context] struct {
244249

245250
var _ CommitItr[context.Context] = (*OneCommitIter[context.Context])(nil)
246251

247-
func (i *OneCommitIter[C]) Next(_ C) (hash.Hash, *OptionalCommit, error) {
252+
// Next implements the CommitItr interface.
253+
// Note: This implementation always returns nil for the metadata and height return values.
254+
func (i *OneCommitIter[C]) Next(_ C) (hash.Hash, *OptionalCommit, *datas.CommitMeta, uint64, error) {
248255
if i.done {
249-
return hash.Hash{}, nil, io.EOF
256+
return hash.Hash{}, nil, nil, 0, io.EOF
250257
}
251258
i.done = true
252-
return i.h, i.cm, nil
253-
259+
return i.h, i.cm, i.m, 0, nil
254260
}
255261

256262
func (i *OneCommitIter[C]) Reset(_ context.Context) error {

go/libraries/doltcore/doltdb/doltdb.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,23 @@ func (ddb *DoltDB) Resolve(ctx context.Context, cs *CommitSpec, cwb ref.DoltRef)
474474
return commit.GetAncestor(ctx, cs.aSpec)
475475
}
476476

477+
// ResolveHash takes a hash and returns an OptionalCommit directly.
478+
// This assumes no ancestor spec resolution and no current working branch (cwb) needed.
479+
func (ddb *DoltDB) ResolveHash(ctx context.Context, hash hash.Hash) (*OptionalCommit, error) {
480+
commitValue, err := datas.LoadCommitAddr(ctx, ddb.vrw, hash)
481+
if err != nil {
482+
return nil, err
483+
}
484+
if commitValue.IsGhost() {
485+
return &OptionalCommit{nil, hash}, nil
486+
}
487+
commit, err := NewCommit(ctx, ddb.vrw, ddb.ns, commitValue)
488+
if err != nil {
489+
return nil, err
490+
}
491+
return &OptionalCommit{commit, hash}, nil
492+
}
493+
477494
// BootstrapShallowResolve is a special case of Resolve that is used to resolve a commit prior to pulling it's history
478495
// in a shallow clone. In general, application code should call Resolve and get an OptionalCommit. This is a special case
479496
// where we need to get the head commit for the commit closure used to determine what commits should skipped.

go/libraries/doltcore/env/actions/commitwalk/commitwalk.go

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -69,23 +69,25 @@ func (q *q) Swap(i, j int) {
6969
// the commit with the newer timestamp is "less". Finally if both commits are ghost commits, we don't really have enough
7070
// information to compare on, so we just compare the hashes to ensure that the results are stable.
7171
func (q *q) Less(i, j int) bool {
72-
_, okI := q.pending[i].commit.ToCommit()
73-
_, okJ := q.pending[i].commit.ToCommit()
72+
cI := q.pending[i]
73+
cJ := q.pending[j]
74+
_, okI := cI.commit.ToCommit()
75+
_, okJ := cJ.commit.ToCommit()
7476

7577
if !okI && okJ {
7678
return true
7779
} else if okI && !okJ {
7880
return false
7981
} else if !okI && !okJ {
80-
return q.pending[i].hash.String() < q.pending[j].hash.String()
82+
return cI.hash.String() < cJ.hash.String()
8183
}
8284

83-
if q.pending[i].height > q.pending[j].height {
85+
if cI.height > cJ.height {
8486
return true
8587
}
8688

87-
if q.pending[i].height == q.pending[j].height {
88-
return q.pending[i].meta.UserTimestamp > q.pending[j].meta.UserTimestamp
89+
if cI.height == cJ.height {
90+
return cI.meta.UserTimestamp > cJ.meta.UserTimestamp
8991
}
9092
return false
9193
}
@@ -128,15 +130,11 @@ func (q *q) SetInvisible(ctx context.Context, ddb *doltdb.DoltDB, id hash.Hash)
128130
}
129131

130132
func load(ctx context.Context, ddb *doltdb.DoltDB, h hash.Hash) (*doltdb.OptionalCommit, error) {
131-
cs, err := doltdb.NewCommitSpec(h.String())
133+
oc, err := ddb.ResolveHash(ctx, h)
132134
if err != nil {
133135
return nil, err
134136
}
135-
c, err := ddb.Resolve(ctx, cs, nil)
136-
if err != nil {
137-
return nil, err
138-
}
139-
return c, nil
137+
return oc, nil
140138
}
141139

142140
func (q *q) Get(ctx context.Context, ddb *doltdb.DoltDB, id hash.Hash) (*c, error) {
@@ -158,14 +156,21 @@ func (q *q) Get(ctx context.Context, ddb *doltdb.DoltDB, id hash.Hash) (*c, erro
158156
if err != nil {
159157
return nil, err
160158
}
159+
161160
meta, err := commit.GetCommitMeta(ctx)
162161
if err != nil {
163162
return nil, err
164163
}
165164

166-
c := &c{ddb: ddb, commit: &doltdb.OptionalCommit{Commit: commit, Addr: id}, meta: meta, height: h, hash: id}
167-
q.loaded[id] = c
168-
return c, nil
165+
lc := &c{
166+
ddb: ddb,
167+
commit: &doltdb.OptionalCommit{Commit: commit, Addr: id},
168+
meta: meta,
169+
height: h,
170+
hash: id,
171+
}
172+
q.loaded[id] = lc
173+
return lc, nil
169174
}
170175

171176
func newQueue() *q {
@@ -190,7 +195,7 @@ func GetDotDotRevisions(ctx context.Context, includedDB *doltdb.DoltDB, included
190195

191196
var commitList []*doltdb.OptionalCommit
192197
for num < 0 || len(commitList) < num {
193-
_, commit, err := itr.Next(ctx)
198+
_, commit, _, _, err := itr.Next(ctx)
194199
if err == io.EOF {
195200
break
196201
} else if err != nil {
@@ -234,43 +239,35 @@ func newCommiterator[C doltdb.Context](ctx context.Context, ddb *doltdb.DoltDB,
234239
}
235240

236241
// Next implements doltdb.CommitItr
237-
func (iter *commiterator[C]) Next(ctx C) (hash.Hash, *doltdb.OptionalCommit, error) {
242+
func (iter *commiterator[C]) Next(ctx C) (hash.Hash, *doltdb.OptionalCommit, *datas.CommitMeta, uint64, error) {
238243
if iter.q.NumVisiblePending() > 0 {
239244
nextC := iter.q.PopPending()
240-
241-
var err error
242-
parents := []hash.Hash{}
243245
commit, ok := nextC.commit.ToCommit()
244246
if ok {
245-
parents, err = commit.ParentHashes(ctx)
246-
if err != nil {
247-
return hash.Hash{}, nil, err
248-
}
249-
}
250-
251-
for _, parentID := range parents {
252-
if err := iter.q.AddPendingIfUnseen(ctx, nextC.ddb, parentID); err != nil {
253-
return hash.Hash{}, nil, err
247+
for _, parentCommit := range commit.DatasParents() {
248+
if err := iter.q.AddPendingIfUnseen(ctx, nextC.ddb, parentCommit.Addr()); err != nil {
249+
return hash.Hash{}, nil, nil, 0, err
250+
}
254251
}
255252
}
256253

254+
var err error
257255
matches := true
258256
if iter.matchFn != nil {
259257
matches, err = iter.matchFn(nextC.commit)
260-
261258
if err != nil {
262-
return hash.Hash{}, nil, err
259+
return hash.Hash{}, nil, nil, 0, err
263260
}
264261
}
265262

266263
if matches {
267-
return nextC.hash, &doltdb.OptionalCommit{Commit: commit, Addr: nextC.hash}, nil
264+
return nextC.hash, &doltdb.OptionalCommit{Commit: commit, Addr: nextC.hash}, nextC.meta, nextC.height, nil
268265
}
269266

270267
return iter.Next(ctx)
271268
}
272269

273-
return hash.Hash{}, nil, io.EOF
270+
return hash.Hash{}, nil, nil, 0, io.EOF
274271
}
275272

276273
// Reset implements doltdb.CommitItr
@@ -301,7 +298,7 @@ func GetTopNTopoOrderedCommitsMatching(ctx context.Context, ddb *doltdb.DoltDB,
301298

302299
var commitList []*doltdb.Commit
303300
for n < 0 || len(commitList) < n {
304-
_, optCmt, err := itr.Next(ctx)
301+
_, optCmt, _, _, err := itr.Next(ctx)
305302
if err == io.EOF {
306303
break
307304
} else if err != nil {
@@ -345,47 +342,47 @@ func newDotDotCommiterator[C doltdb.Context](ctx context.Context, includedDdb *d
345342
}
346343

347344
// Next implements doltdb.CommitItr
348-
func (i *dotDotCommiterator[C]) Next(ctx C) (hash.Hash, *doltdb.OptionalCommit, error) {
345+
func (i *dotDotCommiterator[C]) Next(ctx C) (hash.Hash, *doltdb.OptionalCommit, *datas.CommitMeta, uint64, error) {
349346
if i.q.NumVisiblePending() > 0 {
350347
nextC := i.q.PopPending()
351348

352349
commit, ok := nextC.commit.ToCommit()
353350
if !ok {
354-
return nextC.hash, nextC.commit, nil
351+
return nextC.hash, nextC.commit, nextC.meta, nextC.height, nil
355352
}
356353

357354
parents, err := commit.ParentHashes(ctx)
358355
if err != nil {
359-
return hash.Hash{}, nil, err
356+
return hash.Hash{}, nil, nil, 0, err
360357
}
361358

362359
for _, parentID := range parents {
363360
if nextC.invisible {
364361
if err := i.q.SetInvisible(ctx, nextC.ddb, parentID); err != nil {
365-
return hash.Hash{}, nil, err
362+
return hash.Hash{}, nil, nil, 0, err
366363
}
367364
}
368365
if err := i.q.AddPendingIfUnseen(ctx, nextC.ddb, parentID); err != nil {
369-
return hash.Hash{}, nil, err
366+
return hash.Hash{}, nil, nil, 0, err
370367
}
371368
}
372369

373370
matches := true
374371
if i.matchFn != nil {
375372
matches, err = i.matchFn(nextC.commit)
376373
if err != nil {
377-
return hash.Hash{}, nil, err
374+
return hash.Hash{}, nil, nil, 0, err
378375
}
379376
}
380377

381-
// If not invisible, return commit. Otherwise get next commit
378+
// If not invisible, return commit. Otherwise, get next commit
382379
if !nextC.invisible && matches {
383-
return nextC.hash, nextC.commit, nil
380+
return nextC.hash, nextC.commit, nextC.meta, nextC.height, nil
384381
}
385382
return i.Next(ctx)
386383
}
387384

388-
return hash.Hash{}, nil, io.EOF
385+
return hash.Hash{}, nil, nil, 0, io.EOF
389386
}
390387

391388
// Reset implements doltdb.CommitItr

go/libraries/doltcore/rebase/rebase.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ func findRebaseCommits(ctx *sql.Context, currentBranchCommit, upstreamBranchComm
200200
// Drain the iterator into a slice so that we can easily reverse the order of the commits
201201
// so that the oldest commit is first in the generated rebase plan.
202202
for {
203-
_, optCmt, err := commitItr.Next(ctx)
203+
_, optCmt, _, _, err := commitItr.Next(ctx)
204204
if err == io.EOF {
205205
return commits, nil
206206
} else if err != nil {

0 commit comments

Comments
 (0)