Skip to content

Commit 9850933

Browse files
authored
Merge pull request #9306 from dolthub/aaron/clone-robustness-under-remote-conjoin
go: store/datas/pull: clone.go: Improve robust of Clone for certain remoteapi implementations when the remote Conjoins.
2 parents 8063f60 + c5a5ec8 commit 9850933

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

go/libraries/doltcore/remotestorage/chunk_store.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"go.opentelemetry.io/otel/attribute"
3737
"go.opentelemetry.io/otel/trace"
3838
"golang.org/x/sync/errgroup"
39+
"google.golang.org/protobuf/types/known/timestamppb"
3940

4041
remotesapi "github.com/dolthub/dolt/go/gen/proto/dolt/services/remotesapi/v1alpha1"
4142
"github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/reliable"
@@ -1228,6 +1229,17 @@ func (drtf DoltRemoteTableFile) Open(ctx context.Context) (io.ReadCloser, uint64
12281229
}
12291230

12301231
if resp.StatusCode/100 != 2 {
1232+
// XXX: If we fail to fetch, and we have the ability
1233+
// to refresh the URL, set ourselves up to refresh the
1234+
// next time we are used. This can help if the
1235+
// remoteapi endpoint gave us a URL that actually
1236+
// expires for some reason before the RefreshAfter
1237+
// timestamp. (Local clock drift, remote key rotation,
1238+
// etc.)
1239+
if drtf.info.RefreshAfter != nil {
1240+
drtf.info.RefreshAfter = timestamppb.Now()
1241+
drtf.info.RefreshAfter.Seconds -= 10
1242+
}
12311243
defer resp.Body.Close()
12321244
body := make([]byte, 4096)
12331245
n, _ := io.ReadFull(resp.Body, body)

go/store/datas/pull/clone.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,34 @@ func clone(ctx context.Context, srcTS, sinkTS chunks.TableFileStore, sinkCS chun
204204
if failureCount >= maxAttempts {
205205
return err
206206
}
207-
if _, sourceFiles, appendixFiles, err = srcTS.Sources(ctx); err != nil {
207+
if _, refreshedSourceFiles, refreshedAppendixFiles, err := srcTS.Sources(ctx); err != nil {
208208
return err
209209
} else {
210-
tblFiles = filterAppendicesFromSourceFiles(appendixFiles, sourceFiles)
211-
_, fileIDToTF, _ = mapTableFiles(tblFiles)
210+
refreshedTblFiles := filterAppendicesFromSourceFiles(refreshedAppendixFiles, refreshedSourceFiles)
211+
_, refreshedFileIDToTF, _ := mapTableFiles(refreshedTblFiles)
212+
// Sources() will refresh remote table file
213+
// sources with new download URLs. However, it
214+
// will only return URLs for table files which
215+
// are in the remote manifest, which could
216+
// have changed since the clone started. Here
217+
// we keep around any old TableFile instances
218+
// for any TableFiles which have been
219+
// conjoined away or have been the victim of a
220+
// garbage collection run on the remote.
221+
//
222+
// If these files are no longer accessible,
223+
// for example because the URLs expired
224+
// without a RefreshTableFileUrlRequest being
225+
// provided, or because the table files
226+
// themselves have been removed from storage,
227+
// then continuing to use these sources will
228+
// fail terminally eventually. But in the
229+
// case of doltremoteapi on DoltHub, using
230+
// these Sources() will continue to work and
231+
// will allow the Clone to proceed.
232+
for k, v := range refreshedFileIDToTF {
233+
fileIDToTF[k] = v
234+
}
212235
}
213236
}
214237

0 commit comments

Comments
 (0)