@@ -204,11 +204,34 @@ func clone(ctx context.Context, srcTS, sinkTS chunks.TableFileStore, sinkCS chun
204204 if failureCount >= maxAttempts {
205205 return err
206206 }
207- if _ , sourceFiles , appendixFiles , err = srcTS .Sources (ctx ); err != nil {
207+ if _ , refreshedSourceFiles , refreshedAppendixFiles , err : = srcTS .Sources (ctx ); err != nil {
208208 return err
209209 } else {
210- tblFiles = filterAppendicesFromSourceFiles (appendixFiles , sourceFiles )
211- _ , fileIDToTF , _ = mapTableFiles (tblFiles )
210+ refreshedTblFiles := filterAppendicesFromSourceFiles (refreshedAppendixFiles , refreshedSourceFiles )
211+ _ , refreshedFileIDToTF , _ := mapTableFiles (refreshedTblFiles )
212+ // Sources() will refresh remote table file
213+ // sources with new download URLs. However, it
214+ // will only return URLs for table files which
215+ // are in the remote manifest, which could
216+ // have changed since the clone started. Here
217+ // we keep around any old TableFile instances
218+ // for any TableFiles which have been
219+ // conjoined away or have been the victim of a
220+ // garbage collection run on the remote.
221+ //
222+ // If these files are no longer accessible,
223+ // for example because the URLs expired
224+ // without a RefreshTableFileUrlRequest being
225+ // provided, or because the table files
226+ // themselves have been removed from storage,
227+ // then continuing to use these sources will
228+ // fail terminally eventually. But in the
229+ // case of doltremoteapi on DoltHub, using
230+ // these old TableFile instances will continue to work and
231+ // will allow the Clone to proceed.
232+ for k , v := range refreshedFileIDToTF {
233+ fileIDToTF [k ] = v
234+ }
212235 }
213236 }
214237
0 commit comments