Skip to content

Commit 3add881

Browse files
authored
Merge pull request #9720 from dolthub/macneale4/conjoin-prep
[no-release-notes] conjoin prep
2 parents 14be165 + 03d7fac commit 3add881

File tree

9 files changed

+128
-187
lines changed

9 files changed

+128
-187
lines changed

go/libraries/doltcore/remotesrv/grpc.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ func (rs *RemoteChunkStore) GetDownloadLocations(ctx context.Context, req *remot
192192

193193
var ranges []*remotesapi.RangeChunk
194194
for h, r := range hashToRange {
195+
if r.DictLength != 0 {
196+
return nil, status.Error(codes.Unknown, "upgrade your dolt client; it is too old to read these files")
197+
}
198+
195199
hCpy := h
196200
ranges = append(ranges, &remotesapi.RangeChunk{Hash: hCpy[:], Offset: r.Offset, Length: r.Length})
197201
}

go/store/nbs/archive.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,12 @@ Footer:
6969
is that we load the larger footer for all versions, but ignore the first 4 bytes for versions 1 and 2.
7070
7171
CheckSums:
72-
+----------------------------+-------------------+----------------------+
73-
| (64) Sha512 ByteSpan 1 - N | (64) Sha512 Index | (64) Sha512 Metadata |
74-
+----------------------------+-------------------+----------------------+
75-
- The Sha512 checksums of the ByteSpans, Index, and Metadata. Currently unused, but may be used in the future. Leaves
76-
the opening to verify integrity manually at least, but could be used in the future to allow to break the file into
77-
parts, and ensure we can verify the integrity of each part.
72+
+------------------+
73+
| (192) DEAD SPACE |
74+
+------------------+
75+
- The Sha512 checksums of the ByteSpans, Index, and Metadata were in the initial design, but were never used. The ability
76+
to calculate them was dropped to support archive conjoins, and leaving the 192 bytes in the footer allows us to avoid
77+
a format bump.
7878
7979
Index:
8080
The Index is a concatenation of 4 sections, all of which are stored in raw form on disk.
@@ -131,6 +131,7 @@ Index:
131131
132132
- Each Hash Suffix is the last 12 bytes of a Chunk in this Table.
133133
- Hash Suffix M must correspond to Prefix M and Chunk Record M
134+
- The ID, or name, of the artifact is calculated using the truncated Sha512 (first 20 bytes) of the Suffix data.
134135
135136
Metadata:
136137
The Metadata section is intended to be used for additional information about the Archive. This may include the version

go/store/nbs/archive_build.go

Lines changed: 9 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,21 @@ package nbs
1616

1717
import (
1818
"context"
19-
"encoding/json"
2019
"errors"
2120
"fmt"
2221
"math"
2322
"math/rand"
2423
"os"
2524
"path/filepath"
2625
"sort"
26+
"strings"
2727
"sync"
2828
"sync/atomic"
29-
"time"
3029

3130
"github.com/dolthub/gozstd"
3231
lru "github.com/hashicorp/golang-lru/v2"
3332
"golang.org/x/sync/errgroup"
3433

35-
"github.com/dolthub/dolt/go/cmd/dolt/doltversion"
3634
"github.com/dolthub/dolt/go/store/chunks"
3735
"github.com/dolthub/dolt/go/store/hash"
3836
)
@@ -336,42 +334,10 @@ func convertTableFileToArchive(
336334
return arcW.finalPath, name, chunkCount, err
337335
}
338336

339-
func indexFinalize(arcW *archiveWriter, originTableFile hash.Hash) error {
340-
err := arcW.finalizeByteSpans()
341-
if err != nil {
342-
return err
343-
}
344-
345-
err = arcW.writeIndex()
346-
if err != nil {
347-
return err
348-
}
349-
350-
meta := map[string]string{
351-
amdkDoltVersion: doltversion.Version,
352-
amdkConversionTime: time.Now().UTC().Format(time.RFC3339),
353-
}
354-
if !originTableFile.IsEmpty() {
355-
meta[amdkOriginTableFile] = originTableFile.String()
356-
}
357-
358-
jsonData, err := json.Marshal(meta)
359-
if err != nil {
360-
return err
361-
}
362-
363-
err = arcW.writeMetadata(jsonData)
364-
if err != nil {
365-
return err
366-
}
367-
368-
return arcW.writeFooter()
369-
}
370-
371337
// indexFinalizeFlushArchive writes the index, metadata, and footer to the archive file. It also flushes the archive writer
372338
// to the directory provided. The name is calculated from the footer, and can be obtained by calling getName on the archive.
373339
func indexFinalizeFlushArchive(arcW *archiveWriter, archivePath string, originTableFile hash.Hash) error {
374-
err := indexFinalize(arcW, originTableFile)
340+
err := arcW.indexFinalize(originTableFile)
375341
if err != nil {
376342
return err
377343
}
@@ -600,7 +566,13 @@ func verifyAllChunks(ctx context.Context, idx tableIndex, archiveFile string, pr
600566
return err
601567
}
602568

603-
index, err := newArchiveReader(ctx, fra, uint64(fra.sz), stats)
569+
id := strings.TrimSuffix(filepath.Base(archiveFile), ArchiveFileSuffix)
570+
name, ok := hash.MaybeParse(id)
571+
if !ok {
572+
return fmt.Errorf("invalid archive file path: %s", archiveFile)
573+
}
574+
575+
index, err := newArchiveReader(ctx, fra, name, uint64(fra.sz), stats)
604576
if err != nil {
605577
return err
606578
}

go/store/nbs/archive_chunk_source.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ package nbs
1717
import (
1818
"context"
1919
"encoding/binary"
20+
"fmt"
2021
"io"
2122
"path/filepath"
23+
"strings"
2224

2325
"github.com/pkg/errors"
2426
"golang.org/x/sync/errgroup"
@@ -42,7 +44,7 @@ func newArchiveChunkSource(ctx context.Context, dir string, h hash.Hash, chunkCo
4244
return archiveChunkSource{}, err
4345
}
4446

45-
aRdr, err := newArchiveReader(ctx, fra, uint64(fra.sz), stats)
47+
aRdr, err := newArchiveReader(ctx, fra, h, uint64(fra.sz), stats)
4648
if err != nil {
4749
return archiveChunkSource{}, err
4850
}
@@ -64,7 +66,13 @@ func newAWSArchiveChunkSource(ctx context.Context,
6466
return emptyChunkSource{}, err
6567
}
6668

67-
aRdr, err := newArchiveReaderFromFooter(ctx, &s3TableReaderAt{s3, name}, sz, footer, stats)
69+
id := strings.TrimSuffix(filepath.Base(name), ArchiveFileSuffix)
70+
hashId, ok := hash.MaybeParse(id)
71+
if !ok {
72+
return emptyChunkSource{}, fmt.Errorf("invalid archive file path: %s", name)
73+
}
74+
75+
aRdr, err := newArchiveReaderFromFooter(ctx, &s3TableReaderAt{s3, name}, hashId, sz, footer, stats)
6876
if err != nil {
6977
return emptyChunkSource{}, err
7078
}

go/store/nbs/archive_reader.go

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ func (f archiveFooter) metadataSpan() byteSpan {
113113
return byteSpan{offset: f.fileSize - f.actualFooterSize() - uint64(f.metadataSize), length: uint64(f.metadataSize)}
114114
}
115115

116-
func newArchiveMetadata(ctx context.Context, reader tableReaderAt, fileSize uint64, stats *Stats) (*ArchiveMetadata, error) {
117-
aRdr, err := newArchiveReader(ctx, reader, fileSize, stats)
116+
func newArchiveMetadata(ctx context.Context, reader tableReaderAt, name hash.Hash, fileSize uint64, stats *Stats) (*ArchiveMetadata, error) {
117+
aRdr, err := newArchiveReader(ctx, reader, name, fileSize, stats)
118118
if err != nil {
119119
return nil, err
120120
}
@@ -180,21 +180,21 @@ func newArchiveMetadata(ctx context.Context, reader tableReaderAt, fileSize uint
180180
}, nil
181181
}
182182

183-
func newArchiveReaderFromFooter(ctx context.Context, reader tableReaderAt, fileSz uint64, footer []byte, stats *Stats) (archiveReader, error) {
183+
func newArchiveReaderFromFooter(ctx context.Context, reader tableReaderAt, name hash.Hash, fileSz uint64, footer []byte, stats *Stats) (archiveReader, error) {
184184
if uint64(len(footer)) != archiveFooterSize {
185185
return archiveReader{}, errors.New("runtime error: invalid footer.")
186186
}
187187

188-
ftr, err := buildFooter(fileSz, footer)
188+
ftr, err := buildFooter(name, fileSz, footer)
189189
if err != nil {
190190
return archiveReader{}, err
191191
}
192192

193193
return buildArchiveReader(ctx, reader, ftr, stats)
194194
}
195195

196-
func newArchiveReader(ctx context.Context, reader tableReaderAt, fileSize uint64, stats *Stats) (archiveReader, error) {
197-
footer, err := loadFooter(ctx, reader, fileSize, stats)
196+
func newArchiveReader(ctx context.Context, reader tableReaderAt, name hash.Hash, fileSize uint64, stats *Stats) (archiveReader, error) {
197+
footer, err := loadFooter(ctx, reader, name, fileSize, stats)
198198
if err != nil {
199199
return archiveReader{}, fmt.Errorf("Failed to loadFooter: %w", err)
200200
}
@@ -355,17 +355,17 @@ func newSectionReader(ctx context.Context, rd ReaderAtWithStats, off, len int64,
355355
return io.NewSectionReader(readerAtWithStatsBridge{rd, ctx, stats}, off, len)
356356
}
357357

358-
func loadFooter(ctx context.Context, reader ReaderAtWithStats, fileSize uint64, stats *Stats) (f archiveFooter, err error) {
358+
func loadFooter(ctx context.Context, reader ReaderAtWithStats, name hash.Hash, fileSize uint64, stats *Stats) (f archiveFooter, err error) {
359359
section := newSectionReader(ctx, reader, int64(fileSize-archiveFooterSize), int64(archiveFooterSize), stats)
360360
buf := make([]byte, archiveFooterSize)
361361
_, err = io.ReadFull(section, buf)
362362
if err != nil {
363363
return
364364
}
365-
return buildFooter(fileSize, buf)
365+
return buildFooter(name, fileSize, buf)
366366
}
367367

368-
func buildFooter(fileSize uint64, buf []byte) (f archiveFooter, err error) {
368+
func buildFooter(name hash.Hash, fileSize uint64, buf []byte) (f archiveFooter, err error) {
369369
f.formatVersion = buf[afrVersionOffset]
370370
f.fileSignature = string(buf[afrSigOffset:])
371371
// Verify File Signature
@@ -404,14 +404,7 @@ func buildFooter(fileSize uint64, buf []byte) (f archiveFooter, err error) {
404404
f.metaCheckSum = sha512Sum(buf[afrMetaChkSumOffset : afrMetaChkSumOffset+sha512.Size])
405405
f.fileSize = fileSize
406406

407-
// calculate the hash of the footer. We don't currently verify that this is what was used to load the content.
408-
sha := sha512.New()
409-
if smallFooter {
410-
buf = buf[4:]
411-
}
412-
413-
sha.Write(buf)
414-
f.hash = hash.New(sha.Sum(nil)[:hash.ByteLen])
407+
f.hash = name
415408

416409
return
417410
}
@@ -583,22 +576,6 @@ func (ar archiveReader) getMetadata(ctx context.Context, stats *Stats) ([]byte,
583576
return ar.readByteSpan(ctx, ar.footer.metadataSpan(), stats)
584577
}
585578

586-
// verifyDataCheckSum verifies the checksum of the data section of the archive. Note - this requires a fully read of
587-
// the data section, which could be sizable.
588-
func (ar archiveReader) verifyDataCheckSum(ctx context.Context, stats *Stats) error {
589-
return verifyCheckSum(ctx, ar.reader, ar.footer.dataSpan(), ar.footer.dataCheckSum, stats)
590-
}
591-
592-
// verifyIndexCheckSum verifies the checksum of the index section of the archive.
593-
func (ar archiveReader) verifyIndexCheckSum(ctx context.Context, stats *Stats) error {
594-
return verifyCheckSum(ctx, ar.reader, ar.footer.totalIndexSpan(), ar.footer.indexCheckSum, stats)
595-
}
596-
597-
// verifyMetaCheckSum verifies the checksum of the metadata section of the archive.
598-
func (ar archiveReader) verifyMetaCheckSum(ctx context.Context, stats *Stats) error {
599-
return verifyCheckSum(ctx, ar.reader, ar.footer.metadataSpan(), ar.footer.metaCheckSum, stats)
600-
}
601-
602579
func (ar archiveReader) iterate(ctx context.Context, cb func(chunks.Chunk) error, stats *Stats) error {
603580
for i := uint32(0); i < ar.footer.chunkCount; i++ {
604581
var hasBytes [hash.ByteLen]byte

0 commit comments

Comments
 (0)