Skip to content

Commit b5343d7

Browse files
authored
Merge pull request #23 from filedrive-team/0.4.1
0.4.1
2 parents d78531e + 38eb8c8 commit b5343d7

File tree

5 files changed

+125
-26
lines changed

5 files changed

+125
-26
lines changed

README.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
Go-graphsplit
22
==================
3+
[![](https://img.shields.io/github/go-mod/go-version/filedrive-team/go-graphsplit)]()
4+
[![](https://goreportcard.com/badge/github.com/filedrive-team/go-graphsplit)](https://goreportcard.com/report/github.com/filedrive-team/go-graphsplit)
5+
[![](https://img.shields.io/github/license/filedrive-team/go-graphsplit)](https://github.com/filedrive-team/go-graphsplit/blob/main/LICENSE)
6+
37
> A tool for splitting a large dataset into graph slices fit for making deals in the Filecoin Network.
48
59

@@ -15,10 +19,10 @@ git clone https://github.com/filedrive-team/go-graphsplit.git
1519

1620
cd go-graphsplit
1721

18-
// get submodules
22+
# get submodules
1923
git submodule update --init --recursive
2024

21-
// build filecoin-ffi
25+
# build filecoin-ffi
2226
make ffi
2327

2428
make
@@ -45,17 +49,17 @@ Splitting dataset:
4549
--parent-path=/path/to/dataset \
4650
/path/to/dataset
4751
```
48-
Notes: A manifest.csv will created to save the mapping with graph slice name and the payload cid. As following:
52+
Notes: A manifest.csv will be created to save the mapping between the graph slice name, the payload cid and the slice's inner structure, as follows:
4953
```sh
5054
cat /path/to/car-dir/manifest.csv
51-
payload_cid,filename
52-
Qm...,graph-slice-name.car
55+
payload_cid,filename,detail
56+
Qm...,graph-slice-name.car,inner-structure-json
5357
```
5458
If --calc-commp=true is set, two more fields will be added to manifest.csv:
5559
```sh
5660
cat /path/to/car-dir/manifest.csv
57-
payload_cid,filename,piece_cid,piece_size
58-
Qm...,graph-slice-name.car,baga...,16646144
61+
payload_cid,filename,piece_cid,piece_size,detail
62+
Qm...,graph-slice-name.car,baga...,16646144,inner-structure-json
5963
```
6064

6165
Import car file to IPFS:

chunk.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@ import (
1515
var log = logging.Logger("graphsplit")
1616

1717
type GraphBuildCallback interface {
18-
OnSuccess(node ipld.Node, graphName string)
18+
OnSuccess(node ipld.Node, graphName, fsDetail string)
1919
OnError(error)
2020
}
2121

2222
type commPCallback struct {
2323
carDir string
2424
}
2525

26-
func (cc *commPCallback) OnSuccess(node ipld.Node, graphName string) {
26+
func (cc *commPCallback) OnSuccess(node ipld.Node, graphName, fsDetail string) {
2727
commpStartTime := time.Now()
2828
carfilepath := path.Join(cc.carDir, node.Cid().String()+".car")
2929
cpRes, err := CalcCommP(context.TODO(), carfilepath)
@@ -47,11 +47,11 @@ func (cc *commPCallback) OnSuccess(node ipld.Node, graphName string) {
4747
}
4848
defer f.Close()
4949
if isCreateAction {
50-
if _, err := f.Write([]byte("playload_cid,filename,piece_cid,piece_size\n")); err != nil {
50+
if _, err := f.Write([]byte("playload_cid,filename,piece_cid,piece_size,detail\n")); err != nil {
5151
log.Fatal(err)
5252
}
5353
}
54-
if _, err := f.Write([]byte(fmt.Sprintf("%s,%s,%s,%d\n", node.Cid(), graphName, cpRes.Root.String(), cpRes.Size))); err != nil {
54+
if _, err := f.Write([]byte(fmt.Sprintf("%s,%s,%s,%d,%s\n", node.Cid(), graphName, cpRes.Root.String(), cpRes.Size, fsDetail))); err != nil {
5555
log.Fatal(err)
5656
}
5757
}
@@ -64,7 +64,7 @@ type csvCallback struct {
6464
carDir string
6565
}
6666

67-
func (cc *csvCallback) OnSuccess(node ipld.Node, graphName string) {
67+
func (cc *csvCallback) OnSuccess(node ipld.Node, graphName, fsDetail string) {
6868
// Add node info to manifest.csv
6969
manifestPath := path.Join(cc.carDir, "manifest.csv")
7070
_, err := os.Stat(manifestPath)
@@ -81,11 +81,11 @@ func (cc *csvCallback) OnSuccess(node ipld.Node, graphName string) {
8181
}
8282
defer f.Close()
8383
if isCreateAction {
84-
if _, err := f.Write([]byte("playload_cid,filename\n")); err != nil {
84+
if _, err := f.Write([]byte("playload_cid,filename,detail\n")); err != nil {
8585
log.Fatal(err)
8686
}
8787
}
88-
if _, err := f.Write([]byte(fmt.Sprintf("%s,%s\n", node.Cid(), graphName))); err != nil {
88+
if _, err := f.Write([]byte(fmt.Sprintf("%s,%s,%s\n", node.Cid(), graphName, fsDetail))); err != nil {
8989
log.Fatal(err)
9090
}
9191
}
@@ -96,7 +96,7 @@ func (cc *csvCallback) OnError(err error) {
9696

9797
type errCallback struct{}
9898

99-
func (cc *errCallback) OnSuccess(ipld.Node, string) {}
99+
func (cc *errCallback) OnSuccess(ipld.Node, string, string) {}
100100
func (cc *errCallback) OnError(err error) {
101101
log.Fatal(err)
102102
}

cmd/graphsplit/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ var chunkCmd = &cli.Command{
7979
sliceSize := c.Uint64("slice-size")
8080
parentPath := c.String("parent-path")
8181
carDir := c.String("car-dir")
82+
if !graphsplit.ExistDir(carDir) {
83+
return xerrors.Errorf("Unexpected! The path of car-dir does not exist")
84+
}
8285
graphName := c.String("graph-name")
8386
if sliceSize == 0 {
8487
return xerrors.Errorf("Unexpected! Slice size has been set as 0")

restore.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func NodeWriteTo(nd files.Node, fpath string) error {
7070
case files.Directory:
7171
if !ExistDir(fpath) {
7272
err := os.Mkdir(fpath, 0777)
73-
if err != nil {
73+
if err != nil && os.IsNotExist(err) {
7474
return err
7575
}
7676
}

utils.go

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package graphsplit
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"io"
78
"io/ioutil"
@@ -19,6 +20,7 @@ import (
1920
bstore "github.com/ipfs/go-ipfs-blockstore"
2021
chunker "github.com/ipfs/go-ipfs-chunker"
2122
offline "github.com/ipfs/go-ipfs-exchange-offline"
23+
format "github.com/ipfs/go-ipld-format"
2224
"github.com/ipfs/go-merkledag"
2325
dag "github.com/ipfs/go-merkledag"
2426
"github.com/ipfs/go-unixfs"
@@ -45,23 +47,101 @@ type Finfo struct {
4547
SeekEnd int64
4648
}
4749

50+
// file system tree node
51+
type fsNode struct {
52+
Name string
53+
Hash string
54+
Size uint64
55+
Link []fsNode
56+
}
57+
58+
type FSBuilder struct {
59+
root *dag.ProtoNode
60+
ds ipld.DAGService
61+
}
62+
63+
func NewFSBuilder(root *dag.ProtoNode, ds ipld.DAGService) *FSBuilder {
64+
return &FSBuilder{root, ds}
65+
}
66+
67+
func (b *FSBuilder) Build() (*fsNode, error) {
68+
fsn, err := unixfs.FSNodeFromBytes(b.root.Data())
69+
if err != nil {
70+
return nil, xerrors.Errorf("input dag is not a unixfs node: %s", err)
71+
}
72+
73+
rootn := &fsNode{
74+
Hash: b.root.Cid().String(),
75+
Size: fsn.FileSize(),
76+
Link: []fsNode{},
77+
}
78+
if !fsn.IsDir() {
79+
return rootn, nil
80+
}
81+
for _, ln := range b.root.Links() {
82+
fn, err := b.getNodeByLink(ln)
83+
if err != nil {
84+
return nil, err
85+
}
86+
rootn.Link = append(rootn.Link, fn)
87+
}
88+
89+
return rootn, nil
90+
}
91+
92+
func (b *FSBuilder) getNodeByLink(ln *format.Link) (fn fsNode, err error) {
93+
ctx := context.Background()
94+
fn = fsNode{
95+
Name: ln.Name,
96+
Hash: ln.Cid.String(),
97+
Size: ln.Size,
98+
}
99+
nd, err := b.ds.Get(ctx, ln.Cid)
100+
if err != nil {
101+
log.Warn(err)
102+
return
103+
}
104+
105+
nnd, ok := nd.(*dag.ProtoNode)
106+
if !ok {
107+
err = xerrors.Errorf("failed to transformed to dag.ProtoNode")
108+
return
109+
}
110+
fsn, err := unixfs.FSNodeFromBytes(nnd.Data())
111+
if err != nil {
112+
log.Warnf("input dag is not a unixfs node: %s", err)
113+
return
114+
}
115+
if !fsn.IsDir() {
116+
return
117+
}
118+
for _, ln := range nnd.Links() {
119+
node, err := b.getNodeByLink(ln)
120+
if err != nil {
121+
return node, err
122+
}
123+
fn.Link = append(fn.Link, node)
124+
}
125+
return
126+
}
127+
48128
func BuildIpldGraph(ctx context.Context, fileList []Finfo, graphName, parentPath, carDir string, parallel int, cb GraphBuildCallback) {
49-
node, err := buildIpldGraph(ctx, fileList, parentPath, carDir, parallel)
129+
node, fsDetail, err := buildIpldGraph(ctx, fileList, parentPath, carDir, parallel)
50130
if err != nil {
51131
//log.Fatal(err)
52132
cb.OnError(err)
53133
return
54134
}
55-
cb.OnSuccess(node, graphName)
135+
cb.OnSuccess(node, graphName, fsDetail)
56136
}
57137

58-
func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir string, parallel int) (ipld.Node, error) {
138+
func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir string, parallel int) (ipld.Node, string, error) {
59139
bs2 := bstore.NewBlockstore(dss.MutexWrap(datastore.NewMapDatastore()))
60140
dagServ := merkledag.NewDAGService(blockservice.New(bs2, offline.Exchange(bs2)))
61141

62142
cidBuilder, err := merkledag.PrefixForCidVersion(0)
63143
if err != nil {
64-
return nil, err
144+
return nil, "", err
65145
}
66146
fileNodeMap := make(map[string]*dag.ProtoNode)
67147
dirNodeMap := make(map[string]*dag.ProtoNode)
@@ -115,8 +195,11 @@ func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir st
115195
// log.Info(item.Path)
116196
// log.Infof("file name: %s, file size: %d, item size: %d, seek-start:%d, seek-end:%d", item.Name, item.Info.Size(), item.SeekEnd-item.SeekStart, item.SeekStart, item.SeekEnd)
117197
dirStr := path.Dir(item.Path)
118-
119-
if parentPath != "" && strings.HasPrefix(dirStr, parentPath) {
198+
parentPath = path.Clean(parentPath)
199+
// when the parent path equals the target path, i.e. the parent path is itself a file path
200+
if parentPath == path.Clean(item.Path) {
201+
dirStr = ""
202+
} else if parentPath != "" && strings.HasPrefix(dirStr, parentPath) {
120203
dirStr = dirStr[len(parentPath):]
121204
}
122205

@@ -175,7 +258,7 @@ func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir st
175258
if isLinked(parentNode, dir) {
176259
parentNode, err = parentNode.UpdateNodeLink(dir, dirNode)
177260
if err != nil {
178-
return nil, err
261+
return nil, "", err
179262
}
180263
dirNodeMap[parentKey] = parentNode
181264
} else {
@@ -197,7 +280,7 @@ func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir st
197280
//car
198281
carF, err := os.Create(path.Join(carDir, rootNode.Cid().String()+".car"))
199282
if err != nil {
200-
return nil, err
283+
return nil, "", err
201284
}
202285
defer carF.Close()
203286
selector := allSelector()
@@ -206,13 +289,22 @@ func buildIpldGraph(ctx context.Context, fileList []Finfo, parentPath, carDir st
206289
// cario := cario.NewCarIO()
207290
// err = cario.WriteCar(context.Background(), bs2, rootNode.Cid(), selector, carF)
208291
if err != nil {
209-
return nil, err
292+
return nil, "", err
210293
}
211294
log.Infof("generate car file completed, time elapsed: %s", time.Now().Sub(genCarStartTime))
212295

296+
fsBuilder := NewFSBuilder(rootNode, dagServ)
297+
fsNode, err := fsBuilder.Build()
298+
if err != nil {
299+
return nil, "", err
300+
}
301+
fsNodeBytes, err := json.Marshal(fsNode)
302+
if err != nil {
303+
return nil, "", err
304+
}
213305
//log.Info(dirNodeMap)
214306
fmt.Println("++++++++++++ finished to build ipld +++++++++++++")
215-
return rootNode, nil
307+
return rootNode, fmt.Sprintf("%s", fsNodeBytes), nil
216308
}
217309

218310
func allSelector() ipldprime.Node {

0 commit comments

Comments
 (0)