Skip to content

Commit a191ce8

Browse files
committed
*: add flag to the cli to skip forked repositories on discovery
Signed-off-by: Manuel Carmona <[email protected]>
1 parent 9ecab19 commit a191ce8

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

cmd/gitcollector/subcmd/download.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type DownloadCmd struct {
3030
Workers int `long:"workers" description:"number of workers, default to GOMAXPROCS" env:"GITCOLLECTOR_WORKERS"`
3131
HalfCPU bool `long:"half-cpu" description:"set the number of workers to half of the set workers" env:"GITCOLLECTOR_HALF_CPU"`
3232
NotAllowUpdates bool `long:"no-updates" description:"don't allow updates on already downloaded repositories" env:"GITCOLLECTOR_NO_UPDATES"`
33+
NoForks bool `long:"no-forks" description:"github forked repositories will not be downloaded" env:"GITCOLLECTOR_NO_FORKS"`
3334
Orgs string `long:"orgs" env:"GITHUB_ORGANIZATIONS" description:"list of github organization names separated by comma" required:"true"`
3435
Token string `long:"token" env:"GITHUB_TOKEN" description:"github token"`
3536
MetricsDBURI string `long:"metrics-db" env:"GITCOLLECTOR_METRICS_DB_URI" description:"uri to a database where metrics will be sent"`
@@ -139,7 +140,7 @@ func (c *DownloadCmd) Execute(args []string) error {
139140
wp.Run()
140141
log.Debugf("worker pool is running")
141142

142-
go runGHOrgProviders(log.New(nil), orgs, c.Token, download)
143+
go runGHOrgProviders(log.New(nil), orgs, c.Token, download, c.NoForks)
143144

144145
wp.Wait()
145146
log.Debugf("worker pool stopped successfully")
@@ -183,6 +184,7 @@ func runGHOrgProviders(
183184
orgs []string,
184185
token string,
185186
download chan gitcollector.Job,
187+
skipForks bool,
186188
) {
187189
var wg sync.WaitGroup
188190
wg.Add(len(orgs))
@@ -196,7 +198,9 @@ func runGHOrgProviders(
196198
AuthToken: token,
197199
},
198200
),
199-
&discovery.GHProviderOpts{},
201+
&discovery.GHProviderOpts{
202+
SkipForks: skipForks,
203+
},
200204
)
201205

202206
go func() {

discovery/provider.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ var (
2828

2929
// GHProviderOpts represents configuration options for a GHProvider.
3030
type GHProviderOpts struct {
31+
SkipForks bool
3132
WaitNewRepos bool
3233
WaitOnRateLimit bool
3334
StopTimeout time.Duration
@@ -158,6 +159,10 @@ func (p *GHProvider) enqueueJob(ctx context.Context) error {
158159
return nil
159160
}
160161

162+
if p.opts.SkipForks && repo.GetFork() {
163+
return nil
164+
}
165+
161166
endpoint, err := getEndpoint(repo)
162167
if err != nil {
163168
return nil

discovery/provider_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,38 @@ func TestGHProvider(t *testing.T) {
7272
req.True(strings.Contains(job.Endpoints[0], org))
7373
}
7474
}
75+
76+
func TestGHProviderSkipForks(t *testing.T) {
77+
var req = require.New(t)
78+
const org = "src-d"
79+
80+
queue := make(chan gitcollector.Job, 200)
81+
provider := NewGHProvider(
82+
queue,
83+
NewGHOrgReposIter(org, &GHReposIterOpts{}),
84+
&GHProviderOpts{
85+
SkipForks: true,
86+
MaxJobBuffer: 50,
87+
},
88+
)
89+
90+
done := make(chan struct{})
91+
go func() {
92+
err := provider.Start()
93+
req.True(ErrNewRepositoriesNotFound.Is(err))
94+
close(done)
95+
}()
96+
97+
<-done
98+
close(queue)
99+
forkedRepos := []string{"or-tools", "PyHive", "go-oniguruma"}
100+
for job := range queue {
101+
j, ok := job.(*library.Job)
102+
req.True(ok)
103+
req.Len(j.Endpoints, 1)
104+
105+
for _, forked := range forkedRepos {
106+
req.False(strings.Contains(j.Endpoints[0], forked))
107+
}
108+
}
109+
}

0 commit comments

Comments
 (0)