Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Help Options:
--no-updates don't allow updates on already downloaded repositories [$GITCOLLECTOR_NO_UPDATES]
--no-forks github forked repositories will not be downloaded [$GITCOLLECTOR_NO_FORKS]
--orgs= list of github organization names separated by comma [$GITHUB_ORGANIZATIONS]
--excluded-repos= list of repos to exclude separated by comma [$GITCOLLECTOR_EXCLUDED_REPOS]
--token= github token [$GITHUB_TOKEN]
--metrics-db= uri to a database where metrics will be sent [$GITCOLLECTOR_METRICS_DB_URI]
--metrics-db-table= table name where the metrics will be added (default: gitcollector_metrics) [$GITCOLLECTOR_METRICS_DB_TABLE]
Expand Down
11 changes: 10 additions & 1 deletion cmd/gitcollector/subcmd/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type DownloadCmd struct {
NotAllowUpdates bool `long:"no-updates" description:"don't allow updates on already downloaded repositories" env:"GITCOLLECTOR_NO_UPDATES"`
NoForks bool `long:"no-forks" description:"github forked repositories will not be downloaded" env:"GITCOLLECTOR_NO_FORKS"`
Orgs string `long:"orgs" env:"GITHUB_ORGANIZATIONS" description:"list of github organization names separated by comma" required:"true"`
ExcludedRepos string `long:"excluded-repos" env:"GITCOLLECTOR_EXCLUDED_REPOS" description:"list of repos to exclude separated by comma" required:"false"`
Token string `long:"token" env:"GITHUB_TOKEN" description:"github token"`
MetricsDBURI string `long:"metrics-db" env:"GITCOLLECTOR_METRICS_DB_URI" description:"uri to a database where metrics will be sent"`
MetricsDBTable string `long:"metrics-db-table" env:"GITCOLLECTOR_METRICS_DB_TABLE" default:"gitcollector_metrics" description:"table name where the metrics will be added"`
Expand All @@ -56,6 +57,12 @@ func (c *DownloadCmd) Execute(args []string) error {
orgs = append(orgs, strings.ToLower(org))
}

ers := strings.Split(c.ExcludedRepos, ",")
excludedRepos := make([]string, 0, len(ers))
for _, er := range ers {
excludedRepos = append(excludedRepos, er)
}

info, err := os.Stat(c.LibPath)
if err != nil {
log.Errorf(err, "wrong path to locate the library")
Expand Down Expand Up @@ -160,7 +167,7 @@ func (c *DownloadCmd) Execute(args []string) error {
wp.Run()
log.Debugf("worker pool is running")

go runGHOrgProviders(log.New(nil), orgs, c.Token, download, c.NoForks)
go runGHOrgProviders(log.New(nil), orgs, excludedRepos, c.Token, download, c.NoForks)

wp.Wait()
log.Debugf("worker pool stopped successfully")
Expand Down Expand Up @@ -198,6 +205,7 @@ func setupMetrics(
func runGHOrgProviders(
logger log.Logger,
orgs []string,
excludedRepos []string,
token string,
download chan gitcollector.Job,
skipForks bool,
Expand All @@ -208,6 +216,7 @@ func runGHOrgProviders(
org := o
p := provider.NewGitHubOrg(
org,
excludedRepos,
token,
download,
&discovery.GitHubOpts{
Expand Down
45 changes: 28 additions & 17 deletions discovery/github-iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@ const (

// GHOrgReposIter is a GHRepositoriesIter by organization name.
type GHOrgReposIter struct {
org string
client *github.Client
repos []*github.Repository
checkpoint int
opts *github.RepositoryListByOrgOptions
waitNewRepos time.Duration
org string
excludedRepos map[string]struct{}
client *github.Client
repos []*github.Repository
checkpoint int
opts *github.RepositoryListByOrgOptions
waitNewRepos time.Duration
}

var _ GHRepositoriesIter = (*GHOrgReposIter)(nil)

// NewGHOrgReposIter builds a new GHOrgReposIter.
func NewGHOrgReposIter(org string, opts *GHReposIterOpts) *GHOrgReposIter {
func NewGHOrgReposIter(org string, excludedRepos []string, opts *GHReposIterOpts) *GHOrgReposIter {
if opts == nil {
opts = &GHReposIterOpts{}
}
Expand All @@ -61,9 +62,15 @@ func NewGHOrgReposIter(org string, opts *GHReposIterOpts) *GHOrgReposIter {
wnr = waitNewRepos
}

excludedReposSet := make(map[string]struct{})
for _, excludedRepo := range excludedRepos {
excludedReposSet[excludedRepo] = struct{}{}
}

return &GHOrgReposIter{
org: org,
client: newGithubClient(opts.AuthToken, to),
org: org,
excludedRepos: excludedReposSet,
client: newGithubClient(opts.AuthToken, to),
opts: &github.RepositoryListByOrgOptions{
ListOptions: github.ListOptions{PerPage: rpp},
},
Expand Down Expand Up @@ -92,16 +99,20 @@ func newGithubClient(token string, timeout time.Duration) *github.Client {
func (p *GHOrgReposIter) Next(
ctx context.Context,
) (*github.Repository, time.Duration, error) {
if len(p.repos) == 0 {
retry, err := p.requestRepos(ctx)
if err != nil && len(p.repos) == 0 {
return nil, retry, err
for {
if len(p.repos) == 0 {
retry, err := p.requestRepos(ctx)
if err != nil && len(p.repos) == 0 {
return nil, retry, err
}
}
}

var next *github.Repository
next, p.repos = p.repos[0], p.repos[1:]
return next, 0, nil
var next *github.Repository
next, p.repos = p.repos[0], p.repos[1:]
if _, ok := p.excludedRepos[next.GetName()]; !ok {
return next, 0, nil
}
}
}

func (p *GHOrgReposIter) requestRepos(
Expand Down
81 changes: 77 additions & 4 deletions discovery/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func TestGitHub(t *testing.T) {

discovery := NewGitHub(
advertiseRepos,
NewGHOrgReposIter(org, &GHReposIterOpts{
NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{
TimeNewRepos: 1 * time.Second,
ResultsPerPage: 100,
AuthToken: token,
Expand Down Expand Up @@ -112,7 +112,7 @@ func TestGitHubSkipForks(t *testing.T) {

discovery := NewGitHub(
advertiseRepos,
NewGHOrgReposIter(org, &GHReposIterOpts{
NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{
AuthToken: token,
}),
&GitHubOpts{
Expand Down Expand Up @@ -143,6 +143,79 @@ func TestGitHubSkipForks(t *testing.T) {
}
}

// TestExcludeRepos checks that a repository whose name is passed in the
// exclusion list of NewGHOrgReposIter is never advertised by the discovery.
func TestExcludeRepos(t *testing.T) {
	var req = require.New(t)

	const (
		org          = "src-d"
		excludedRepo = "gitcollector"
		timeToStop   = 5 * time.Second
	)

	// NOTE(review): the second value returned by getToken is discarded here;
	// confirm the discovery is expected to work when no token is available.
	token, _ := getToken()
	queue := make(chan *github.Repository, 50)
	advertiseRepos := func(
		_ context.Context,
		repos []*github.Repository,
	) error {
		for _, repo := range repos {
			queue <- repo
		}

		return nil
	}

	discovery := NewGitHub(
		advertiseRepos,
		NewGHOrgReposIter(org, []string{excludedRepo}, &GHReposIterOpts{
			TimeNewRepos:   1 * time.Second,
			ResultsPerPage: 100,
			AuthToken:      token,
		}),
		&GitHubOpts{
			MaxJobBuffer: 50,
		},
	)

	var (
		consumedRepos = make(chan *github.Repository, 200)
		stop          bool
		done          = make(chan struct{})
	)

	// Drain the advertised repositories into consumedRepos until the queue is
	// closed. If consumedRepos cannot accept a repository within timeToStop,
	// flag the failure through stop and bail out.
	go func() {
		defer func() { done <- struct{}{} }()
		for repo := range queue {
			select {
			case consumedRepos <- repo:
			case <-time.After(timeToStop):
				stop = true
				return
			}
		}
	}()

	err := discovery.Start()
	req.True(ErrDiscoveryStopped.Is(err))

	// Closing the queue ends the consumer goroutine; receiving from done
	// orders its write to stop before the read below.
	close(queue)
	<-done
	req.False(stop)
	close(consumedRepos)

	for repo := range consumedRepos {
		ep, err := GetGHEndpoint(repo)
		req.NoError(err)
		req.True(strings.Contains(ep, org))
		// GetName is nil-safe, so a repository without a name fails the
		// assertion path instead of panicking on a nil dereference.
		req.NotEqual(excludedRepo, repo.GetName())
	}
}

// TODO request rate error ?

// TestProxyMockUps
Expand Down Expand Up @@ -206,7 +279,7 @@ func testProxyMockUp(t *testing.T, code int, errContains string) {

discovery := NewGitHub(
advertiseRepos,
NewGHOrgReposIter(org, &GHReposIterOpts{
NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{
TimeNewRepos: 1 * time.Second,
ResultsPerPage: 100,
AuthToken: token,
Expand Down Expand Up @@ -272,7 +345,7 @@ func testAdvertise(t *testing.T, ac advertiseCase) {

discovery := NewGitHub(
advertiseRepos,
NewGHOrgReposIter(org, &GHReposIterOpts{
NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{
TimeNewRepos: 1 * time.Second,
ResultsPerPage: 100,
AuthToken: token,
Expand Down
3 changes: 2 additions & 1 deletion provider/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ import (
// based on a discovery.Github.
func NewGitHubOrg(
org string,
excludedRepos []string,
authToken string,
queue chan<- gitcollector.Job,
opts *discovery.GitHubOpts,
) *discovery.GitHub {
return discovery.NewGitHub(
AdvertiseGHRepositoriesOnJobQueue(queue),
discovery.NewGHOrgReposIter(org, &discovery.GHReposIterOpts{
discovery.NewGHOrgReposIter(org, excludedRepos, &discovery.GHReposIterOpts{
AuthToken: authToken,
}),
opts,
Expand Down
1 change: 1 addition & 0 deletions provider/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func TestGitHub(t *testing.T) {
queue := make(chan gitcollector.Job, 50)
provider := NewGitHubOrg(
org,
[]string{},
"",
queue,
&discovery.GitHubOpts{
Expand Down