46 changes: 46 additions & 0 deletions .github/workflows/org_fetcher.yml
@@ -0,0 +1,46 @@
name: Org Contributions Fetcher

on:
  workflow_dispatch:

jobs:
  fetch_contributions:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: pip install requests

      - name: Check for GH_TOKEN (PAT)
        run: |
          if [ -z "${{ secrets.PAT }}" ]; then
            echo "::error::GH_TOKEN (PAT) is not set"
            exit 1
          fi

      - name: Run Org Fetcher
        env:
          PAT: ${{ secrets.PAT }}
        run: python issue_contributor_fetcher/org_fetcher/org_fetcher.py

      - name: Upload CSV artifact
        uses: actions/upload-artifact@v4
        with:
          name: org-contributions
          path: issue_contributor_fetcher/org_fetcher/org_contr.csv
4 changes: 2 additions & 2 deletions .github/workflows/repo-update.yml
@@ -1,4 +1,4 @@
-name: Auto Label Multiple Repos with Tracking
+name: Auto Label Multiple Repos

 on:
   workflow_dispatch:
@@ -63,7 +63,7 @@ jobs:
           git status
           git diff
           git add $JSON_FILE
-          git commit -m "Update label status for target repos"
+          git commit -m "GitHub Action Commit: Completed labeling for specified target repos"
           git log -1
           git push origin mixin --verbose
         else
49 changes: 49 additions & 0 deletions .github/workflows/repo_fetcher.yml
@@ -0,0 +1,49 @@
name: Repo Contributions Fetcher

on:
  workflow_dispatch:

jobs:
  fetch_issues_prs:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          ref: mixin

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install Python Dependencies
        run: pip install requests

      - name: Check for GH_TOKEN (PAT)
        run: |
          if [ -z "${{ secrets.PAT }}" ]; then
            echo "::error::GH_TOKEN (PAT) is not set"
            exit 1
          fi

      - name: Run Repo Fetcher Script
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          python issue_contributor_fetcher/repo_fetcher/repo_fetcher.py

      - name: Upload CSV Results
        uses: actions/upload-artifact@v4
        with:
          name: poc-results
          path: issue_contributor_fetcher/repo_fetcher/poc_results.csv
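
Both fetcher workflows fail fast when secrets.PAT is absent. Assuming the GitHub CLI is available, the secret can be seeded from a checkout with something like the following (the token itself is a placeholder; it needs read access to the target org's repos):

    gh secret set PAT --body "<personal-access-token>"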
15 changes: 9 additions & 6 deletions API_repo_labels/scripts/repo_labeler.py
@@ -6,7 +6,6 @@
 import re
 import time
 import logging
-import requests

 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s %(levelname)s %(message)s')
@@ -95,6 +94,7 @@ def create_labels(owner, repo, labels, token):
     failed = False
     i = 0
     success_count = 0
+    secondary_delay = 30

     while i < len(labels):
         # retrieves the name of the label to be created (from the list)
@@ -116,7 +116,8 @@
                 "name": label_name,
                 "description": label['description'][:100],
                 "color": label['color']
-            }
+            },
+            timeout=10
         )

         if create_resp.status_code == 201:
@@ -136,11 +137,13 @@
                 sleep_duration = max(reset_time - int(time.time()) + 5, 5)
                 logging.warning(f"Rate limit reached, sleeping for {sleep_duration}s...")
                 time.sleep(sleep_duration)
-                continue  # retry same label after sleeping
+                continue  # retry same label

             elif "secondary rate limit" in create_resp.text.lower():
-                logging.error("Secondary rate limit triggered. Exiting immediately.")
-                sys.exit(1)
+                logging.warning(f"Secondary rate limit hit on label '{label_name}'. Sleeping {secondary_delay}s...")
+                time.sleep(secondary_delay)
+                secondary_delay = min(secondary_delay * 2, 300)
+                continue  # retry same label

             else:
                 logging.error(f"Access forbidden: {create_resp.text}")
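
For context, a minimal sketch of the backoff pattern this hunk switches to — sleep and double the delay up to a five-minute cap, instead of exiting on the first secondary rate limit. Here post_label() is a hypothetical stand-in for the requests.post call in the script, not one of its actual helpers:

    import time

    def create_with_backoff(post_label, max_delay=300):
        delay = 30  # matches secondary_delay = 30 in the script
        while True:
            resp = post_label()
            if resp.status_code == 201:
                return resp  # label created
            if "secondary rate limit" in resp.text.lower():
                time.sleep(delay)
                delay = min(delay * 2, max_delay)  # double the wait, capped at 300s
                continue  # retry the same label
            return resp  # other errors are handled elsewhere in the script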
4 changes: 4 additions & 0 deletions config/target_repos_status.json
@@ -1,6 +1,10 @@
 {
   "repos": {
+    "hackforla/ai-skills-assessor": {
+      "labeled": true,
+      "date": "2025-08-22T01:26:53Z"
+    },
     "hackforla/ai-skills-assessor-testrepo1": {
       "labeled": false,
       "date": ""
     }
1 change: 1 addition & 0 deletions issue_contributor_fetcher/org_fetcher/org_contr.csv
@@ -0,0 +1 @@

225 changes: 225 additions & 0 deletions issue_contributor_fetcher/org_fetcher/org_fetcher.py
@@ -0,0 +1,225 @@
import os
import json
import requests
import csv
import logging
import time

from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Build a shared session with retries
def build_session():
    s = requests.Session()
    retry = Retry(
        total=4,
        connect=4,
        read=4,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=frozenset(["GET"]),
        raise_on_status=False,
    )
    adapter = HTTPAdapter(max_retries=retry, pool_connections=10, pool_maxsize=10)
    s.mount("https://", adapter)
    s.headers.update(HEADERS)
    return s

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

# Script paths
script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE = os.path.join(script_dir, "target_org.json")
OUTPUT_FILE = os.path.join(script_dir, "org_contr.csv")

# --- GitHub token from secret ---
GITHUB_TOKEN = (
    os.environ.get("PAT")
    or os.environ.get("GITHUB_TOKEN")
    or os.environ.get("GH_TOKEN")
)
if not GITHUB_TOKEN:
    logging.error("GitHub token not found in env (PAT/GITHUB_TOKEN/GH_TOKEN).")
    raise SystemExit(1)

HEADERS = {
    "Authorization": f"Bearer {GITHUB_TOKEN}",
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
    "User-Agent": "org-contributions-fetcher/1.0",
}

# build session after HEADERS is defined
SESSION = build_session()
REQUEST_TIMEOUT = 15

# Load configuration
with open(CONFIG_FILE, "r") as f:
    config = json.load(f)

ORG = config.get("org")
USERS = config.get("users", [])
REPOS = config.get("repos", [])

if not ORG:
    logging.error("Org not specified in config.")
    exit(1)


# --- Helper functions ---
def fetch_repos(org):
    """Fetch all repos for an organization."""
    repos = []
    page = 1
    while True:
        url = f"https://api.github.com/orgs/{org}/repos"
        params = {"per_page": 100, "page": page}
        resp = SESSION.get(url, params=params, timeout=REQUEST_TIMEOUT)

        if resp.status_code != 200:
            logging.error(f"Error fetching repos for org {org}: {resp.status_code} {resp.text}")
            break

        data = resp.json()
        if not data:
            break

        repos.extend([r["full_name"] for r in data])
        page += 1

    logging.info(f"Found {len(repos)} repos in org {org}")
    return repos

def repo_exists(org, repo):
    """Check if a repo exists within the specified organization on GitHub."""
    full_name = f"{org}/{repo}"
    url = f"https://api.github.com/repos/{full_name}"
    resp = SESSION.get(url, timeout=REQUEST_TIMEOUT)

    if resp.status_code == 200:
        return True
    elif resp.status_code == 404:
        logging.warning(f"Repo '{repo}' does not exist in org '{org}'. Skipping.")
        return False
    else:
        logging.error(f"Error checking repo '{repo}' in org '{org}': {resp.status_code} {resp.text}")
        return False

def fetch_contributions(repo, users=None, max_retries=5):
    """Fetch issues and PRs for specified users in a repo, or all if users is empty."""
    users = users or []
    results = []

    if not users:
        # Fetch all contributions in the repo
        users_to_query = [None]
    else:
        users_to_query = users

    for u in users_to_query:
        if u:
            logging.info(f"Fetching contributions for user '{u}' in repo '{repo}'")
            query = f"repo:{repo} involves:{u}"
        else:
            logging.info(f"Fetching all contributions in repo '{repo}'")
            query = f"repo:{repo}"

        page = 1
        while True:
            url = "https://api.github.com/search/issues"
            params = {"q": query, "per_page": 100, "page": page}

            for attempt in range(max_retries):
                resp = SESSION.get(url, params=params, timeout=REQUEST_TIMEOUT)
                remaining = int(resp.headers.get("X-RateLimit-Remaining", 1))
                reset_time = int(resp.headers.get("X-RateLimit-Reset", time.time() + 60))

                if resp.status_code == 200:
                    break
                elif resp.status_code == 403 and "rate limit" in resp.text.lower():
                    wait_seconds = max(reset_time - time.time(), 5)
                    logging.warning(f"Rate limit hit. Waiting {int(wait_seconds)} seconds...")
                    time.sleep(wait_seconds)
                else:
                    logging.error(f"Error fetching contributions for {repo}: {resp.status_code} {resp.text}")
            else:
                logging.error(f"Failed after {max_retries} retries for page {page} in {repo}")
                break

            data = resp.json()
            items = data.get("items", [])
            if not items:
                break

            for item in items:
                assignees = item.get("assignees", [])
                if assignees:
                    # One row per assigned user
                    for assignee in assignees:
                        results.append({
                            "user": assignee["login"],
                            "repo": repo,
                            "number": item["number"],
                            "type": "PR" if "pull_request" in item else "Issue"
                        })
                else:
                    # If no assignees, use the author of the issue/PR
                    author = item.get("user", {}).get("login", "UNKNOWN")
                    results.append({
                        "user": author,
                        "repo": repo,
                        "number": item["number"],
                        "type": "PR" if "pull_request" in item else "Issue"
                    })

            if "next" not in resp.links:
                break

            page += 1

    return results



def org_fetcher(org, users=None, target_repos=None):
    users = users or []  # ensure it's a list even if None
    all_results = []

    if target_repos:
        logging.info(f"Fetching contributions for specified repos: {target_repos}")
        for repo in target_repos:
            full_repo_name = repo if '/' in repo else f"{org}/{repo}"
            repo_results = fetch_contributions(full_repo_name, users)
            all_results.extend(repo_results)
    else:
        repos = fetch_repos(org)
        for repo in repos:
            repo_results = fetch_contributions(repo, users)
            all_results.extend(repo_results)

    # Write CSV
    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    with open(OUTPUT_FILE, "w", newline="") as f:
        fieldnames = ["user", "org", "repo", "number", "type"]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for item in all_results:
            if "/" in item["repo"]:
                org_name, repo_name = item["repo"].split("/", 1)
            else:
                org_name, repo_name = org, item["repo"]
            writer.writerow({
                "user": item["user"],
                "org": org_name,
                "repo": repo_name,
                "number": item["number"],
                "type": item["type"]
            })

    logging.info(f"Complete. Results written to {OUTPUT_FILE}")


if __name__ == "__main__":
    org_fetcher(ORG, USERS, REPOS)
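
The script loads its targets from target_org.json next to it. A hypothetical example of the shape it reads (values are placeholders; an empty "users" list fetches all contributions, and "repos" entries may be bare names or owner/repo, per the full_repo_name logic above):

    {
      "org": "example-org",
      "users": ["octocat"],
      "repos": ["example-repo", "other-org/shared-repo"]
    }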
