Skip to content

Commit 6fb67c3

Browse files
committed
Add initial support for collecting ProjectKBP old fix commits.
Signed-off-by: ziad hany <[email protected]>
1 parent dcb0511 commit 6fb67c3

File tree

2 files changed

+124
-0
lines changed

2 files changed

+124
-0
lines changed

vulnerabilities/improvers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from vulnerabilities.pipelines import flag_ghost_packages
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
22+
from vulnerabilities.pipelines.v2_improvers import (
23+
collect_commits_project_kb as collect_commits_project_kb_v2,
24+
)
2225
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2326
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2427
from vulnerabilities.pipelines.v2_improvers import (
@@ -68,5 +71,6 @@
6871
compute_version_rank_v2.ComputeVersionRankPipeline,
6972
compute_advisory_todo_v2.ComputeToDo,
7073
compute_advisory_todo.ComputeToDo,
74+
collect_commits_project_kb_v2.CollectFixCommitsProjectKBPipeline,
7175
]
7276
)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import csv
10+
from pathlib import Path
11+
12+
import saneyaml
13+
from fetchcode.vcs import fetch_via_vcs
14+
15+
from vulnerabilities.models import AdvisoryV2
16+
from vulnerabilities.models import CodeFixV2
17+
from vulnerabilities.pipelines import VulnerableCodePipeline
18+
19+
20+
class CollectFixCommitsProjectKBPipeline(VulnerableCodePipeline):
    """
    Pipeline to collect fix commits from Project KB:
    https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv
    https://github.com/SAP/project-kb/blob/vulnerability-data/statements/*/*.yaml

    Both sources are cloned with ``fetch_via_vcs``. For every
    (vulnerability id, repository URL, commit id) triple whose vulnerability
    id matches an existing ``AdvisoryV2.advisory_id``, a ``CodeFixV2`` entry is
    created for each package reached through the advisory's
    ``impacted_packages`` -> ``affecting_packages`` relations.
    """

    pipeline_id = "kb_project_fix_commits"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    importer_name = "Project KB Importer"
    qualified_name = "kb_project_fix_commits"
    repo_url_vulnerability_data = "git+https://github.com/SAP/project-kb@vulnerability-data"
    repo_url_main = "git+https://github.com/SAP/project-kb"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps."""
        return (
            cls.collect_fix_commits,
            # Remove the clones after a successful run as well; ``on_failure``
            # only covers the error path.
            cls.clean_downloads,
        )

    def collect_fix_commits(self):
        """Clone both Project KB branches and store the fix commits they list."""
        self.vcs_response_main = fetch_via_vcs(self.repo_url_main)
        self.vcs_response_vuln_data = fetch_via_vcs(self.repo_url_vulnerability_data)

        self.log("Processing ProjectKBP fix commits.")

        self._collect_from_csv(
            Path(self.vcs_response_main.dest_dir) / "MSR2019/dataset/vulas_db_msr2019_release.csv"
        )
        self._collect_from_statements(Path(self.vcs_response_vuln_data.dest_dir) / "statements")

    def _get_advisory(self, vulnerability_id):
        """Return the AdvisoryV2 whose advisory_id is `vulnerability_id`, or None (logged)."""
        try:
            return AdvisoryV2.objects.get(advisory_id=vulnerability_id)
        except AdvisoryV2.DoesNotExist:
            self.log(f"Can't find vulnerability_id: {vulnerability_id}")
        except AdvisoryV2.MultipleObjectsReturned:
            # NOTE(review): whether advisory_id is unique is not visible from
            # this module; skip ambiguous ids instead of aborting the run.
            self.log(f"Multiple advisories found for vulnerability_id: {vulnerability_id}")
        return None

    def _collect_from_csv(self, csv_database_filepath):
        """Create code fix entries from the rows of the MSR2019 CSV dataset."""
        try:
            csv_file = open(csv_database_filepath, mode="r", newline="", encoding="utf-8")
        except FileNotFoundError:
            self.log(f"CSV file not found: {csv_database_filepath}")
            return

        with csv_file:
            reader = csv.reader(csv_file)
            next(reader, None)  # Skip header row
            for row in reader:
                if len(row) != 4:
                    continue

                # NOTE(review): the fourth column (label) is currently ignored;
                # confirm whether rows should be filtered by it.
                vulnerability_id, repo_url, commit_hash, _label = row
                if not vulnerability_id or not repo_url or not commit_hash:
                    continue

                advisory = self._get_advisory(vulnerability_id)
                if advisory is None:
                    continue

                self.create_codefix_entries(advisory, repo_url, commit_hash, vulnerability_id)

    def _collect_from_statements(self, base_path):
        """Create code fix entries from every ``statement.yaml`` found under `base_path`."""
        for file_path in base_path.rglob("statement.yaml"):
            with open(file_path, encoding="utf-8") as f:
                vulnerability_fixes_data = saneyaml.load(f)

            # An empty or malformed statement does not load as a mapping.
            if not isinstance(vulnerability_fixes_data, dict):
                continue

            vulnerability_id = vulnerability_fixes_data.get("vulnerability_id")
            if not vulnerability_id:
                continue

            advisory = self._get_advisory(vulnerability_id)
            if advisory is None:
                continue

            # ``or []`` guards against keys that are present with a null value.
            for commit_data in vulnerability_fixes_data.get("fixes") or []:
                for commit in commit_data.get("commits") or []:
                    commit_id = commit.get("id")
                    repo_url = commit.get("repository")

                    if not commit_id or not repo_url:
                        continue

                    self.create_codefix_entries(advisory, repo_url, commit_id, vulnerability_id)

    def create_codefix_entries(self, advisory, repo_url, commit_id, vulnerability_id):
        """
        Get or create a CodeFixV2 for `commit_id` of `repo_url` for each package
        affected by `advisory`. `vulnerability_id` is only used for logging.
        """
        repo_url = repo_url.rstrip("/").removesuffix(".git")
        vcs_url = f"{repo_url}/commit/{commit_id}"

        for impact in advisory.impacted_packages.all():
            for package in impact.affecting_packages.all():
                _, created = CodeFixV2.objects.get_or_create(
                    commits=[vcs_url],
                    advisory=advisory,
                    affected_package=package,
                )
                if created:
                    self.log(
                        f"Created CodeFix entry for vulnerability_id: {vulnerability_id} with VCS URL {vcs_url}"
                    )

    def clean_downloads(self):
        """Delete whichever cloned repositories exist; safe to call more than once."""
        attribute_names = ("vcs_response_main", "vcs_response_vuln_data")
        # getattr: a fetch may have failed before its attribute was assigned.
        pending = [name for name in attribute_names if getattr(self, name, None)]
        if not pending:
            return

        self.log("Removing cloned repository")
        for name in pending:
            getattr(self, name).delete()
            # Reset so a later call (e.g. from on_failure) does not delete twice.
            setattr(self, name, None)

    def on_failure(self):
        """Remove any cloned repositories when the pipeline fails."""
        self.clean_downloads()

0 commit comments

Comments
 (0)