|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# VulnerableCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/nexB/vulnerablecode for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | +import logging |
| 10 | +import os |
| 11 | +from hashlib import sha512 |
| 12 | +from pathlib import Path |
| 13 | + |
| 14 | +import saneyaml |
| 15 | +from django.core.management.base import BaseCommand |
| 16 | +from django.core.management.base import CommandError |
| 17 | +from packageurl import PackageURL |
| 18 | + |
| 19 | +from vulnerabilities.models import Package |
| 20 | + |
| 21 | +logger = logging.getLogger(__name__) |
| 22 | + |
| 23 | + |
class Command(BaseCommand):
    """
    Management command exporting VulnerableCode package and vulnerability
    data as YAML files under an existing directory.
    """

    help = "export vulnerablecode data"

    def add_arguments(self, parser):
        """
        Register the positional ``path`` argument: the directory receiving the export.
        """
        parser.add_argument("path")

    def handle(self, *args, **options):
        """
        Validate the ``path`` option then export the data under that directory.
        Raise a CommandError if ``path`` is empty or is not an existing directory.
        """
        path = options.get("path")
        # Reject an empty path explicitly: Path("") is the current directory
        # and would otherwise be accepted by the is_dir() check below.
        if not path:
            raise CommandError("Please enter a valid path")

        git_path = Path(path)
        if not git_path.is_dir():
            raise CommandError("Please enter a valid path")

        self.export_data(git_path)

        self.stdout.write(self.style.SUCCESS("Successfully exported vulnerablecode data"))

    def export_data(self, git_path):
        """
        export vulnerablecode data
        by running `python manage.py export /path/vulnerablecode-data`

        For each package type ("ecosystem"), collect one YAML document per
        versionless package (listing each version with the ids of the
        vulnerabilities affecting it and fixed by it) and one YAML document
        per related vulnerability, then write them under ``git_path`` using
        ``create_file``.
        """
        self.stdout.write("Exporting vulnerablecode data")

        # NOTE(review): distinct() with field names is only supported by Django
        # on PostgreSQL.
        ecosystems = [pkg.type for pkg in Package.objects.distinct("type")]

        for ecosystem in ecosystems:
            # Files are accumulated in memory and written once the whole
            # ecosystem has been processed.
            package_files = {}  # {"package path": "data" }
            vul_files = {}  # {"vulnerability path": "data" }

            packages = (
                Package.objects.filter(type=ecosystem)
                .prefetch_related("vulnerabilities")
                .paginated()
            )

            for purl in packages:
                # All the versions of a package are grouped in a single file
                # keyed by the purl without its version.
                purl_without_version = PackageURL(
                    type=purl.type,
                    namespace=purl.namespace,
                    name=purl.name,
                )

                # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
                pkg_filepath = (
                    f"./aboutcode-packages-{get_purl_hash(purl_without_version)}/{purl.type}/{purl.namespace}/{purl.name}"
                    f"/versions/vulnerabilities.yml"
                )

                package_data = {
                    "purl": str(purl),
                    "affected_by_vulnerabilities": [
                        vuln.vulnerability_id for vuln in purl.affected_by
                    ],
                    "fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing],
                }

                if pkg_filepath in package_files:
                    package_files[pkg_filepath]["versions"].append(package_data)
                else:
                    package_files[pkg_filepath] = {
                        "package": str(purl_without_version),
                        "versions": [package_data],
                    }

                for vul in purl.vulnerabilities.all():
                    vulnerability_id = vul.vulnerability_id
                    # ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
                    vul_filepath = (
                        f"./aboutcode-vulnerabilities-{vulnerability_id[5:7]}/{vulnerability_id[10:12]}"
                        f"/{vulnerability_id}/{vulnerability_id}.yml"
                    )
                    # The same vulnerability can be related to several packages:
                    # serialize it only once per ecosystem to avoid repeating
                    # the same database queries for an identical result.
                    if vul_filepath in vul_files:
                        continue
                    vul_files[vul_filepath] = self._get_vulnerability_data(vul)

            for items in [package_files, vul_files]:
                for filepath, data in items.items():
                    create_file(filepath, git_path, data)

            self.stdout.write(f"Successfully exported {ecosystem} data")

    @staticmethod
    def _get_vulnerability_data(vul):
        """
        Return a mapping of the exported data for the ``vul`` vulnerability.
        """
        return {
            "vulnerability_id": vul.vulnerability_id,
            "aliases": [alias.alias for alias in vul.get_aliases],
            "summary": vul.summary,
            "severities": [severity for severity in vul.severities.values()],
            "references": [ref for ref in vul.references.values()],
            "weaknesses": [
                "CWE-" + str(weakness["cwe_id"]) for weakness in vul.weaknesses.values()
            ],
        }
| 109 | + |
| 110 | + |
def create_file(filepath, git_path, data):
    """
    Write ``data`` dumped as YAML to the ``filepath`` location resolved
    against the ``git_path`` directory, creating any missing parent
    directories first. An existing file is overwritten. Example locations:
    ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
    ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
    ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml
    """
    target = git_path.joinpath(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    yaml_text = saneyaml.dump(data)
    target.write_text(yaml_text, encoding="utf-8")
| 124 | + |
| 125 | + |
def get_purl_hash(purl: PackageURL, length: int = 3) -> str:
    """
    Return the first ``length`` characters, lower cased, of the hexadecimal
    SHA-512 digest of the UTF-8 encoded string form of ``purl``.
    https://github.com/nexB/purldb/pull/235/files#diff-a1fd023bd42d73f56019d540f38be711255403547add15108540d70f9948dd40R154
    """
    hex_digest = sha512(str(purl).encode("utf-8")).hexdigest()
    return hex_digest[:length].lower()
0 commit comments