|
| 1 | +#!/usr/bin/env python |
| 2 | +""" |
| 3 | +src_size.py <path-or-vcs-url> |
| 4 | +================================ |
| 5 | +Measure how much space a Python package (plus its **runtime** dependencies) |
| 6 | +occupies when built **from source**, without counting the extra build tools we |
| 7 | +install inside a temporary virtual-env (``pip``, ``setuptools``, ``wheel``, |
| 8 | +etc.). |
| 9 | +
|
| 10 | +What you get |
| 11 | +------------ |
| 12 | +1. **Raw source** - size of the directory / checkout you point at. |
| 13 | +2. **Built artefacts** - combined size of the sdist + wheel produced by |
| 14 | + ``python -m build``. |
| 15 | +3. **Runtime tree** - bytes taken by the package **and its runtime deps** once |
| 16 | + installed, *excluding* build-time tooling. |
| 17 | +4. *(optional)* a **breakdown** of every runtime distribution, sorted |
| 18 | + large → small. |
| 19 | +
|
| 20 | +CLI flags |
| 21 | +--------- |
| 22 | +``` |
| 23 | +--no-deps Skip installing dependencies (handy for library-only size) |
| 24 | +--breakdown, -b Show per-package size table |
| 25 | +--include-tools Include build tools (pip/setuptools/wheel) in totals + table |
| 26 | +``` |
| 27 | +
|
| 28 | +> **Note:** The script still seeds pip inside the venv so it works on |
| 29 | +> pip-less interpreters - those files are just ignored by default in the |
| 30 | +> final numbers. |
| 31 | +""" |
| 32 | + |
| 33 | +from __future__ import annotations |
| 34 | + |
| 35 | +import argparse |
| 36 | +import ensurepip |
| 37 | +import json |
| 38 | +import os |
| 39 | +import pathlib |
| 40 | +import subprocess |
| 41 | +import sys |
| 42 | +import tempfile |
| 43 | +from typing import List, Tuple |
| 44 | + |
# Divisor used to convert byte counts into the "MB" figures that are printed
# (1024 * 1024 bytes).
BYTES_IN_MB = 1024 * 1024
# Distribution names treated as build tooling rather than runtime packages.
BUILD_TOOLS = {"wheel", "setuptools", "pip"}
| 47 | + |
| 48 | +# --------------------------------------------------------------------------- |
| 49 | +# Helpers |
| 50 | +# --------------------------------------------------------------------------- |
| 51 | + |
def du(path: pathlib.Path) -> int:
    """Return the combined size in bytes of the regular files at *path*.

    *path* may be a directory, which is walked recursively, or a single
    file, whose own size is returned.  Symlinked files are skipped so a link
    cannot make a file count twice or add data that lives elsewhere.
    Entries that vanish or cannot be stat'ed during the walk are ignored
    instead of aborting the measurement.
    """
    if path.is_file():
        return path.stat().st_size

    total = 0
    for entry in path.rglob("*"):
        try:
            if entry.is_symlink() or not entry.is_file():
                continue
            total += entry.stat().st_size
        except OSError:
            # The entry disappeared or is unreadable; leave it out of the sum.
            continue
    return total
| 55 | + |
| 56 | + |
def run(cmd: List[str]) -> None:
    """Run *cmd* quietly and raise if it exits with a non-zero status.

    Nothing is printed when the command succeeds.  On failure the command's
    combined stdout/stderr is written to ``sys.stderr`` before the error is
    raised, so a broken build or install can be diagnosed.

    Args:
        cmd: Program and arguments.  Path-like items are accepted and are
            converted with ``os.fspath``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.  The
            captured output is available on the exception's ``output``.
    """
    argv = [os.fspath(part) for part in cmd]
    proc = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",  # never fail on undecodable tool output
    )
    if proc.returncode != 0:
        if proc.stdout:
            sys.stderr.write(proc.stdout)
        raise subprocess.CalledProcessError(proc.returncode, argv, output=proc.stdout)
| 59 | + |
| 60 | + |
def build_artefacts(src: pathlib.Path, out: pathlib.Path) -> List[pathlib.Path]:
    """Build an sdist and a wheel of *src* into *out* and return their paths.

    The ``build`` package is installed into the interpreter running this
    script, then ``python -m build --sdist --wheel`` is invoked.

    Args:
        src: Directory containing the project to build.
        out: Directory that receives the built archives.

    Returns:
        The wheel and sdist archives found in *out*, sorted by path.  Only
        regular files ending in ``.whl``, ``.tar.gz`` or ``.zip`` are
        returned, so directories or unrelated dotted files that happen to
        live in *out* are not mistaken for build outputs.
    """
    run([sys.executable, "-m", "pip", "install", "-q", "build"])
    run([sys.executable, "-m", "build", "--sdist", "--wheel", "--outdir", str(out), str(src)])
    archive_endings = (".whl", ".tar.gz", ".zip")
    return sorted(
        item
        for item in out.iterdir()
        if item.is_file() and item.name.endswith(archive_endings)
    )
| 65 | + |
| 66 | + |
def pick_one(artefacts: List[pathlib.Path]) -> pathlib.Path:
    """Choose which built artefact to install.

    The first wheel (``.whl`` suffix) is preferred; if there is no wheel the
    first artefact in the list is returned.

    Raises:
        IndexError: If *artefacts* is empty.  The message states that the
            build produced nothing, rather than surfacing as an unexplained
            "list index out of range".
    """
    if not artefacts:
        raise IndexError("no build artefacts were produced; nothing to install")
    for candidate in artefacts:
        if candidate.suffix == ".whl":
            return candidate
    return artefacts[0]
| 69 | + |
| 70 | + |
def create_venv(venv: pathlib.Path) -> pathlib.Path:
    """Create a virtual environment at *venv* and return its interpreter path.

    After creation, pip is bootstrapped via ``ensurepip`` and then pip,
    setuptools and wheel are upgraded inside the new environment.
    """
    run([sys.executable, "-m", "venv", venv])

    # The interpreter lives in a different sub-directory on Windows.
    if os.name == "nt":
        interpreter = venv / "Scripts/python.exe"
    else:
        interpreter = venv / "bin/python"

    exe = str(interpreter)
    run([exe, "-m", "ensurepip", "--upgrade"])
    tooling = ["pip", "setuptools", "wheel"]
    run([exe, "-m", "pip", "install", "-q", "--upgrade", *tooling])
    return interpreter
| 77 | + |
| 78 | + |
def dist_sizes(py: pathlib.Path) -> List[Tuple[str, int]]:
    """Measure every distribution visible to the interpreter *py*.

    A small probe script is executed with *py*.  For each distribution it
    sums the sizes of the recorded files that exist on disk and prints the
    result as JSON, which is parsed here.

    Returns:
        ``(distribution_name, size_in_bytes)`` pairs, largest first.
    """
    probe = r'''
import importlib.metadata as m, pathlib, json, os
sizes = {}
for dist in m.distributions():
    total = 0
    for entry in dist.files or []:
        p = pathlib.Path(dist.locate_file(entry))
        if p.is_file():
            try:
                total += p.stat().st_size
            except FileNotFoundError:
                pass
    sizes[dist.metadata['Name']] = total
print(json.dumps(sizes))
'''
    raw = subprocess.check_output([str(py), "-c", probe], text=True)
    pairs = list(json.loads(raw).items())
    # Stable sort: equally sized distributions keep the order reported by the probe.
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
| 99 | + |
| 100 | + |
def canonical(name: str) -> str:
    """Return the PEP 503 normalized form of a distribution *name*.

    Runs of ``-``, ``_`` and ``.`` collapse into a single ``-`` and the
    result is lower-cased, so ``Zope.Interface``, ``zope_interface`` and
    ``zope-interface`` all compare equal.
    """
    import re  # local import keeps this helper self-contained

    return re.sub(r"[-_.]+", "-", name).lower()
| 103 | + |
| 104 | + |
| 105 | +# --------------------------------------------------------------------------- |
| 106 | +# Main |
| 107 | +# --------------------------------------------------------------------------- |
| 108 | + |
def main(src: str, include_deps: bool, show_breakdown: bool, include_tools: bool) -> None:
    """Measure and print the sizes of a package built from source.

    Steps: obtain the source tree, build an sdist and wheel, install one
    artefact into a throw-away virtual environment, then report the size of
    the installed distributions.

    Args:
        src: An existing local directory, or otherwise a location handed to
            ``git clone``.
        include_deps: When false, the artefact is installed with
            ``--no-deps``.
        show_breakdown: When true, print one line per distribution.
        include_tools: When true, distributions named in ``BUILD_TOOLS`` are
            kept in the total and the breakdown instead of being filtered.
    """
    ensurepip.bootstrap()  # ensure pip for outer interpreter

    with tempfile.TemporaryDirectory() as tmp_s:
        tmp = pathlib.Path(tmp_s)

        # 1. Obtain source ---------------------------------------------------
        candidate = pathlib.Path(src)
        if candidate.is_dir():
            src_dir = candidate.resolve()
        else:
            src_dir = tmp / "clone"
            run(["git", "clone", "--depth", "1", src, str(src_dir)])

        print(f"Raw source: {du(src_dir)/BYTES_IN_MB:.2f} MB")

        # 2. Build artefacts -------------------------------------------------
        # Build into a dedicated directory so that only build outputs are
        # found there, never the clone or the venv that share the temp root.
        dist_dir = tmp / "dist"
        dist_dir.mkdir()
        artefacts = build_artefacts(src_dir, dist_dir)
        print(f"Sdist+wheel: {sum(p.stat().st_size for p in artefacts)/BYTES_IN_MB:.2f} MB")
        artefact = pick_one(artefacts)

        # 3. Install into temp venv -----------------------------------------
        py = create_venv(tmp / "venv")
        install_cmd = [str(py), "-m", "pip", "install", "-q"]
        if not include_deps:
            install_cmd.append("--no-deps")
        install_cmd.append(str(artefact))
        run(install_cmd)

        dists = dist_sizes(py)
        # Filter build tools unless user asked to keep them
        if include_tools:
            runtime_dists = dists
        else:
            # Normalise the tool names once rather than once per distribution.
            tool_names = {canonical(t) for t in BUILD_TOOLS}
            runtime_dists = [(n, sz) for n, sz in dists if canonical(n) not in tool_names]

        total_runtime = sum(sz for _, sz in runtime_dists)
        print(f"Runtime tree: {total_runtime/BYTES_IN_MB:.2f} MB" + (" (includes build tools)" if include_tools else ""))

        if show_breakdown:
            print("\nBreakdown (descending):")
            for name, sz in runtime_dists:
                print(f" {name:<25} {sz/BYTES_IN_MB:7.2f} MB")
| 147 | + |
| 148 | + |
if __name__ == "__main__":
    # Command-line entry point: parse the flags and hand them to main().
    parser = argparse.ArgumentParser(description="Measure on-disk size of a Python package built from source.")
    # Only an existing directory or a git-clonable location is supported, so
    # the help text says exactly that instead of promising "anything pip
    # understands".
    parser.add_argument("source", help="Path to a local source directory, or a URL that `git clone` accepts.")
    parser.add_argument("--no-deps", action="store_true", help="Skip installing dependencies inside the tmp venv.")
    parser.add_argument("--breakdown", "-b", action="store_true", help="Show per-package size contribution (runtime only).")
    parser.add_argument("--include-tools", action="store_true", help="Include build tools (pip/setuptools/wheel) in totals and table.")
    args = parser.parse_args()
    main(
        args.source,
        include_deps=not args.no_deps,
        show_breakdown=args.breakdown,
        include_tools=args.include_tools,
    )
0 commit comments