diff --git a/Cargo.lock b/Cargo.lock index 06943f15..559c7a9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -444,6 +444,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + [[package]] name = "fastrand" version = "2.0.1" @@ -628,7 +634,7 @@ checksum = "bb07a4ffed2093b118a525b1d8f5204ae274faed5604537caf7135d0f18d9887" dependencies = [ "log", "plain", - "scroll", + "scroll 0.12.0", ] [[package]] @@ -1215,6 +1221,17 @@ dependencies = [ "sha2", ] +[[package]] +name = "pdb" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82040a392923abe6279c00ab4aff62d5250d1c8555dc780e4b02783a7aa74863" +dependencies = [ + "fallible-iterator", + "scroll 0.11.0", + "uuid", +] + [[package]] name = "pem" version = "3.0.3" @@ -1325,9 +1342,10 @@ dependencies = [ "object", "octocrab", "once_cell", + "pdb", "rayon", "reqwest", - "scroll", + "scroll 0.12.0", "semver", "serde", "serde_json", @@ -1428,10 +1446,12 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "winreg", ] @@ -1587,6 +1607,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" + [[package]] name = "scroll" version = "0.12.0" @@ -2190,6 +2216,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" + [[package]] name = "vcpkg" version = "0.2.15" @@ -2289,6 +2321,19 @@ version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.68" diff --git a/Cargo.toml b/Cargo.toml index d6432348..f7523b2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,12 +22,13 @@ normalize-path = "0.2.1" object = "0.32.2" octocrab = { version = "0.34.1", features = ["rustls", "stream"] } once_cell = "1.19.0" +pdb = "0.8.0" rayon = "1.8.1" -reqwest = { version = "0.11.24", features = ["rustls"] } +reqwest = { version = "0.11.24", features = ["rustls", "stream"] } scroll = "0.12.0" semver = "1.0.22" -serde_json = "1.0.114" serde = { version = "1.0.197", features = ["derive"] } +serde_json = "1.0.114" sha2 = "0.10.8" tar = "0.4.40" tempfile = "3.10.0" diff --git a/docs/running.rst b/docs/running.rst index 2ba3b83b..02e4e2a4 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -151,11 +151,17 @@ Common configurations include: A debug build. No optimizations. The archive flavor denotes the content in the archive. See -:ref:`distributions` for more. Casual users will likely want to use the -``install_only`` archive, as most users do not need the build artifacts -present in the ``full`` archive. The ``install_only`` archive doesn't -include the build configuration in its file name. It's based on the fastest -available build configuration for a given target. +:ref:`distributions` for more. 
+ +Casual users will likely want to use the ``install_only`` archive, as most +users do not need the build artifacts present in the ``full`` archive. +The ``install_only`` archive doesn't include the build configuration in its +file name. It's based on the fastest available build configuration for a given +target. + +An ``install_only_stripped`` archive is also available. This archive is +equivalent to ``install_only``, but without debug symbols, which results in a +smaller download and on-disk footprint. Extracting Distributions ======================== diff --git a/pythonbuild/downloads.py b/pythonbuild/downloads.py index 4c669a8f..512f2d7f 100644 --- a/pythonbuild/downloads.py +++ b/pythonbuild/downloads.py @@ -175,18 +175,21 @@ "sha256": "04cb77c660f09df017a57738ae9635ef23a506024789f2f18da1304b45af2023", "version": "14.0.3+20220508", }, + # Remember to update LLVM_URL in src/release.rs whenever upgrading. "llvm-18-x86_64-linux": { "url": "https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-gnu_only-x86_64-unknown-linux-gnu.tar.zst", "size": 242840506, "sha256": "080c233fc7d75031b187bbfef62a4f9abc01188effb0c68fbc7dc4bc7370ee5b", "version": "18.0.8+20240713", }, + # Remember to update LLVM_URL in src/release.rs whenever upgrading. "llvm-aarch64-macos": { "url": "https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-aarch64-apple-darwin.tar.zst", "size": 136598617, "sha256": "320da8d639186e020e7d54cdc35b7a5473b36cef08fdf7b22c03b59a273ba593", "version": "18.0.8+20240713", }, + # Remember to update LLVM_URL in src/release.rs whenever upgrading. 
"llvm-x86_64-macos": { "url": "https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-x86_64-apple-darwin.tar.zst", "size": 136599290, diff --git a/src/github.rs b/src/github.rs index cd95e953..1f4c9e36 100644 --- a/src/github.rs +++ b/src/github.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::release::{bootstrap_llvm, produce_install_only_stripped}; use { crate::release::{produce_install_only, RELEASE_TRIPLES}, anyhow::{anyhow, Result}, @@ -256,9 +257,12 @@ pub async fn command_fetch_release_distributions(args: &ArgMatches) -> Result<() } } + let llvm_dir = bootstrap_llvm().await?; + install_paths .par_iter() .try_for_each(|path| -> Result<()> { + // Create the `install_only` archive. println!( "producing install_only archive from {}", path.file_name() @@ -276,6 +280,25 @@ pub async fn command_fetch_release_distributions(args: &ArgMatches) -> Result<() .to_string_lossy() ); + // Create the `install_only_stripped` archive. 
+            println!(
+                "producing install_only_stripped archive from {}",
+                dest_path
+                    .file_name()
+                    .expect("should have file name")
+                    .to_string_lossy()
+            );
+
+            let dest_path = produce_install_only_stripped(&dest_path, &llvm_dir)?;
+
+            println!(
+                "releasing {}",
+                dest_path
+                    .file_name()
+                    .expect("should have file name")
+                    .to_string_lossy()
+            );
+
             Ok(())
         })?;
 
@@ -358,6 +381,20 @@ pub async fn command_upload_release_distributions(args: &ArgMatches) -> Result<(
                 ),
                 format!("cpython-{}+{}-{}-install_only.tar.gz", version, tag, triple),
             );
+
+            // Key the stripped archive on its own `install_only_stripped` name. Reusing the
+            // `install_only` name here would pair the unstripped archive with the stripped
+            // release name (and presumably displace the `install_only` mapping above).
+            wanted_filenames.insert(
+                format!(
+                    "cpython-{}-{}-install_only_stripped-{}.tar.gz",
+                    version, triple, datetime
+                ),
+                format!(
+                    "cpython-{}+{}-{}-install_only_stripped.tar.gz",
+                    version, tag, triple
+                ),
+            );
         }
     }
 
diff --git a/src/main.rs b/src/main.rs
index 1e217181..b12ef0fa 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -85,6 +85,18 @@ fn main_impl() -> Result<()> {
             ),
     );
 
+    let app = app.subcommand(
+        Command::new("convert-install-only-stripped")
+            .about("Convert an install_only .tar.gz archive to an install_only_stripped tar.gz archive")
+            .arg(
+                Arg::new("path")
+                    .required(true)
+                    .action(ArgAction::Append)
+                    .value_parser(value_parser!(PathBuf))
+                    .help("Path of archive to convert"),
+            ),
+    );
+
     let app = app.subcommand(
         Command::new("upload-release-distributions")
             .about("Upload release distributions to a GitHub release")
@@ -174,7 +186,20 @@ fn main_impl() -> Result<()> {
     match matches.subcommand() {
         Some(("convert-install-only", args)) => {
             for path in args.get_many::<PathBuf>("path").unwrap() {
-                let dest_path = crate::release::produce_install_only(path)?;
+                let dest_path = release::produce_install_only(path)?;
+                println!("wrote {}", dest_path.display());
+            }
+
+            Ok(())
+        }
+        Some(("convert-install-only-stripped", args)) => {
+            let llvm_dir = tokio::runtime::Builder::new_current_thread()
+                .enable_all()
+                .build()
+                .unwrap()
+                .block_on(release::bootstrap_llvm())?;
+            for path in args.get_many::<PathBuf>("path").unwrap() {
+                let dest_path =
release::produce_install_only_stripped(path, &llvm_dir)?; println!("wrote {}", dest_path.display()); } @@ -185,18 +210,16 @@ fn main_impl() -> Result<()> { .enable_all() .build() .unwrap() - .block_on(crate::github::command_fetch_release_distributions(args)) + .block_on(github::command_fetch_release_distributions(args)) } Some(("upload-release-distributions", args)) => { tokio::runtime::Builder::new_current_thread() .enable_all() .build() .unwrap() - .block_on(crate::github::command_upload_release_distributions(args)) - } - Some(("validate-distribution", args)) => { - crate::validation::command_validate_distribution(args) + .block_on(github::command_upload_release_distributions(args)) } + Some(("validate-distribution", args)) => validation::command_validate_distribution(args), _ => Err(anyhow!("invalid sub-command")), } } diff --git a/src/release.rs b/src/release.rs index fa6c3387..ba2ef5e4 100644 --- a/src/release.rs +++ b/src/release.rs @@ -2,6 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use anyhow::Context; +use futures::StreamExt; + +use object::FileKind; +use std::process::{Command, Stdio}; +use url::Url; use { crate::json::parse_python_json, anyhow::{anyhow, Result}, @@ -200,7 +206,7 @@ pub static RELEASE_TRIPLES: Lazy> = Lazy:: h }); -/// Convert a .tar.zst archive to an install only .tar.gz archive. +/// Convert a .tar.zst archive to an install-only .tar.gz archive. pub fn convert_to_install_only(reader: impl BufRead, writer: W) -> Result { let dctx = zstd::stream::Decoder::new(reader)?; @@ -279,6 +285,98 @@ pub fn convert_to_install_only(reader: impl BufRead, writer: W) -> Res Ok(builder.into_inner()?.finish()?) } +/// Run `llvm-strip` over the given data, returning the stripped data. 
+fn llvm_strip(data: &[u8], llvm_dir: &Path) -> Result<Vec<u8>> {
+    let mut command = Command::new(llvm_dir.join("bin/llvm-strip"))
+        .arg("--strip-debug")
+        .arg("-")
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+        .with_context(|| "failed to spawn llvm-strip")?;
+
+    command
+        .stdin
+        .as_mut()
+        .unwrap()
+        .write_all(data)
+        .with_context(|| "failed to write data to llvm-strip")?;
+
+    let output = command
+        .wait_with_output()
+        .with_context(|| "failed to wait for llvm-strip")?;
+    if !output.status.success() {
+        return Err(anyhow!("llvm-strip failed: {}", output.status));
+    }
+
+    Ok(output.stdout)
+}
+
+/// Given an install-only .tar.gz archive, strip the underlying build.
+pub fn convert_to_stripped<W: Write>(
+    reader: impl BufRead,
+    writer: W,
+    llvm_dir: &Path,
+) -> Result<W> {
+    let dctx = flate2::read::GzDecoder::new(reader);
+
+    let mut tar_in = tar::Archive::new(dctx);
+
+    let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::default());
+
+    let mut builder = tar::Builder::new(writer);
+
+    for entry in tar_in.entries()? {
+        let mut entry = entry?;
+
+        let mut data = vec![];
+        entry.read_to_end(&mut data)?;
+
+        let path = entry.path()?;
+
+        // Drop PDB files.
+        match pdb::PDB::open(std::io::Cursor::new(&data)) {
+            Ok(_) => {
+                continue;
+            }
+            Err(err) => {
+                if path.extension().is_some_and(|ext| ext == "pdb") {
+                    println!(
+                        "file with `.pdb` extension ({}) failed to parse as PDB :{err}",
+                        path.display()
+                    );
+                }
+            }
+        }
+
+        // If we have an ELF, Mach-O, or PE file, strip it in-memory with `llvm-strip`, and
+        // return the stripped data.
+        if matches!(
+            FileKind::parse(data.as_slice()),
+            Ok(FileKind::Elf32
+                | FileKind::Elf64
+                | FileKind::MachO32
+                | FileKind::MachO64
+                | FileKind::MachOFat32
+                | FileKind::MachOFat64
+                | FileKind::Pe32
+                | FileKind::Pe64)
+        ) {
+            data = llvm_strip(&data, llvm_dir)
+                .with_context(|| format!("failed to strip {}", path.display()))?;
+        }
+
+        let mut header = entry.header().clone();
+        header.set_size(data.len() as u64);
+        header.set_cksum();
+
+        builder.append(&header, std::io::Cursor::new(data))?;
+    }
+
+    Ok(builder.into_inner()?.finish()?)
+}
+
+/// Create an install-only .tar.gz archive from a .tar.zst archive.
 pub fn produce_install_only(tar_zst_path: &Path) -> Result<PathBuf> {
     let buf = std::fs::read(tar_zst_path)?;
 
@@ -306,3 +404,138 @@ pub fn produce_install_only(tar_zst_path: &Path) -> Result<PathBuf> {
 
     Ok(dest_path)
 }
+
+/// Derive the `install_only_stripped` file name from an `install_only` archive file name.
+///
+/// The last `-`-separated `install_only` component is replaced, so both
+/// `<prefix>-install_only-<datetime>.tar.gz` and `<prefix>-install_only.tar.gz` map to
+/// their `install_only_stripped` counterparts. Names without such a component get their
+/// last component replaced instead. The result always ends in `.tar.gz`.
+fn install_only_stripped_name(filename: &str) -> String {
+    let stem = filename.strip_suffix(".tar.gz").unwrap_or(filename);
+    let mut name_parts = stem.split('-').collect::<Vec<_>>();
+
+    // `split` always yields at least one part, so the fallback index is in bounds.
+    let flavor_index = name_parts
+        .iter()
+        .rposition(|part| *part == "install_only")
+        .unwrap_or(name_parts.len() - 1);
+    name_parts[flavor_index] = "install_only_stripped";
+
+    format!("{}.tar.gz", name_parts.join("-"))
+}
+
+/// Create an `install_only_stripped` .tar.gz archive from an `install_only` .tar.gz archive.
+///
+/// Binaries are stripped with the `llvm-strip` found under `llvm_dir`. The result is
+/// written next to `tar_gz_path`, and its path is returned.
+pub fn produce_install_only_stripped(tar_gz_path: &Path, llvm_dir: &Path) -> Result<PathBuf> {
+    let buf = std::fs::read(tar_gz_path)?;
+
+    let size_before = buf.len();
+
+    let gz_data = convert_to_stripped(
+        std::io::Cursor::new(buf),
+        std::io::Cursor::new(vec![]),
+        llvm_dir,
+    )?
+    .into_inner();
+
+    let size_after = gz_data.len();
+
+    println!(
+        "stripped {} from {size_before} to {size_after} bytes",
+        tar_gz_path.display()
+    );
+
+    let filename = tar_gz_path
+        .file_name()
+        .expect("should have filename")
+        .to_string_lossy();
+
+    let dest_path = tar_gz_path.with_file_name(install_only_stripped_name(&filename));
+    std::fs::write(&dest_path, gz_data)?;
+
+    Ok(dest_path)
+}
+
+/// URL from which to download LLVM.
+///
+/// To be kept in sync with `pythonbuild/downloads.py`.
+static LLVM_URL: Lazy<Url> = Lazy::new(|| {
+    if cfg!(target_os = "macos") {
+        if std::env::consts::ARCH == "aarch64" {
+            Url::parse("https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-aarch64-apple-darwin.tar.zst").unwrap()
+        } else if std::env::consts::ARCH == "x86_64" {
+            Url::parse("https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-x86_64-apple-darwin.tar.zst").unwrap()
+        } else {
+            panic!("unsupported macOS architecture");
+        }
+    } else if cfg!(target_os = "linux") {
+        Url::parse("https://github.com/indygreg/toolchain-tools/releases/download/toolchain-bootstrap%2F20240713/llvm-18.0.8+20240713-gnu_only-x86_64-unknown-linux-gnu.tar.zst").unwrap()
+    } else {
+        panic!("unsupported platform");
+    }
+});
+
+/// Bootstrap `llvm` for the current platform.
+///
+/// Downloads and unpacks the toolchain into `build/llvm` unless the tarball for the
+/// current version is already present there.
+///
+/// Returns the path to the top-level `llvm` directory.
+pub async fn bootstrap_llvm() -> Result<PathBuf> {
+    let url = &*LLVM_URL;
+    let filename = url.path_segments().unwrap().last().unwrap();
+
+    let build_dir = Path::new("build");
+    let llvm_dir = build_dir.join("llvm");
+
+    // If `llvm` is already available with the target version, return it.
+    if llvm_dir.join(filename).exists() {
+        return Ok(llvm_dir.join("llvm"));
+    }
+
+    println!("Downloading LLVM tarball from: {url}");
+
+    // Stage the download inside `build` rather than the system temporary directory, so
+    // the final rename stays within one directory instead of crossing filesystems.
+    tokio::fs::create_dir_all(build_dir)
+        .await
+        .context("failed to create build directory")?;
+    let temp_dir = tempfile::TempDir::new_in(build_dir)?;
+
+    // Download the tarball, rejecting HTTP error responses instead of saving their body.
+    let tarball_path = temp_dir.path().join(filename);
+    let mut tarball_file = tokio::fs::File::create(&tarball_path).await?;
+    let mut bytes_stream = reqwest::Client::new()
+        .get(url.clone())
+        .send()
+        .await?
+        .error_for_status()
+        .with_context(|| format!("failed to download {url}"))?
+        .bytes_stream();
+    while let Some(chunk) = bytes_stream.next().await {
+        tokio::io::copy(&mut chunk?.as_ref(), &mut tarball_file).await?;
+    }
+
+    // Decompress the tarball.
+    let tarball = std::fs::File::open(&tarball_path)?;
+    let tar = zstd::stream::Decoder::new(std::io::BufReader::new(tarball))?;
+    let mut archive = tar::Archive::new(tar);
+    archive.unpack(temp_dir.path())?;
+
+    // Persist the directory.
+    match tokio::fs::remove_dir_all(&llvm_dir).await {
+        Ok(_) => {}
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
+        Err(err) => return Err(err).context("failed to remove existing llvm directory"),
+    }
+    tokio::fs::rename(temp_dir.path(), &llvm_dir)
+        .await
+        .context("failed to move llvm directory into place")?;
+    // The directory now lives at `llvm_dir`; keep `temp_dir` from cleaning up on drop.
+    let _ = temp_dir.into_path();
+
+    Ok(llvm_dir.join("llvm"))
+}