diff --git a/Cargo.lock b/Cargo.lock index 9609dcde..bf11ea1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -546,7 +546,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" dependencies = [ "const_format_proc_macros", - "konst", ] [[package]] @@ -1184,7 +1183,6 @@ dependencies = [ "anyhow", "bstr", "bumpalo", - "const_format", "csv-async", "ctor", "derive_more", @@ -1198,10 +1196,12 @@ dependencies = [ "fspy_test_bin", "futures-util", "libc", + "materialized_artifact", + "materialized_artifact_build", "nix 0.30.1", "ouroboros", - "rand 0.9.2", "rustc-hash", + "sha2", "subprocess_test", "tar", "tempfile", @@ -1213,7 +1213,6 @@ dependencies = [ "winapi", "wincode", "winsafe 0.0.24", - "xxhash-rust", ] [[package]] @@ -1683,21 +1682,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "konst" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330f0e13e6483b8c34885f7e6c9f19b1a7bd449c673fbb948a51c99d66ef74f4" -dependencies = [ - "konst_macro_rules", -] - -[[package]] -name = "konst_macro_rules" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" - [[package]] name = "kqueue" version = "1.1.1" @@ -1860,6 +1844,20 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "materialized_artifact" +version = "0.0.0" +dependencies = [ + "tempfile", +] + +[[package]] +name = "materialized_artifact_build" +version = "0.0.0" +dependencies = [ + "xxhash-rust", +] + [[package]] name = "memchr" version = "2.8.0" diff --git a/Cargo.toml b/Cargo.toml index efeba969..85c0f40f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,6 @@ cc = "1.2.39" clap = "4.5.53" color-eyre = "0.6.5" compact_str = "0.9.0" -const_format = "0.2.34" constcat = "0.6.1" copy_dir = "0.1.3" cow-utils = "0.1.3" @@ -70,11 +69,13 @@ derive_more = "2.0.1" diff-struct = "0.5.3" directories = "6.0.0" elf = { version = "0.8.0", default-features = false } +materialized_artifact = { path = "crates/materialized_artifact" } +materialized_artifact_build = { path = "crates/materialized_artifact_build" } flate2 = "1.0.35" fspy = { path = "crates/fspy" } fspy_detours_sys = { path = "crates/fspy_detours_sys" } -fspy_preload_unix = { path = "crates/fspy_preload_unix", artifact = "cdylib" } -fspy_preload_windows = { path = "crates/fspy_preload_windows", artifact = "cdylib" } +fspy_preload_unix = { path = "crates/fspy_preload_unix", artifact = "cdylib", target = "target" } +fspy_preload_windows = { path = "crates/fspy_preload_windows", artifact = "cdylib", target = "target" } fspy_seccomp_unotify = { path = "crates/fspy_seccomp_unotify" } fspy_shared = { path = "crates/fspy_shared" } fspy_shared_unix = { path = "crates/fspy_shared_unix" } @@ -103,7 +104,6 @@ pretty_assertions = "1.4.1" pty_terminal = { path = "crates/pty_terminal" } pty_terminal_test = { path = "crates/pty_terminal_test" } pty_terminal_test_client = { path = "crates/pty_terminal_test_client" } -rand = "0.9.1" ratatui = "0.30.0" rayon = "1.10.0" ref-cast = "1.0.24" diff --git a/crates/fspy/Cargo.toml b/crates/fspy/Cargo.toml index a9a1ffff..e8443e55 100644 --- a/crates/fspy/Cargo.toml +++ b/crates/fspy/Cargo.toml @@ -9,36 +9,30 @@ allocator-api2 = { workspace = true, features = ["alloc"] } wincode = { workspace = true } bstr = { workspace = true, default-features = false } bumpalo = { workspace = true } -const_format = { workspace = true, features = ["fmt"] } derive_more = { workspace = true, features = ["debug"] } +materialized_artifact = { workspace = true } fspy_shared = { workspace = true } futures-util = { workspace = true } libc = { workspace = true } ouroboros = { workspace = true } -rand = { workspace = true } rustc-hash = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["net", "process", "io-util", "sync", "rt"] } tokio-util = { workspace = true } which = { workspace = true, features = ["tracing"] } -xxhash-rust = { workspace = true } [target.'cfg(target_os = "linux")'.dependencies] fspy_seccomp_unotify = { workspace = true, features = ["supervisor"] } nix = { workspace = true, features = ["uio"] } tokio = { workspace = true, features = ["bytes"] } -[target.'cfg(all(unix, not(target_env = "musl")))'.dependencies] -fspy_preload_unix = { workspace = true } - [target.'cfg(unix)'.dependencies] fspy_shared_unix = { workspace = true } nix = { workspace = true, features = ["fs", "process", "socket", "feature"] } [target.'cfg(target_os = "windows")'.dependencies] fspy_detours_sys = { workspace = true } -fspy_preload_windows = { workspace = true } winapi = { workspace = true, features = ["winbase", "securitybaseapi", "handleapi"] } winsafe = { workspace = true } @@ -59,11 +53,19 @@ fspy_test_bin = { path = "../fspy_test_bin", artifact = "bin", target = "aarch64 [target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dev-dependencies] fspy_test_bin = { path = "../fspy_test_bin", artifact = "bin", target = "x86_64-unknown-linux-musl" } +# Artifact build-deps must be unconditional: cargo's resolver panics when +# `artifact = "cdylib"` deps live under a `[target.cfg.build-dependencies]` +# block on cross-compile. Each preload crate's source is cfg-gated to compile +# as an empty cdylib on non-applicable targets, so the unused cross-target +# builds are cheap. [build-dependencies] anyhow = { workspace = true } +materialized_artifact_build = { workspace = true } flate2 = { workspace = true } +fspy_preload_unix = { workspace = true } +fspy_preload_windows = { workspace = true } +sha2 = { workspace = true } tar = { workspace = true } -xxhash-rust = { workspace = true, features = ["xxh3"] } [lints] workspace = true @@ -72,4 +74,4 @@ workspace = true doctest = false [package.metadata.cargo-shear] -ignored = ["ctor", "fspy_test_bin"] +ignored = ["ctor", "fspy_test_bin", "fspy_preload_unix", "fspy_preload_windows"] diff --git a/crates/fspy/build.rs b/crates/fspy/build.rs index 84987b42..b6659f15 100644 --- a/crates/fspy/build.rs +++ b/crates/fspy/build.rs @@ -1,15 +1,16 @@ use std::{ - env::{self, current_dir}, + env, + fmt::Write as _, fs, io::{Cursor, Read}, - path::Path, + path::{Path, PathBuf}, process::{Command, Stdio}, }; use anyhow::{Context, bail}; -use xxhash_rust::xxh3::xxh3_128; +use sha2::{Digest, Sha256}; -fn download(url: &str) -> anyhow::Result> { +fn download(url: &str) -> anyhow::Result> { let curl = Command::new("curl") .args([ "-f", // fail on HTTP errors @@ -22,15 +23,14 @@ fn download(url: &str) -> anyhow::Result> { if !output.status.success() { bail!("curl exited with status {} trying to download {}", output.status, url); } - Ok(Cursor::new(output.stdout)) + Ok(output.stdout) } -fn unpack_tar_gz(content: impl Read, path: &str) -> anyhow::Result> { +fn unpack_tar_gz(tarball: impl Read, path: &str) -> anyhow::Result> { use flate2::read::GzDecoder; use tar::Archive; - // let path = path.as_ref(); - let tar = GzDecoder::new(content); + let tar = GzDecoder::new(tarball); let mut archive = Archive::new(tar); for entry in archive.entries()? { let mut entry = entry?; @@ -43,89 +43,124 @@ fn unpack_tar_gz(content: impl Read, path: &str) -> anyhow::Result> { bail!("Path {path} not found in tar gz") } -fn download_and_unpack_tar_gz(url: &str, path: &str) -> anyhow::Result> { - let resp = download(url).context(format!("Failed to get ok response from {url}"))?; - let data = unpack_tar_gz(resp, path) - .context(format!("Failed to download or unpack {path} out of {url}"))?; - Ok(data) +fn sha256_hex(bytes: &[u8]) -> String { + let digest = Sha256::digest(bytes); + let mut s = String::with_capacity(64); + for b in digest { + write!(&mut s, "{b:02x}").unwrap(); + } + s } -/// (url, `path_in_targz`, `expected_hash`) -type BinaryDownload = (&'static str, &'static str, u128); +struct BinaryDownload { + /// Identifier used both as the on-disk filename in `OUT_DIR` and as the + /// env-var prefix consumed by `artifact!($name)` at runtime. + name: &'static str, + /// GitHub release asset URL. + url: &'static str, + /// Path of the binary within the tarball. + path_in_targz: &'static str, + /// SHA-256 of the extracted binary. Doubles as the cache key: an + /// already-extracted binary in `OUT_DIR` whose content hashes to this + /// value is reused without hitting the network. + expected_sha256: &'static str, +} const MACOS_BINARY_DOWNLOADS: &[(&str, &[BinaryDownload])] = &[ ( "aarch64", &[ - ( - "https://github.com/branchseer/oils-for-unix-build/releases/download/oils-for-unix-0.37.0/oils-for-unix-0.37.0-darwin-arm64.tar.gz", - "oils-for-unix", - 282_073_174_065_923_237_490_435_663_309_538_399_576, - ), - ( - "https://github.com/uutils/coreutils/releases/download/0.4.0/coreutils-0.4.0-aarch64-apple-darwin.tar.gz", - "coreutils-0.4.0-aarch64-apple-darwin/coreutils", - 35_998_406_686_137_668_997_937_014_088_186_935_383, - ), + // https://github.com/branchseer/oils-for-unix-build/releases/tag/oils-for-unix-0.37.0 + BinaryDownload { + name: "oils_for_unix", + url: "https://github.com/branchseer/oils-for-unix-build/releases/download/oils-for-unix-0.37.0/oils-for-unix-0.37.0-darwin-arm64.tar.gz", + path_in_targz: "oils-for-unix", + expected_sha256: "ce4bb80b15f0a0371af08b19b65bfa5ea17d30429ebb911f487de3d2bcc7a07d", + }, + // https://github.com/uutils/coreutils/releases/tag/0.4.0 + BinaryDownload { + name: "coreutils", + url: "https://github.com/uutils/coreutils/releases/download/0.4.0/coreutils-0.4.0-aarch64-apple-darwin.tar.gz", + path_in_targz: "coreutils-0.4.0-aarch64-apple-darwin/coreutils", + expected_sha256: "8e8f38d9323135a19a73d617336fce85380f3c46fcb83d3ae3e031d1c0372f21", + }, ], ), ( "x86_64", &[ - ( - "https://github.com/branchseer/oils-for-unix-build/releases/download/oils-for-unix-0.37.0/oils-for-unix-0.37.0-darwin-x86_64.tar.gz", - "oils-for-unix", - 142_673_558_272_427_867_831_039_361_796_426_010_330, - ), - ( - "https://github.com/uutils/coreutils/releases/download/0.4.0/coreutils-0.4.0-x86_64-apple-darwin.tar.gz", - "coreutils-0.4.0-x86_64-apple-darwin/coreutils", - 120_898_281_113_671_104_995_723_556_995_187_526_689, - ), + // https://github.com/branchseer/oils-for-unix-build/releases/tag/oils-for-unix-0.37.0 + BinaryDownload { + name: "oils_for_unix", + url: "https://github.com/branchseer/oils-for-unix-build/releases/download/oils-for-unix-0.37.0/oils-for-unix-0.37.0-darwin-x86_64.tar.gz", + path_in_targz: "oils-for-unix", + expected_sha256: "cf1a95993127770e2a5fff277cd256a2bb28cf97d7f83ae42fdccc172cdb540d", + }, + // https://github.com/uutils/coreutils/releases/tag/0.4.0 + BinaryDownload { + name: "coreutils", + url: "https://github.com/uutils/coreutils/releases/download/0.4.0/coreutils-0.4.0-x86_64-apple-darwin.tar.gz", + path_in_targz: "coreutils-0.4.0-x86_64-apple-darwin/coreutils", + expected_sha256: "6be8bee6e8b91fc44a465203b9cc30538af00084b6657dc136d9e55837753eb1", + }, ], ), ]; -fn fetch_macos_binaries() -> anyhow::Result<()> { +fn fetch_macos_binaries(out_dir: &Path) -> anyhow::Result<()> { if env::var("CARGO_CFG_TARGET_OS").unwrap() != "macos" { return Ok(()); } - let out_dir = current_dir().unwrap().join(Path::new(&std::env::var_os("OUT_DIR").unwrap())); - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let downloads = MACOS_BINARY_DOWNLOADS .iter() .find(|(arch, _)| *arch == target_arch) .context(format!("Unsupported macOS arch: {target_arch}"))? .1; - // let downloads = [(zsh_url.as_str(), "bin/zsh", zsh_hash)]; - for (url, path_in_targz, expected_hash) in downloads.iter().copied() { - let filename = path_in_targz.split('/').next_back().unwrap(); - let download_path = out_dir.join(filename); - let hash_path = out_dir.join(format!("{filename}.hash")); - let file_exists = matches!(fs::read(&download_path), Ok(existing_file_data) if xxh3_128(&existing_file_data) == expected_hash); - if !file_exists { - let data = download_and_unpack_tar_gz(url, path_in_targz)?; - fs::write(&download_path, &data).context(format!( - "Saving {path_in_targz} in {url} to {}", - download_path.display() - ))?; - let actual_hash = xxh3_128(&data); + for BinaryDownload { name, url, path_in_targz, expected_sha256 } in downloads { + let dest = out_dir.join(name); + // Cache hit: an already-extracted binary whose contents hash to + // `expected_sha256` is known-good and reused without redownloading. + let cached = matches!( + fs::read(&dest), + Ok(existing) if sha256_hex(&existing) == *expected_sha256, + ); + if !cached { + let tarball = download(url).context(format!("Failed to download {url}"))?; + let data = unpack_tar_gz(Cursor::new(tarball), path_in_targz) + .context(format!("Failed to extract {path_in_targz} from {url}"))?; + let actual_sha256 = sha256_hex(&data); assert_eq!( - actual_hash, expected_hash, - "expected_hash of {path_in_targz} in {url} needs to be updated" + &actual_sha256, expected_sha256, + "sha256 of {path_in_targz} in {url} does not match — update expected value in MACOS_BINARY_DOWNLOADS", ); + fs::write(&dest, &data).with_context(|| format!("writing {}", dest.display()))?; } - fs::write(&hash_path, format!("{expected_hash:x}"))?; + materialized_artifact_build::register(name, &dest); } Ok(()) - // let zsh_path = ensure_downloaded(&zsh_url); +} + +fn register_preload_cdylib() -> anyhow::Result<()> { + let env_name = match env::var("CARGO_CFG_TARGET_OS").unwrap().as_str() { + "windows" => "CARGO_CDYLIB_FILE_FSPY_PRELOAD_WINDOWS", + _ if env::var("CARGO_CFG_TARGET_ENV").unwrap() == "musl" => return Ok(()), + _ => "CARGO_CDYLIB_FILE_FSPY_PRELOAD_UNIX", + }; + // The cdylib path is content-addressed by cargo; when its content changes + // the path changes. Track it so we re-publish the hash on update. + println!("cargo:rerun-if-env-changed={env_name}"); + let dylib_path = env::var_os(env_name).with_context(|| format!("{env_name} not set"))?; + materialized_artifact_build::register("fspy_preload", Path::new(&dylib_path)); + Ok(()) } fn main() -> anyhow::Result<()> { println!("cargo:rerun-if-changed=build.rs"); - fetch_macos_binaries().context("Failed to fetch macOS binaries")?; + let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap()); + fetch_macos_binaries(&out_dir).context("Failed to fetch macOS binaries")?; + register_preload_cdylib().context("Failed to register preload cdylib")?; Ok(()) } diff --git a/crates/fspy/src/artifact.rs b/crates/fspy/src/artifact.rs deleted file mode 100644 index 0c3fcba0..00000000 --- a/crates/fspy/src/artifact.rs +++ /dev/null @@ -1,60 +0,0 @@ -use std::{ - fs::{self, OpenOptions}, - io::{self, Write}, - path::{Path, PathBuf}, -}; - -/// An artifact (e.g., a DLL or shared library) whose content is embedded and needs to be written to disk. -pub struct Artifact { - pub name: &'static str, - pub content: &'static [u8], - pub hash: &'static str, -} - -#[cfg(target_os = "macos")] -#[doc(hidden)] -#[macro_export] -macro_rules! artifact { - ($name: literal) => { - $crate::artifact::Artifact::new( - $name, - ::core::include_bytes!(::core::concat!(::core::env!("OUT_DIR"), "/", $name)), - ::core::include_str!(::core::concat!(::core::env!("OUT_DIR"), "/", $name, ".hash")), - ) - }; -} - -#[cfg(target_os = "macos")] -pub use artifact; - -impl Artifact { - #[cfg(not(target_os = "linux"))] - pub const fn new(name: &'static str, content: &'static [u8], hash: &'static str) -> Self { - Self { name, content, hash } - } - - pub fn write_to(&self, dir: impl AsRef, suffix: &str) -> io::Result { - let dir = dir.as_ref(); - let path = dir.join(format!("{}_{}{}", self.name, self.hash, suffix)); - - if fs::exists(&path)? { - return Ok(path); - } - let tmp_path = dir.join(format!("{:x}", rand::random::())); - let mut tmp_file_open_options = OpenOptions::new(); - tmp_file_open_options.write(true).create_new(true); - #[cfg(unix)] - std::os::unix::fs::OpenOptionsExt::mode(&mut tmp_file_open_options, 0o755); // executable - let mut tmp_file = tmp_file_open_options.open(&tmp_path)?; - tmp_file.write_all(self.content)?; - drop(tmp_file); - - if let Err(err) = fs::rename(&tmp_path, &path) { - if !fs::exists(&path)? { - return Err(err); - } - fs::remove_file(&tmp_path)?; - } - Ok(path) - } -} diff --git a/crates/fspy/src/lib.rs b/crates/fspy/src/lib.rs index 7acabe79..13ff2055 100644 --- a/crates/fspy/src/lib.rs +++ b/crates/fspy/src/lib.rs @@ -1,11 +1,6 @@ #![cfg_attr(target_os = "windows", feature(windows_process_extensions_main_thread_handle))] #![feature(once_cell_try)] -// Persist the injected DLL/shared library somewhere in the filesystem. -// Not needed on musl (seccomp-only tracking). -#[cfg(not(target_env = "musl"))] -mod artifact; - pub mod error; #[cfg(not(target_env = "musl"))] diff --git a/crates/fspy/src/unix/macos_artifacts.rs b/crates/fspy/src/unix/macos_artifacts.rs index 70ee101e..17b014bd 100644 --- a/crates/fspy/src/unix/macos_artifacts.rs +++ b/crates/fspy/src/unix/macos_artifacts.rs @@ -1,7 +1,7 @@ -use crate::artifact::{Artifact, artifact}; +use materialized_artifact::{Artifact, artifact}; pub const COREUTILS_BINARY: Artifact = artifact!("coreutils"); -pub const OILS_BINARY: Artifact = artifact!("oils-for-unix"); +pub const OILS_BINARY: Artifact = artifact!("oils_for_unix"); #[cfg(test)] mod tests { @@ -14,7 +14,7 @@ mod tests { #[test] fn coreutils_functions() { let tmpdir = tempfile::tempdir().unwrap(); - let coreutils_path = COREUTILS_BINARY.write_to(&tmpdir, "").unwrap(); + let coreutils_path = COREUTILS_BINARY.materialize().executable().at(&tmpdir).unwrap(); let output = Command::new(coreutils_path).arg("--list").output().unwrap(); let mut expected_functions: Vec<&str> = output .stdout diff --git a/crates/fspy/src/unix/mod.rs b/crates/fspy/src/unix/mod.rs index ba051630..f01f63b5 100644 --- a/crates/fspy/src/unix/mod.rs +++ b/crates/fspy/src/unix/mod.rs @@ -37,9 +37,6 @@ pub struct SpyImpl { preload_path: Box, } -#[cfg(not(target_env = "musl"))] -const PRELOAD_CDYLIB_BINARY: &[u8] = include_bytes!(env!("CARGO_CDYLIB_FILE_FSPY_PRELOAD_UNIX")); - impl SpyImpl { /// Initialize the fs access spy by writing the preload library on disk. /// @@ -48,18 +45,11 @@ impl SpyImpl { pub fn init_in(#[cfg_attr(target_env = "musl", allow(unused))] dir: &Path) -> io::Result { #[cfg(not(target_env = "musl"))] let preload_path = { - use const_format::formatcp; - use xxhash_rust::const_xxh3::xxh3_128; - - use crate::artifact::Artifact; + use materialized_artifact::{Artifact, artifact}; - const PRELOAD_CDYLIB: Artifact = Artifact { - name: "fspy_preload", - content: PRELOAD_CDYLIB_BINARY, - hash: formatcp!("{:x}", xxh3_128(PRELOAD_CDYLIB_BINARY)), - }; + const PRELOAD_CDYLIB: Artifact = artifact!("fspy_preload"); - let preload_cdylib_path = PRELOAD_CDYLIB.write_to(dir, ".dylib")?; + let preload_cdylib_path = PRELOAD_CDYLIB.materialize().suffix(".dylib").at(dir)?; preload_cdylib_path.as_path().into() }; @@ -68,8 +58,9 @@ impl SpyImpl { preload_path, #[cfg(target_os = "macos")] artifacts: { - let coreutils_path = macos_artifacts::COREUTILS_BINARY.write_to(dir, "")?; - let bash_path = macos_artifacts::OILS_BINARY.write_to(dir, "")?; + let coreutils_path = + macos_artifacts::COREUTILS_BINARY.materialize().executable().at(dir)?; + let bash_path = macos_artifacts::OILS_BINARY.materialize().executable().at(dir)?; Artifacts { bash_path: bash_path.as_path().into(), coreutils_path: coreutils_path.as_path().into(), diff --git a/crates/fspy/src/windows/mod.rs b/crates/fspy/src/windows/mod.rs index 93bef864..8081e129 100644 --- a/crates/fspy/src/windows/mod.rs +++ b/crates/fspy/src/windows/mod.rs @@ -6,35 +6,28 @@ use std::{ sync::Arc, }; -use const_format::formatcp; use fspy_detours_sys::{DetourCopyPayloadToProcess, DetourUpdateProcessWithDll}; use fspy_shared::{ ipc::{PathAccess, channel::channel}, windows::{PAYLOAD_ID, Payload}, }; use futures_util::FutureExt; +use materialized_artifact::{Artifact, artifact}; use tokio_util::sync::CancellationToken; use winapi::{ shared::minwindef::TRUE, um::{processthreadsapi::ResumeThread, winbase::CREATE_SUSPENDED}, }; use winsafe::co::{CP, WC}; -use xxhash_rust::const_xxh3::xxh3_128; use crate::{ ChildTermination, TrackedChild, - artifact::Artifact, command::Command, error::SpawnError, ipc::{OwnedReceiverLockGuard, SHM_CAPACITY}, }; -const PRELOAD_CDYLIB_BINARY: &[u8] = include_bytes!(env!("CARGO_CDYLIB_FILE_FSPY_PRELOAD_WINDOWS")); -const INTERPOSE_CDYLIB: Artifact = Artifact::new( - "fsyp_preload", - PRELOAD_CDYLIB_BINARY, - formatcp!("{:x}", xxh3_128(PRELOAD_CDYLIB_BINARY)), -); +const INTERPOSE_CDYLIB: Artifact = artifact!("fspy_preload"); pub struct PathAccessIterable { ipc_receiver_lock_guard: OwnedReceiverLockGuard, @@ -58,7 +51,7 @@ pub struct SpyImpl { impl SpyImpl { pub fn init_in(path: &Path) -> io::Result { - let dll_path = INTERPOSE_CDYLIB.write_to(path, ".dll").unwrap(); + let dll_path = INTERPOSE_CDYLIB.materialize().suffix(".dll").at(path)?; let wide_dll_path = dll_path.as_os_str().encode_wide().collect::>(); let mut ansi_dll_path = diff --git a/crates/fspy_preload_unix/src/lib.rs b/crates/fspy_preload_unix/src/lib.rs index 9728cd98..42bf9e9c 100644 --- a/crates/fspy_preload_unix/src/lib.rs +++ b/crates/fspy_preload_unix/src/lib.rs @@ -1,6 +1,7 @@ -// On musl targets, fspy_preload_unix is not needed since we can track accesses via seccomp-only. -// Compile as an empty crate to avoid build failures from missing libc symbols. -#![cfg_attr(not(target_env = "musl"), feature(c_variadic))] +// Compile as an empty crate on non-unix targets and on musl (where seccomp +// alone handles access tracking). Guarding the feature gate keeps rustc from +// warning about unused features on those targets. +#![cfg_attr(all(unix, not(target_env = "musl")), feature(c_variadic))] #[cfg(all(unix, not(target_env = "musl")))] mod client; diff --git a/crates/materialized_artifact/.clippy.toml b/crates/materialized_artifact/.clippy.toml new file mode 120000 index 00000000..c7929b36 --- /dev/null +++ b/crates/materialized_artifact/.clippy.toml @@ -0,0 +1 @@ +../../.non-vite.clippy.toml \ No newline at end of file diff --git a/crates/materialized_artifact/Cargo.toml b/crates/materialized_artifact/Cargo.toml new file mode 100644 index 00000000..643c40a1 --- /dev/null +++ b/crates/materialized_artifact/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "materialized_artifact" +version = "0.0.0" +edition.workspace = true +license.workspace = true +publish = false +rust-version.workspace = true + +[dependencies] +tempfile = { workspace = true } + +[lints] +workspace = true + +[lib] +doctest = false +test = false diff --git a/crates/materialized_artifact/README.md b/crates/materialized_artifact/README.md new file mode 100644 index 00000000..60b505d3 --- /dev/null +++ b/crates/materialized_artifact/README.md @@ -0,0 +1,8 @@ +# materialized_artifact + +Materialize a compile-time–embedded file to disk on demand, for APIs that +need a filesystem path (`LoadLibrary`, `LD_PRELOAD`, helper binaries) rather +than the bytes you'd get from `include_bytes!`. The on-disk filename is +content-addressed so repeated calls skip writing, multiple versions coexist, +and stale files are never mistaken for current ones. See crate-level docs +for details. diff --git a/crates/materialized_artifact/src/lib.rs b/crates/materialized_artifact/src/lib.rs new file mode 100644 index 00000000..7380129e --- /dev/null +++ b/crates/materialized_artifact/src/lib.rs @@ -0,0 +1,199 @@ +//! Materialize a compile-time–embedded file to disk on demand. +//! +//! Some APIs need a file on disk — `LoadLibrary` and `LD_PRELOAD` take a +//! path, and helper binaries have to exist as actual files to be spawned — +//! but we want to ship a single executable. `materialized_artifact` embeds +//! the file content as a `&'static [u8]` at compile time via the +//! [`artifact!`] macro (same as `include_bytes!`), and [`Materialize::at`] +//! writes it out to disk when first needed — that materialization step is +//! the value-add over a bare `include_bytes!`. +//! +//! Materialized files are named `{name}_{hash}{suffix}` in the caller-chosen +//! directory. The hash (computed at build time by +//! `materialized_artifact_build::register`) gives three properties without +//! any coordination between processes: +//! +//! - **No repeated writes.** [`Materialize::at`] returns the existing path if +//! the file is already there; repeated calls and re-runs skip I/O. +//! - **Correctness.** Two binaries with different embedded content produce +//! different filenames, so a stale file from an older build is never +//! mistaken for the current one. +//! - **Coexistence.** Multiple versions of a materialized artifact (e.g. from +//! different builds of the host program on the same machine) share `dir` +//! without overwriting each other. + +use std::{ + fs, + io::{self, Write}, + path::{Path, PathBuf}, +}; + +/// A file embedded into the executable at compile time. +/// +/// Construct with [`artifact!`]; materialize to disk via +/// [`Artifact::materialize`] + [`Materialize::at`]. See the [crate docs] for +/// the design rationale. +/// +/// [crate docs]: crate +#[derive(Clone, Copy)] +pub struct Artifact { + name: &'static str, + content: &'static [u8], + hash: &'static str, +} + +/// Construct an [`Artifact`] from the env vars published by a build script +/// via `materialized_artifact_build::register`. +#[macro_export] +macro_rules! artifact { + ($name:literal) => { + $crate::Artifact::__new( + $name, + ::core::include_bytes!(::core::env!(::core::concat!( + "MATERIALIZED_ARTIFACT_", + $name, + "_PATH" + ))), + ::core::env!(::core::concat!("MATERIALIZED_ARTIFACT_", $name, "_HASH")), + ) + }; +} + +impl Artifact { + #[doc(hidden)] + #[must_use] + pub const fn __new(name: &'static str, content: &'static [u8], hash: &'static str) -> Self { + Self { name, content, hash } + } + + /// Start a fluent materialize chain. Supply optional [`Materialize::suffix`] + /// / [`Materialize::executable`] knobs, then terminate with + /// [`Materialize::at`]. + pub const fn materialize(&self) -> Materialize<'static> { + Materialize { + artifact: *self, + suffix: "", + #[cfg(unix)] + executable: false, + } + } +} + +/// Builder returned by [`Artifact::materialize`]. Terminate with +/// [`Materialize::at`] to write the file. +#[derive(Clone, Copy)] +#[must_use = "materialize() only configures — call .at(dir) to write the file"] +pub struct Materialize<'a> { + artifact: Artifact, + suffix: &'a str, + #[cfg(unix)] + executable: bool, +} + +impl Materialize<'_> { + /// Filename suffix appended after `{name}_{hash}` (e.g. `.dll`, `.dylib`). + /// Defaults to empty. + pub const fn suffix(self, suffix: &str) -> Materialize<'_> { + Materialize { + artifact: self.artifact, + suffix, + #[cfg(unix)] + executable: self.executable, + } + } + + /// Mark the materialized file as executable (`0o755` on Unix; no-op on + /// Windows where the filesystem has no executable bit). + #[cfg_attr(not(unix), expect(unused_mut, reason = "executable is Unix-only"))] + pub const fn executable(mut self) -> Self { + #[cfg(unix)] + { + self.executable = true; + } + self + } + + /// Materialize the artifact in `dir` under a content-addressed filename, + /// writing it if missing. On Unix, newly created files get `0o755` when + /// [`Materialize::executable`] was called and `0o644` otherwise, and an + /// existing file's mode is reconciled if it drifted. + /// + /// Returns the final path. If the target already exists and its mode + /// already matches, no I/O beyond the stat is performed. + /// + /// # Preconditions + /// + /// `dir` must already exist — this method does not create it. + /// + /// # Errors + /// + /// Returns an error if the directory can't be read/written, the stat + /// fails for any reason other than not-found, or the temp-file rename + /// fails and the destination still doesn't exist. + pub fn at(self, dir: impl AsRef) -> io::Result { + let dir = dir.as_ref(); + let path = + dir.join(format!("{}_{}{}", self.artifact.name, self.artifact.hash, self.suffix)); + + #[cfg(unix)] + let want_mode: u32 = if self.executable { 0o755 } else { 0o644 }; + + // Fast path: one stat tells us both whether the file exists and, + // on Unix, what its permission bits are. The content is assumed + // correct because the hash is in the filename, so there is nothing + // else to verify. + match fs::metadata(&path) { + #[cfg(unix)] + Ok(meta) => { + use std::os::unix::fs::PermissionsExt; + // Reconcile a drifted mode (e.g. someone chmod'd it away) + // but skip the syscall when it already matches. + if meta.permissions().mode() & 0o777 != want_mode { + fs::set_permissions(&path, fs::Permissions::from_mode(want_mode))?; + } + return Ok(path); + } + // On non-Unix there is no mode to reconcile; existence alone is + // enough to declare success. + #[cfg(not(unix))] + Ok(_) => return Ok(path), + // Not found: fall through to the create-and-rename path. + Err(err) if err.kind() == io::ErrorKind::NotFound => {} + // Any other stat failure (permission denied, I/O error, etc.) + // propagates — we can't reason about what's on disk. + Err(err) => return Err(err), + } + + // Slow path: write to a unique temp file in the same directory, then + // rename into place atomically. The temp must live in `dir` (not the + // system temp) so the final rename stays within one filesystem — cross- + // filesystem rename isn't atomic. `NamedTempFile`'s `Drop` removes the + // temp on any early return, so we never leak partial files on error. + #[cfg(unix)] + let mut tmp = { + use std::os::unix::fs::PermissionsExt; + // `Builder::permissions` sets the mode at open(2) time, so there's + // no window where the temp exists with the wrong bits. + tempfile::Builder::new() + .permissions(fs::Permissions::from_mode(want_mode)) + .tempfile_in(dir)? + }; + #[cfg(not(unix))] + let mut tmp = tempfile::NamedTempFile::new_in(dir)?; + tmp.as_file_mut().write_all(self.artifact.content)?; + + // `persist_noclobber` (link+unlink on Unix, MoveFileExW without + // REPLACE_EXISTING on Windows) fails atomically if the destination + // already exists — so two racing processes can't clobber each other + // mid-write, and the loser sees the error below. + if let Err(err) = tmp.persist_noclobber(&path) { + // If another process won the race and the destination now exists, + // treat that as success; `err.file` drops here, cleaning up our + // temp. Otherwise propagate the original error. + if !fs::exists(&path)? { + return Err(err.error); + } + } + Ok(path) + } +} diff --git a/crates/materialized_artifact_build/.clippy.toml b/crates/materialized_artifact_build/.clippy.toml new file mode 120000 index 00000000..c7929b36 --- /dev/null +++ b/crates/materialized_artifact_build/.clippy.toml @@ -0,0 +1 @@ +../../.non-vite.clippy.toml \ No newline at end of file diff --git a/crates/materialized_artifact_build/Cargo.toml b/crates/materialized_artifact_build/Cargo.toml new file mode 100644 index 00000000..c2d5dbd3 --- /dev/null +++ b/crates/materialized_artifact_build/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "materialized_artifact_build" +version = "0.0.0" +edition.workspace = true +license.workspace = true +publish = false +rust-version.workspace = true + +[dependencies] +xxhash-rust = { workspace = true, features = ["xxh3"] } + +[lints] +workspace = true + +[lib] +doctest = false +test = false diff --git a/crates/materialized_artifact_build/README.md b/crates/materialized_artifact_build/README.md new file mode 100644 index 00000000..7f727fad --- /dev/null +++ b/crates/materialized_artifact_build/README.md @@ -0,0 +1,4 @@ +# materialized_artifact_build + +Build-script helper for publishing artifacts consumed by +`materialized_artifact`'s `artifact!` macro. diff --git a/crates/materialized_artifact_build/src/lib.rs b/crates/materialized_artifact_build/src/lib.rs new file mode 100644 index 00000000..cc6c4fc7 --- /dev/null +++ b/crates/materialized_artifact_build/src/lib.rs @@ -0,0 +1,37 @@ +use std::{fs, path::Path}; + +/// Namespace prefix for the env vars set by [`register`] and consumed by +/// `materialized_artifact`'s `artifact!` macro. Exported so both crates agree +/// on the same prefix. +pub const ENV_PREFIX: &str = "MATERIALIZED_ARTIFACT_"; + +/// Publish an artifact at `path` so `materialized_artifact`'s `artifact!($name)` +/// macro can embed it. +/// +/// Emits three `cargo:…` directives: +/// `rerun-if-changed={path}`, +/// `rustc-env=MATERIALIZED_ARTIFACT_{name}_PATH={path}`, and +/// `rustc-env=MATERIALIZED_ARTIFACT_{name}_HASH={hex}`. The runtime resolves +/// these at compile time via `include_bytes!(env!(…))` and `env!(…)`. +/// +/// `name` is used both as the env-var key and as the on-disk filename prefix +/// (in `Materialize::at`), so it must be a valid identifier-like string +/// that matches the one passed to `artifact!`. +/// +/// # Panics +/// +/// Panics if `path` is not valid UTF-8 or cannot be read. +pub fn register(name: &str, path: &Path) { + let path_str = path.to_str().expect("artifact path must be valid UTF-8"); + #[expect(clippy::print_stdout, reason = "cargo build-script directives")] + { + // Emit rerun-if-changed before reading so cargo still sees it even if + // reading the file below panics. + println!("cargo:rerun-if-changed={path_str}"); + let bytes = + fs::read(path).unwrap_or_else(|e| panic!("failed to read artifact at {path_str}: {e}")); + let hash = format!("{:x}", xxhash_rust::xxh3::xxh3_128(&bytes)); + println!("cargo:rustc-env={ENV_PREFIX}{name}_PATH={path_str}"); + println!("cargo:rustc-env={ENV_PREFIX}{name}_HASH={hash}"); + } +}