From 52686409fecbe3998e330ccbd2b02b7f7c019438 Mon Sep 17 00:00:00 2001 From: ubugeeei Date: Mon, 18 May 2026 14:54:16 +0900 Subject: [PATCH] Finish multipart production hardening --- .github/workflows/ci.yml | 21 ++++ .github/workflows/release_bindings.yml | 73 +++++++++++ README.md | 31 ++++- crates/fastmulp_core/src/boundary.rs | 47 ++++--- crates/fastmulp_core/src/error.rs | 15 +++ crates/fastmulp_core/src/lib.rs | 2 +- crates/fastmulp_core/src/parser.rs | 77 +++++++++++- crates/fastmulp_core/src/tests.rs | 5 +- crates/fastmulp_core/tests/content_type.rs | 21 +++- crates/fastmulp_core/tests/spec_invalid.rs | 76 +++++++++++- crates/fastmulp_napi/src/lib.rs | 11 +- crates/fastmulp_wasm/src/lib.rs | 2 +- package.json | 5 +- packages/fastmulp-node/README.md | 10 ++ packages/fastmulp-node/index.d.ts | 19 +++ packages/fastmulp-node/index.js | 3 + packages/fastmulp-node/package.json | 21 ++++ packages/fastmulp-wasm/README.md | 10 ++ packages/fastmulp-wasm/package.json | 28 +++++ scripts/install-wasm-bindgen.sh | 24 ++++ scripts/package-bindings.mjs | 136 +++++++++++++++++++++ scripts/test-node-binding.mjs | 75 ++++++++++++ scripts/test-wasm-binding.mjs | 59 +++++++++ 23 files changed, 731 insertions(+), 40 deletions(-) create mode 100644 .github/workflows/release_bindings.yml create mode 100644 packages/fastmulp-node/README.md create mode 100644 packages/fastmulp-node/index.d.ts create mode 100644 packages/fastmulp-node/index.js create mode 100644 packages/fastmulp-node/package.json create mode 100644 packages/fastmulp-wasm/README.md create mode 100644 packages/fastmulp-wasm/package.json create mode 100644 scripts/install-wasm-bindgen.sh create mode 100644 scripts/package-bindings.mjs create mode 100644 scripts/test-node-binding.mjs create mode 100644 scripts/test-wasm-binding.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4965e8a..e1fab19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,6 +83,27 @@ jobs: cargo 
test -p fastmulp-core --doc --locked cargo test -p fastmulp-wasm --doc --locked + bindings: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - uses: Swatinem/rust-cache@v2 + - uses: actions/setup-node@v6 + with: + node-version: "24" + - name: Enable pnpm + run: corepack enable + - name: Install wasm-bindgen CLI + run: bash scripts/install-wasm-bindgen.sh + - name: Node binding smoke test + run: pnpm test:node-binding + - name: Wasm binding smoke test + run: pnpm test:wasm-binding + bench: if: github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest diff --git a/.github/workflows/release_bindings.yml b/.github/workflows/release_bindings.yml new file mode 100644 index 0000000..7a3d6b3 --- /dev/null +++ b/.github/workflows/release_bindings.yml @@ -0,0 +1,73 @@ +name: release-bindings + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +defaults: + run: + shell: bash + +env: + CARGO_TERM_COLOR: always + +jobs: + publish-binding-artifacts: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - uses: Swatinem/rust-cache@v2 + - uses: actions/setup-node@v6 + with: + node-version: "24" + - name: Enable pnpm + run: corepack enable + - name: Verify tag is on main + run: git branch --remote --contains "${GITHUB_SHA}" | grep -qx ' origin/main' + - name: Verify tag matches workspace version + run: | + expected="${GITHUB_REF_NAME#v}" + actual="$(awk ' + /^\[workspace\.package\]/ { in_section=1; next } + /^\[/ && in_section { exit } + in_section && /^version = "/ { + gsub(/^version = "/, "", $0) + gsub(/"$/, "", $0) + print + exit + } + ' Cargo.toml)" + test "${expected}" = "${actual}" + - name: Build and 
test Node binding + run: pnpm test:node-binding + - name: Install wasm-bindgen CLI + run: bash scripts/install-wasm-bindgen.sh + - name: Test wasm binding + run: pnpm test:wasm-binding + - name: Package binding artifacts + run: node scripts/package-bindings.mjs --pack + - name: Checksum artifacts + run: | + cd dist/bindings/artifacts + sha256sum *.tgz > SHA256SUMS + - name: Create or update GitHub release + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release view "${GITHUB_REF_NAME}" >/dev/null 2>&1 \ + || gh release create "${GITHUB_REF_NAME}" --verify-tag --title "${GITHUB_REF_NAME}" + gh release upload "${GITHUB_REF_NAME}" dist/bindings/artifacts/*.tgz dist/bindings/artifacts/SHA256SUMS --clobber diff --git a/README.md b/README.md index cc38224..c94ac3a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ High-accuracy, low-allocation `multipart/form-data` parsing with a zero-copy Rus - The Rust core parses against a borrowed `&[u8]` and returns body ranges instead of copying payload bytes. - `Content-Disposition` is parsed eagerly, and `name` is enforced for `form-data` parts. - Header storage uses `SmallVec` so the common case stays stack-friendly. -- Node.js and browser bindings return metadata plus `body_start` / `body_end`, so callers can slice the original buffer themselves. +- Node.js and browser bindings return metadata plus body ranges, so callers can slice the original buffer themselves. - Boundary lines accept RFC 2046 transport padding, plus MIME-style preamble and epilogue. 
## Spec Notes @@ -51,15 +51,14 @@ Node.js: import { parse } from "./fastmulp.node"; const parts = parse(bodyBuffer, boundary); -const fileBytes = bodyBuffer.subarray(parts[0].body_start, parts[0].body_end); +const fileBytes = bodyBuffer.subarray(parts[0].bodyStart, parts[0].bodyEnd); ``` Browser: ```ts -import init, { parse } from "./fastmulp_wasm.js"; +import { parse } from "fastmulp-wasm"; -await init(); const parts = parse(formBytes, boundary); const fieldBytes = formBytes.subarray(parts[0].body_start, parts[0].body_end); ``` @@ -68,6 +67,28 @@ The wasm target still needs one JS-to-wasm copy at the ABI boundary, but it avoi Older nested `multipart/mixed` payloads can be handled by recursively calling `parse` on a part body after extracting the nested boundary from that part's `Content-Type`. +### Parser Limits + +Use `parse_with_limits` or `MultipartParser::new_with_limits` when parsing untrusted uploads on a server boundary. `ParseLimits` can cap the number of parts, the number of headers per part, and the number of header bytes per part. The plain `parse` API stays unlimited for compatibility and small trusted payloads. + +### Security Notes + +Treat `filename` as display metadata only. Strip path separators and platform-specific path components before using it in storage, generate your own server-side object names, and keep the original value only as untrusted metadata. Preserve duplicate field names in order unless your application explicitly defines a merge rule. For nested `multipart/mixed`, extract the nested boundary from that part's `Content-Type` and parse the nested body with its own limits. + +### Binding Artifacts + +Tag pushes build npm-compatible release artifacts for the JS targets: + +- `fastmulp-node-linux-x64-*.tgz`: platform-specific Node.js native addon containing `fastmulp.node`, CommonJS entrypoint, and TypeScript declarations. 
+- `fastmulp-wasm-*.tgz`: browser wasm package containing wasm-bindgen generated JavaScript glue, wasm, and TypeScript declarations. + +The Node binding uses camelCase object fields such as `bodyStart`, `bodyEnd`, `fileName`, and `contentType`. The wasm binding preserves the existing snake_case field names generated by `wasm-bindgen`. + +```ts +import { parse as parseNode } from "fastmulp-node-linux-x64"; +import { parse as parseWasm } from "fastmulp-wasm"; +``` + ## Release - `vp run release:patch` @@ -75,7 +96,7 @@ Older nested `multipart/mixed` payloads can be handled by recursively calling `p - `vp run release:alpha` - `vp run release:beta` -Each release command updates the workspace version in `Cargo.toml`, creates a release commit, and creates the matching `v...` git tag. Tag pushes publish `fastmulp-core` through GitHub Actions trusted publishing. +Each release command updates the workspace version in `Cargo.toml`, creates a release commit, and creates the matching `v...` git tag. Tag pushes publish `fastmulp-core` through GitHub Actions trusted publishing and attach Node/wasm binding artifacts to the matching GitHub Release. ## Shared Tasks diff --git a/crates/fastmulp_core/src/boundary.rs b/crates/fastmulp_core/src/boundary.rs index 31b61c4..eb93ef6 100644 --- a/crates/fastmulp_core/src/boundary.rs +++ b/crates/fastmulp_core/src/boundary.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use smallvec::SmallVec; use crate::{ @@ -11,8 +13,12 @@ pub struct Boundary<'a> { opening: SmallVec<[u8; 72]>, } -pub fn boundary_from_content_type(_content_type: &str) -> Option<&str> { - let bytes = _content_type.as_bytes(); +/// Extracts the `boundary` parameter from a multipart `Content-Type` header value. +/// +/// The returned value is borrowed for ordinary parameters and owned when a +/// quoted-string contains MIME quoted-pair escapes that need to be decoded. 
+pub fn boundary_from_content_type(content_type: &str) -> Option> { + let bytes = content_type.as_bytes(); let mut cursor = skip_ascii_whitespace(bytes, 0); let media_start = cursor; @@ -52,17 +58,13 @@ pub fn boundary_from_content_type(_content_type: &str) -> Option<&str> { cursor += 1; cursor = skip_ascii_whitespace(bytes, cursor); - let value = parse_content_type_value(_content_type, cursor)?; + let value = parse_content_type_value(content_type, cursor)?; cursor = skip_ascii_whitespace(bytes, value.next); if cursor < bytes.len() && bytes[cursor] != b';' { return None; } if eq_ignore_ascii_case(name, b"boundary") { - if value.requires_unescape { - return None; - } - return Some(value.raw); } } @@ -106,9 +108,8 @@ impl<'a> Boundary<'a> { } struct ContentTypeValue<'a> { - raw: &'a str, + raw: Cow<'a, str>, next: usize, - requires_unescape: bool, } fn parse_content_type_value(content_type: &str, start: usize) -> Option> { @@ -120,26 +121,37 @@ fn parse_content_type_value(content_type: &str, start: usize) -> Option>; + let mut copied_from = inner_start; while cursor < bytes.len() { match bytes[cursor] { b'"' => { + if let Some(mut decoded) = decoded { + decoded.extend_from_slice(&bytes[copied_from..cursor]); + let raw = String::from_utf8(decoded).ok()?; + return Some(ContentTypeValue { + raw: Cow::Owned(raw), + next: cursor + 1, + }); + } + return content_type .get(inner_start..cursor) .map(|raw| ContentTypeValue { - raw, + raw: Cow::Borrowed(raw), next: cursor + 1, - requires_unescape, }); } b'\\' => { - requires_unescape = true; - cursor += 1; - if cursor == bytes.len() { + if cursor + 1 == bytes.len() { return None; } - cursor += 1; + let decoded = decoded.get_or_insert_with(|| Vec::with_capacity(bytes.len())); + decoded.extend_from_slice(&bytes[copied_from..cursor]); + decoded.push(bytes[cursor + 1]); + cursor += 2; + copied_from = cursor; } _ => { cursor += 1; @@ -157,9 +169,8 @@ fn parse_content_type_value(content_type: &str, start: usize) -> Option { + 
write!(f, "multipart part count exceeded configured limit {limit}") + } + Self::HeaderCountLimitExceeded { limit, offset } => write!( + f, + "multipart header count exceeded configured limit {limit} at byte offset {offset}" + ), + Self::HeaderBytesLimitExceeded { limit, offset } => write!( + f, + "multipart header bytes exceeded configured limit {limit} at byte offset {offset}" + ), } } } diff --git a/crates/fastmulp_core/src/lib.rs b/crates/fastmulp_core/src/lib.rs index f1f1e55..60e801a 100644 --- a/crates/fastmulp_core/src/lib.rs +++ b/crates/fastmulp_core/src/lib.rs @@ -26,7 +26,7 @@ mod util; pub use boundary::{Boundary, boundary_from_content_type}; pub use error::{Error, Result}; pub use header::Header; -pub use parser::{Multipart, MultipartParser, parse}; +pub use parser::{Multipart, MultipartParser, ParseLimits, parse, parse_with_limits}; pub use part::Part; pub use text::TextValue; diff --git a/crates/fastmulp_core/src/parser.rs b/crates/fastmulp_core/src/parser.rs index 7decdc5..a9c1502 100644 --- a/crates/fastmulp_core/src/parser.rs +++ b/crates/fastmulp_core/src/parser.rs @@ -25,19 +25,57 @@ impl<'a> Multipart<'a> { } } +/// Optional resource limits for parsing untrusted multipart bodies. +/// +/// Each `None` value is unlimited. `ParseLimits::default()` preserves the +/// historical unlimited behavior used by `parse` and `MultipartParser::new`. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct ParseLimits { + /// Maximum number of parts to return before parsing stops with an error. + pub max_parts: Option, + /// Maximum number of headers accepted in each part. + pub max_headers_per_part: Option, + /// Maximum aggregate header bytes accepted in each part, excluding CRLF. + pub max_header_bytes_per_part: Option, +} + +impl ParseLimits { + /// Returns a limit set with every guardrail disabled. 
+ pub const fn unlimited() -> Self { + Self { + max_parts: None, + max_headers_per_part: None, + max_header_bytes_per_part: None, + } + } +} + pub struct MultipartParser<'a> { body: &'a [u8], boundary: Boundary<'a>, + limits: ParseLimits, + parts_seen: usize, cursor: usize, done: bool, } impl<'a> MultipartParser<'a> { pub fn new(body: &'a [u8], boundary: &'a [u8]) -> Result { + Self::new_with_limits(body, boundary, ParseLimits::default()) + } + + /// Creates an iterator parser with explicit resource limits. + pub fn new_with_limits( + body: &'a [u8], + boundary: &'a [u8], + limits: ParseLimits, + ) -> Result { let boundary = Boundary::new(boundary)?; let mut parser = Self { body, boundary, + limits, + parts_seen: 0, cursor: 0, done: false, }; @@ -78,8 +116,15 @@ impl<'a> MultipartParser<'a> { } fn parse_next_part(&mut self) -> Result> { + if let Some(limit) = self.limits.max_parts + && self.parts_seen >= limit + { + return Err(Error::PartLimitExceeded { limit }); + } + let part_offset = self.cursor; let mut headers = SmallVec::<[Header<'a>; 4]>::new(); + let mut header_bytes = 0usize; let mut saw_content_disposition = false; let mut require_name = false; let mut name = None; @@ -106,6 +151,25 @@ impl<'a> MultipartParser<'a> { break; } + header_bytes += line.len(); + if let Some(limit) = self.limits.max_header_bytes_per_part + && header_bytes > limit + { + return Err(Error::HeaderBytesLimitExceeded { + limit, + offset: line_start, + }); + } + + if let Some(limit) = self.limits.max_headers_per_part + && headers.len() >= limit + { + return Err(Error::HeaderCountLimitExceeded { + limit, + offset: line_start, + }); + } + if matches!(line[0], b' ' | b'\t') { return Err(Error::InvalidHeaderContinuation { offset: line_start }); } @@ -150,6 +214,8 @@ impl<'a> MultipartParser<'a> { self.done = true; } + self.parts_seen += 1; + Ok(Part::new( headers, body_start, @@ -177,7 +243,16 @@ impl<'a> Iterator for MultipartParser<'a> { } pub fn parse<'a>(body: &'a [u8], boundary: &'a 
[u8]) -> Result> { - let parser = MultipartParser::new(body, boundary)?; + parse_with_limits(body, boundary, ParseLimits::default()) +} + +/// Parses a complete multipart body with explicit resource limits. +pub fn parse_with_limits<'a>( + body: &'a [u8], + boundary: &'a [u8], + limits: ParseLimits, +) -> Result> { + let parser = MultipartParser::new_with_limits(body, boundary, limits)?; let mut parts = SmallVec::<[Part<'a>; 4]>::new(); for part in parser { parts.push(part?); diff --git a/crates/fastmulp_core/src/tests.rs b/crates/fastmulp_core/src/tests.rs index 4d28cb1..9f9bfa5 100644 --- a/crates/fastmulp_core/src/tests.rs +++ b/crates/fastmulp_core/src/tests.rs @@ -7,11 +7,12 @@ type TestResult = Result<(), Box>; #[test] fn extracts_boundary_from_content_type() { assert_eq!( - boundary_from_content_type("multipart/form-data; boundary=----WebKitFormBoundaryabc123"), + boundary_from_content_type("multipart/form-data; boundary=----WebKitFormBoundaryabc123") + .as_deref(), Some("----WebKitFormBoundaryabc123") ); assert_eq!( - boundary_from_content_type("multipart/form-data; boundary=\"quoted-boundary\""), + boundary_from_content_type("multipart/form-data; boundary=\"quoted-boundary\"").as_deref(), Some("quoted-boundary") ); assert_eq!(boundary_from_content_type("text/plain"), None); diff --git a/crates/fastmulp_core/tests/content_type.rs b/crates/fastmulp_core/tests/content_type.rs index 741e554..1f15b4a 100644 --- a/crates/fastmulp_core/tests/content_type.rs +++ b/crates/fastmulp_core/tests/content_type.rs @@ -3,14 +3,17 @@ use fastmulp_core::{Boundary, Error, boundary_from_content_type}; #[test] fn extracts_boundary_case_insensitively_and_skips_other_params() { let content_type = "Multipart/Form-Data; charset=UTF-8; boundary=abc123; foo=bar"; - assert_eq!(boundary_from_content_type(content_type), Some("abc123")); + assert_eq!( + boundary_from_content_type(content_type).as_deref(), + Some("abc123") + ); } #[test] fn extracts_quoted_boundary_with_optional_spacing() 
{ let content_type = "multipart/form-data; foo=bar; boundary = \"quoted-boundary\""; assert_eq!( - boundary_from_content_type(content_type), + boundary_from_content_type(content_type).as_deref(), Some("quoted-boundary") ); } @@ -19,7 +22,7 @@ fn extracts_quoted_boundary_with_optional_spacing() { fn extracts_quoted_boundary_with_whitespace_before_next_parameter() { let content_type = "multipart/form-data; boundary=\"quoted-boundary\" \t ; charset=UTF-8"; assert_eq!( - boundary_from_content_type(content_type), + boundary_from_content_type(content_type).as_deref(), Some("quoted-boundary") ); } @@ -27,13 +30,19 @@ fn extracts_quoted_boundary_with_whitespace_before_next_parameter() { #[test] fn accepts_quoted_pair_escapes_while_scanning_parameters() { let content_type = "multipart/form-data; title=\"needs\\\"escape\"; boundary=abc123"; - assert_eq!(boundary_from_content_type(content_type), Some("abc123")); + assert_eq!( + boundary_from_content_type(content_type).as_deref(), + Some("abc123") + ); } #[test] -fn rejects_escaped_quoted_boundary_values_requiring_unescape() { +fn extracts_escaped_quoted_boundary_values_requiring_unescape() { let content_type = "multipart/form-data; boundary=\"abc\\:123\""; - assert_eq!(boundary_from_content_type(content_type), None); + assert_eq!( + boundary_from_content_type(content_type).as_deref(), + Some("abc:123") + ); } #[test] diff --git a/crates/fastmulp_core/tests/spec_invalid.rs b/crates/fastmulp_core/tests/spec_invalid.rs index de8cf2d..a91d71c 100644 --- a/crates/fastmulp_core/tests/spec_invalid.rs +++ b/crates/fastmulp_core/tests/spec_invalid.rs @@ -1,4 +1,4 @@ -use fastmulp_core::{Boundary, Error, parse}; +use fastmulp_core::{Boundary, Error, ParseLimits, parse, parse_with_limits}; fn assert_invalid_content_disposition(disposition: &str) { let body = @@ -180,3 +180,77 @@ fn rejects_non_form_data_disposition() { Err(Error::InvalidContentDisposition { .. 
}) )); } + +#[test] +fn rejects_part_count_over_configured_limit() { + let body = concat!( + "--abc123\r\n", + "Content-Disposition: form-data; name=\"first\"\r\n", + "\r\n", + "first\r\n", + "--abc123\r\n", + "Content-Disposition: form-data; name=\"second\"\r\n", + "\r\n", + "second\r\n", + "--abc123--\r\n", + ); + + assert!(matches!( + parse_with_limits( + body.as_bytes(), + b"abc123", + ParseLimits { + max_parts: Some(1), + ..ParseLimits::default() + }, + ), + Err(Error::PartLimitExceeded { limit: 1 }) + )); +} + +#[test] +fn rejects_header_count_over_configured_limit() { + let body = concat!( + "--abc123\r\n", + "Content-Disposition: form-data; name=\"file\"\r\n", + "Content-Type: text/plain\r\n", + "\r\n", + "payload\r\n", + "--abc123--\r\n", + ); + + assert!(matches!( + parse_with_limits( + body.as_bytes(), + b"abc123", + ParseLimits { + max_headers_per_part: Some(1), + ..ParseLimits::default() + }, + ), + Err(Error::HeaderCountLimitExceeded { limit: 1, .. }) + )); +} + +#[test] +fn rejects_header_bytes_over_configured_limit() { + let body = concat!( + "--abc123\r\n", + "Content-Disposition: form-data; name=\"field\"\r\n", + "\r\n", + "payload\r\n", + "--abc123--\r\n", + ); + + assert!(matches!( + parse_with_limits( + body.as_bytes(), + b"abc123", + ParseLimits { + max_header_bytes_per_part: Some(8), + ..ParseLimits::default() + }, + ), + Err(Error::HeaderBytesLimitExceeded { limit: 8, .. 
}) + )); +} diff --git a/crates/fastmulp_napi/src/lib.rs b/crates/fastmulp_napi/src/lib.rs index 0095b37..e70f33c 100644 --- a/crates/fastmulp_napi/src/lib.rs +++ b/crates/fastmulp_napi/src/lib.rs @@ -24,7 +24,7 @@ pub struct JsPart { #[napi] pub fn boundary_from_content_type(content_type: String) -> Option { - parse_boundary(&content_type).map(str::to_owned) + parse_boundary(&content_type).map(|boundary| boundary.into_owned()) } #[napi] @@ -55,6 +55,9 @@ fn parse_parts(body: &[u8], boundary: &[u8]) -> Result> { fn convert_part(part: &Part<'_>) -> Result { let body_range = part.body_range(); + let name = decode_optional_text(part.name(), "name")?; + let file_name = decode_optional_text(part.file_name(), "file_name")?; + let content_type = decode_optional_bytes(part.content_type(), "content_type")?; let mut headers = Vec::with_capacity(part.headers().len()); for header in part.headers() { headers.push(JsHeader { @@ -64,9 +67,9 @@ fn convert_part(part: &Part<'_>) -> Result { } Ok(JsPart { - name: decode_optional_text(part.name(), "name")?, - file_name: decode_optional_text(part.file_name(), "file_name")?, - content_type: decode_optional_bytes(part.content_type(), "content_type")?, + name, + file_name, + content_type, body_start: to_u32(body_range.start, "body_start")?, body_end: to_u32(body_range.end, "body_end")?, headers, diff --git a/crates/fastmulp_wasm/src/lib.rs b/crates/fastmulp_wasm/src/lib.rs index b3f053c..a96daae 100644 --- a/crates/fastmulp_wasm/src/lib.rs +++ b/crates/fastmulp_wasm/src/lib.rs @@ -8,7 +8,7 @@ use wasm_bindgen::{JsValue, prelude::wasm_bindgen}; #[wasm_bindgen] pub fn boundary_from_content_type(content_type: &str) -> Option { - parse_boundary(content_type).map(str::to_owned) + parse_boundary(content_type).map(|boundary| boundary.into_owned()) } #[wasm_bindgen] diff --git a/package.json b/package.json index 41bdc46..7de6776 100644 --- a/package.json +++ b/package.json @@ -10,9 +10,12 @@ "check:wasm": "cargo check -p fastmulp-wasm --target 
wasm32-unknown-unknown --locked", "test": "cargo test --workspace --lib --tests --examples --locked", "test:doc": "cargo test -p fastmulp-core --doc --locked && cargo test -p fastmulp-wasm --doc --locked", - "ci:local": "cargo metadata --locked --format-version 1 >/dev/null && pnpm fmt:check && pnpm check && pnpm check:wasm && pnpm lint && pnpm test && pnpm test:doc", + "test:node-binding": "cargo build -p fastmulp-napi --release --locked && node scripts/test-node-binding.mjs", + "test:wasm-binding": "pnpm build:wasm && node scripts/test-wasm-binding.mjs", + "ci:local": "cargo metadata --locked --format-version 1 >/dev/null && pnpm fmt:check && pnpm check && pnpm check:wasm && pnpm lint && pnpm test && pnpm test:doc && pnpm test:node-binding && pnpm test:wasm-binding", "bench": "cargo bench -p fastmulp-core --bench parse --locked", "package:core": "cargo package -p fastmulp-core --locked --allow-dirty --list && cargo publish -p fastmulp-core --locked --allow-dirty --dry-run", + "package:bindings": "pnpm build:node && pnpm build:wasm && node scripts/package-bindings.mjs --dry-run", "release:patch": "./scripts/release.sh patch", "release:minor": "./scripts/release.sh minor", "release:alpha": "./scripts/release.sh alpha", diff --git a/packages/fastmulp-node/README.md b/packages/fastmulp-node/README.md new file mode 100644 index 0000000..b0ce13f --- /dev/null +++ b/packages/fastmulp-node/README.md @@ -0,0 +1,10 @@ +# fastmulp Node.js binding + +Native Node.js package for `fastmulp`. The release workflow packages this template with the platform-specific `fastmulp.node` binary and TypeScript declarations. 
+ +```js +const { parse } = require("fastmulp-node-linux-x64"); + +const parts = parse(bodyBuffer, boundary); +const fileBytes = bodyBuffer.subarray(parts[0].bodyStart, parts[0].bodyEnd); +``` diff --git a/packages/fastmulp-node/index.d.ts b/packages/fastmulp-node/index.d.ts new file mode 100644 index 0000000..fe7e7e0 --- /dev/null +++ b/packages/fastmulp-node/index.d.ts @@ -0,0 +1,19 @@ +export interface Header { + name: string; + value: string; +} + +export interface Part { + name?: string; + fileName?: string; + contentType?: string; + bodyStart: number; + bodyEnd: number; + headers: Header[]; +} + +export function boundaryFromContentType(contentType: string): string | undefined; + +export function parse(body: Uint8Array, boundary: string): Part[]; + +export function parseContentType(body: Uint8Array, contentType: string): Part[]; diff --git a/packages/fastmulp-node/index.js b/packages/fastmulp-node/index.js new file mode 100644 index 0000000..1da928f --- /dev/null +++ b/packages/fastmulp-node/index.js @@ -0,0 +1,3 @@ +"use strict"; + +module.exports = require("./fastmulp.node"); diff --git a/packages/fastmulp-node/package.json b/packages/fastmulp-node/package.json new file mode 100644 index 0000000..fb7d782 --- /dev/null +++ b/packages/fastmulp-node/package.json @@ -0,0 +1,21 @@ +{ + "name": "fastmulp-node", + "version": "0.0.0", + "description": "Native Node.js bindings for fastmulp multipart/form-data parsing.", + "license": "GPL-3.0-or-later", + "main": "index.js", + "types": "index.d.ts", + "files": [ + "fastmulp.node", + "index.d.ts", + "index.js", + "LICENSE", + "README.md" + ], + "engines": { + "node": ">=18" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/packages/fastmulp-wasm/README.md b/packages/fastmulp-wasm/README.md new file mode 100644 index 0000000..38805f8 --- /dev/null +++ b/packages/fastmulp-wasm/README.md @@ -0,0 +1,10 @@ +# fastmulp wasm binding + +Browser package for `fastmulp`. 
The release workflow builds the Rust wasm target, runs `wasm-bindgen --target bundler`, and packs the generated JavaScript, wasm, and TypeScript declarations. + +```ts +import { parse } from "fastmulp-wasm"; + +const parts = parse(formBytes, boundary); +const fieldBytes = formBytes.subarray(parts[0].body_start, parts[0].body_end); +``` diff --git a/packages/fastmulp-wasm/package.json b/packages/fastmulp-wasm/package.json new file mode 100644 index 0000000..b7f61d1 --- /dev/null +++ b/packages/fastmulp-wasm/package.json @@ -0,0 +1,28 @@ +{ + "name": "fastmulp-wasm", + "version": "0.0.0", + "description": "Browser wasm bindings for fastmulp multipart/form-data parsing.", + "license": "GPL-3.0-or-later", + "type": "module", + "module": "fastmulp_wasm.js", + "types": "fastmulp_wasm.d.ts", + "exports": { + ".": { + "types": "./fastmulp_wasm.d.ts", + "import": "./fastmulp_wasm.js" + }, + "./fastmulp_wasm_bg.wasm": "./fastmulp_wasm_bg.wasm" + }, + "files": [ + "fastmulp_wasm.d.ts", + "fastmulp_wasm.js", + "fastmulp_wasm_bg.js", + "fastmulp_wasm_bg.wasm", + "fastmulp_wasm_bg.wasm.d.ts", + "LICENSE", + "README.md" + ], + "publishConfig": { + "access": "public" + } +} diff --git a/scripts/install-wasm-bindgen.sh b/scripts/install-wasm-bindgen.sh new file mode 100644 index 0000000..09b2280 --- /dev/null +++ b/scripts/install-wasm-bindgen.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +set -euo pipefail + +version="$( + awk ' + $0 == "name = \"wasm-bindgen\"" { found=1; next } + found && /^version = "/ { + gsub(/^version = "/, "", $0) + gsub(/"$/, "", $0) + print + exit + } + ' Cargo.lock +)" + +test -n "${version}" + +installed="$(wasm-bindgen --version 2>/dev/null | awk '{ print $2 }' || true)" +if [[ "${installed}" == "${version}" ]]; then + echo "wasm-bindgen ${version} is already installed" + exit 0 +fi + +cargo install wasm-bindgen-cli --version "${version}" --locked diff --git a/scripts/package-bindings.mjs b/scripts/package-bindings.mjs new file mode 100644 index 
0000000..b6c0d25 --- /dev/null +++ b/scripts/package-bindings.mjs @@ -0,0 +1,136 @@ +import { execFileSync } from "node:child_process"; +import { + copyFileSync, + cpSync, + existsSync, + mkdirSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +const root = resolve(dirname(fileURLToPath(import.meta.url)), ".."); +const mode = process.argv.includes("--pack") ? "pack" : "dry-run"; +const distRoot = join(root, "dist", "bindings"); +const artifactsDir = join(distRoot, "artifacts"); + +const version = workspaceVersion(); + +rmSync(distRoot, { force: true, recursive: true }); +mkdirSync(artifactsDir, { recursive: true }); + +const packages = [prepareNodePackage(), prepareWasmPackage()]; +for (const packageDir of packages) { + packPackage(packageDir); +} + +function prepareNodePackage() { + const packageDir = join(distRoot, "fastmulp-node"); + cpSync(join(root, "packages", "fastmulp-node"), packageDir, { recursive: true }); + copyFileSync(join(root, "LICENSE"), join(packageDir, "LICENSE")); + copyFileSync(nativeLibraryPath(), join(packageDir, "fastmulp.node")); + + const platform = npmPlatform(); + const arch = npmArch(); + updatePackageJson(packageDir, { + name: `fastmulp-node-${platform}-${arch}`, + version, + os: [platform], + cpu: [arch], + }); + return packageDir; +} + +function prepareWasmPackage() { + const packageDir = join(distRoot, "fastmulp-wasm"); + cpSync(join(root, "packages", "fastmulp-wasm"), packageDir, { recursive: true }); + copyFileSync(join(root, "LICENSE"), join(packageDir, "LICENSE")); + + const wasmInput = join( + root, + "target", + "wasm32-unknown-unknown", + "release", + "fastmulp_wasm.wasm", + ); + if (!existsSync(wasmInput)) { + throw new Error(`wasm build output was not found at ${wasmInput}`); + } + + execFileSync( + "wasm-bindgen", + [wasmInput, "--target", "bundler", "--out-dir", packageDir, "--out-name", "fastmulp_wasm"], + { 
cwd: root, stdio: "inherit" },
+  );
+  updatePackageJson(packageDir, { version });
+  return packageDir;
+}
+
+function packPackage(packageDir) { // Runs npm pack --json inside packageDir and logs the resulting name@version; returns nothing.
+  const args = ["pack", "--json"];
+  if (mode === "pack") { // mode and artifactsDir are defined outside this function (not visible in this chunk)
+    args.push("--pack-destination", artifactsDir); // pack mode: npm saves the tarball into artifactsDir
+  } else {
+    args.push("--dry-run"); // any other mode: npm only reports what it would pack
+  }
+
+  const output = execFileSync("npm", args, {
+    cwd: packageDir,
+    encoding: "utf8", // stdout comes back as a string so it can go straight into JSON.parse
+    stdio: ["ignore", "pipe", "inherit"], // stdin ignored, stdout captured, stderr forwarded to this process
+  });
+  const [packed] = JSON.parse(output); // only the first entry of the parsed array is used
+  const action = mode === "pack" ? "packed" : "validated";
+  console.log(`${action} ${packed.name}@${packed.version}`);
+}
+
+function updatePackageJson(packageDir, updates) { // Shallow-merges updates into the package.json found in packageDir and rewrites that file in place.
+  const packageJsonPath = join(packageDir, "package.json");
+  const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8"));
+  Object.assign(packageJson, updates); // top-level keys from updates overwrite existing ones; nested objects are replaced, not merged
+  writeFileSync(packageJsonPath, `${JSON.stringify(packageJson, null, 2)}\n`); // serialized with 2-space indentation plus a trailing newline
+}
+
+function workspaceVersion() { // Returns the version string declared under [workspace.package] in the Cargo.toml at root; throws Error when none is found.
+  const cargoToml = readFileSync(join(root, "Cargo.toml"), "utf8");
+  const workspacePackage = cargoToml.match(/\[workspace\.package\]([\s\S]*?)(?:\n\[|$)/); // captures the text after the section header up to the next line starting with [ or the end of the file
+  const versionMatch = workspacePackage?.[1].match(/^\s*version = "([^"]+)"/m);
+  if (!versionMatch) { // true both when the section is missing and when it has no matching version line
+    throw new Error("workspace package version was not found in Cargo.toml");
+  }
+  return versionMatch[1];
+}
+
+function nativeLibraryPath() { // Returns the path of the first existing napi build output in the release directory under root; throws Error if none exists.
+  const releaseDir = join(root, "target", "release");
+  const candidates = // file names to look for, chosen by process.platform
+    process.platform === "darwin"
+      ? ["libfastmulp_napi.dylib"]
+      : process.platform === "win32"
+        ? ["fastmulp_napi.dll", "fastmulp-napi.dll"] // two spellings are tried on win32
+        : ["libfastmulp_napi.so"]; // every other platform uses the .so name
+  const nativeLibrary = candidates
+    .map((candidate) => join(releaseDir, candidate))
+    .find((candidate) => existsSync(candidate)); // first candidate present on disk, otherwise undefined
+
+  if (!nativeLibrary) {
+    throw new Error(`fastmulp napi build output was not found in ${releaseDir}`);
+  }
+  return nativeLibrary;
+}
+
+function npmPlatform() { // Returns process.platform; NOTE(review): the win32 branch returns the same value as the fallthrough.
+  if (process.platform === "win32") {
+    return "win32";
+  }
+  return process.platform;
+}
+
+function npmArch() { // Returns process.arch when it is x64 or arm64; throws Error for any other architecture.
+  if (process.arch === "x64" || process.arch === "arm64") {
+    return process.arch;
+  }
+  throw new Error(`unsupported Node package architecture: ${process.arch}`);
+}
diff --git a/scripts/test-node-binding.mjs b/scripts/test-node-binding.mjs
new file mode 100644
index 0000000..6799d24
--- /dev/null
+++ b/scripts/test-node-binding.mjs
@@ -0,0 +1,75 @@
+import assert from "node:assert/strict"; // Smoke test: loads the release napi library as a Node addon and checks its exports and parse results.
+import { mkdtempSync, copyFileSync, existsSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+import { createRequire } from "node:module";
+
+const releaseDir = resolve("target", "release"); // relative segments, so this resolves against the current working directory
+const candidates = // file names to look for, chosen by process.platform
+  process.platform === "darwin"
+    ? ["libfastmulp_napi.dylib"]
+    : process.platform === "win32"
+      ? ["fastmulp_napi.dll", "fastmulp-napi.dll"]
+      : ["libfastmulp_napi.so"];
+
+const nativeLibrary = candidates
+  .map((candidate) => join(releaseDir, candidate))
+  .find((candidate) => existsSync(candidate)); // first candidate present on disk, otherwise undefined
+
+if (!nativeLibrary) {
+  throw new Error(`fastmulp napi build output was not found in ${releaseDir}`);
+}
+
+const tempDir = mkdtempSync(join(tmpdir(), "fastmulp-napi-"));
+const addonPath = join(tempDir, "fastmulp.node");
+copyFileSync(nativeLibrary, addonPath); // copied under a .node name so that require loads it as a native addon
+
+try {
+  const require = createRequire(import.meta.url); // ES modules have no built-in require, so one is created for loading the addon
+  const addon = require(addonPath);
+
+  assert.deepEqual(Object.keys(addon).sort(), [ // the addon must export exactly these three names
+    "boundaryFromContentType",
+    "parse",
+    "parseContentType",
+  ]);
+
+  const body = Buffer.from(
+    '--abc123\r\nContent-Disposition: form-data; name="field"\r\n\r\npayload\r\n--abc123--\r\n',
+  );
+  const parts = addon.parse(body, "abc123"); // second argument is the boundary used inside body
+  assert.equal(parts.length, 1);
+  assert.equal(parts[0].name, "field");
+  assert.equal(body.subarray(parts[0].bodyStart, parts[0].bodyEnd).toString(), "payload"); // bodyStart and bodyEnd are used as offsets into the input buffer
+  assert.deepEqual(parts[0].headers, [
+    {
+      name: "Content-Disposition",
+      value: 'form-data; name="field"',
+    },
+  ]);
+
+  const escapedBoundaryBody = Buffer.from(
+    '--abc:123\r\nContent-Disposition: form-data; name="field"\r\n\r\npayload\r\n--abc:123--\r\n',
+  );
+  const contentTypeParts = addon.parseContentType(
+    escapedBoundaryBody,
+    'multipart/form-data; boundary="abc\\:123"', // header value carries a quoted boundary with a backslash-escaped colon
+  );
+  assert.equal(contentTypeParts[0].name, "field");
+  assert.equal(
+    addon.boundaryFromContentType('multipart/form-data; boundary="abc\\:123"'),
+    "abc:123", // the escape is expected to be removed from the extracted boundary
+  );
+
+  const invalidUtf8Name = Buffer.concat([
+    Buffer.from('--abc123\r\nContent-Disposition: form-data; name="'),
+    Buffer.from([0xff]), // a lone 0xff byte placed inside the name parameter; the test expects it to be rejected
+    Buffer.from('"\r\n\r\npayload\r\n--abc123--\r\n'),
+  ]);
+  assert.throws(
+    () => addon.parse(invalidUtf8Name, "abc123"),
+    /name must be valid UTF-8/, // the thrown error message has to match this pattern
+  );
+} finally {
+  rmSync(tempDir, { force: true, recursive: true }); // remove the temporary directory even when an assertion fails
+}
diff --git a/scripts/test-wasm-binding.mjs b/scripts/test-wasm-binding.mjs
new file mode 100644
index 0000000..f5cf961
--- /dev/null
+++ b/scripts/test-wasm-binding.mjs
@@ -0,0 +1,59 @@
+import assert from "node:assert/strict"; // Smoke test: runs wasm-bindgen on the release wasm build and exercises the generated Node.js module.
+import { execFileSync } from "node:child_process";
+import { existsSync, mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+import { createRequire } from "node:module";
+
+const wasmInput = resolve( // relative segments, so this resolves against the current working directory
+  "target",
+  "wasm32-unknown-unknown",
+  "release",
+  "fastmulp_wasm.wasm",
+);
+
+if (!existsSync(wasmInput)) {
+  throw new Error(`fastmulp wasm build output was not found at ${wasmInput}`);
+}
+
+const tempDir = mkdtempSync(join(tmpdir(), "fastmulp-wasm-"));
+
+try {
+  execFileSync(
+    "wasm-bindgen", // invoked by bare name, so the CLI has to be resolvable through PATH
+    [wasmInput, "--target", "nodejs", "--out-dir", tempDir, "--out-name", "fastmulp_wasm"], // generated files go into tempDir
+    { stdio: "inherit" },
+  );
+
+  const require = createRequire(import.meta.url); // ES modules have no built-in require, so one is created for loading the generated module
+  const wasm = require(join(tempDir, "fastmulp_wasm.js")); // generated entry file, named after the --out-name value
+
+  assert.deepEqual(Object.keys(wasm).sort(), [ // the generated module must export exactly these three names
+    "boundary_from_content_type",
+    "parse",
+    "parseContentType",
+  ]);
+
+  const body = Buffer.from(
+    '--abc123\r\nContent-Disposition: form-data; name="field"\r\n\r\npayload\r\n--abc123--\r\n',
+  );
+  const parts = wasm.parse(body, "abc123"); // second argument is the boundary used inside body
+  assert.equal(parts.length, 1);
+  assert.equal(parts[0].name, "field");
+  assert.equal(body.subarray(parts[0].body_start, parts[0].body_end).toString(), "payload"); // body_start and body_end are used as offsets into the input buffer
+
+  const escapedBoundaryBody = Buffer.from(
+    '--abc:123\r\nContent-Disposition: form-data; name="field"\r\n\r\npayload\r\n--abc:123--\r\n',
+  );
+  const contentTypeParts = wasm.parseContentType(
+    escapedBoundaryBody,
+    'multipart/form-data; boundary="abc\\:123"', // header value carries a quoted boundary with a backslash-escaped colon
+  );
+  assert.equal(contentTypeParts[0].name, "field");
+  assert.equal(
+    wasm.boundary_from_content_type('multipart/form-data; boundary="abc\\:123"'),
+    "abc:123", // the escape is expected to be removed from the extracted boundary
+  );
+} finally {
+  rmSync(tempDir, { force: true, recursive: true }); // remove the temporary directory even when an assertion fails
+}