Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,27 @@ jobs:
cargo test -p fastmulp-core --doc --locked
cargo test -p fastmulp-wasm --doc --locked

# Smoke-tests the Node.js and wasm bindings.
bindings:
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    - uses: actions/checkout@v6

    # Stable Rust plus the wasm32 target used for the wasm binding build.
    - uses: dtolnay/rust-toolchain@stable
      with:
        targets: wasm32-unknown-unknown
    - uses: Swatinem/rust-cache@v2

    # Quoted so the version is read as a string, not a number.
    - uses: actions/setup-node@v6
      with:
        node-version: "24"

    # pnpm is provided through Corepack rather than a separate install action.
    - name: Enable pnpm
      run: corepack enable

    - name: Install wasm-bindgen CLI
      run: bash scripts/install-wasm-bindgen.sh

    - name: Node binding smoke test
      run: pnpm test:node-binding

    - name: Wasm binding smoke test
      run: pnpm test:wasm-binding

bench:
if: github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
Expand Down
73 changes: 73 additions & 0 deletions .github/workflows/release_bindings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Builds, tests, and packages the Node.js and wasm bindings for every pushed
# `v*` tag, then attaches the tarballs and their checksums to the GitHub
# Release of the same name.
name: release-bindings

on:
  push:
    tags:
      - "v*"

# The job's token must be able to create releases and upload release assets.
permissions:
  contents: write

# One run per workflow/ref at a time; an in-flight release is never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false

defaults:
  run:
    shell: bash

env:
  CARGO_TERM_COLOR: always

jobs:
  publish-binding-artifacts:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v6
        with:
          # Full history is required so the tagged commit can be compared
          # against origin/main below.
          fetch-depth: 0

      - uses: dtolnay/rust-toolchain@stable
        with:
          targets: wasm32-unknown-unknown
      - uses: Swatinem/rust-cache@v2

      - uses: actions/setup-node@v6
        with:
          node-version: "24"
      - name: Enable pnpm
        run: corepack enable

      # Ask git directly whether the tagged commit is reachable from
      # origin/main instead of grepping `git branch` output: the answer does
      # not depend on output padding, and there is no pipeline whose early
      # exit could trip `pipefail`.
      - name: Verify tag is on main
        run: |
          if ! git merge-base --is-ancestor "${GITHUB_SHA}" origin/main; then
            echo "::error::${GITHUB_REF_NAME} (${GITHUB_SHA}) is not reachable from origin/main"
            exit 1
          fi

      # The tag, minus its leading `v`, must equal the `version` declared in
      # the `[workspace.package]` section of Cargo.toml.
      - name: Verify tag matches workspace version
        run: |
          expected="${GITHUB_REF_NAME#v}"
          actual="$(awk '
            /^\[workspace\.package\]/ { in_section=1; next }
            /^\[/ && in_section { exit }
            in_section && /^version = "/ {
              gsub(/^version = "/, "", $0)
              gsub(/"$/, "", $0)
              print
              exit
            }
          ' Cargo.toml)"
          if [[ "${expected}" != "${actual}" ]]; then
            echo "::error::tag version '${expected}' does not match workspace version '${actual}'"
            exit 1
          fi

      - name: Build and test Node binding
        run: pnpm test:node-binding

      - name: Install wasm-bindgen CLI
        run: bash scripts/install-wasm-bindgen.sh

      - name: Test wasm binding
        run: pnpm test:wasm-binding

      - name: Package binding artifacts
        run: node scripts/package-bindings.mjs --pack

      # Checksums are generated from inside the artifacts directory so that
      # SHA256SUMS lists bare file names.
      - name: Checksum artifacts
        run: |
          cd dist/bindings/artifacts
          sha256sum *.tgz > SHA256SUMS

      # Reuse the release if it already exists for this tag; otherwise create
      # it. `--clobber` replaces assets of the same name, so re-runs are safe.
      - name: Create or update GitHub release
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          gh release view "${GITHUB_REF_NAME}" >/dev/null 2>&1 \
            || gh release create "${GITHUB_REF_NAME}" --verify-tag --title "${GITHUB_REF_NAME}"
          gh release upload "${GITHUB_REF_NAME}" \
            dist/bindings/artifacts/*.tgz \
            dist/bindings/artifacts/SHA256SUMS \
            --clobber
31 changes: 26 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ High-accuracy, low-allocation `multipart/form-data` parsing with a zero-copy Rus
- The Rust core parses against a borrowed `&[u8]` and returns body ranges instead of copying payload bytes.
- `Content-Disposition` is parsed eagerly, and `name` is enforced for `form-data` parts.
- Header storage uses `SmallVec` so the common case stays stack-friendly.
- Node.js and browser bindings return metadata plus `body_start` / `body_end`, so callers can slice the original buffer themselves.
- Node.js and browser bindings return metadata plus body ranges, so callers can slice the original buffer themselves.
- Boundary lines accept RFC 2046 transport padding, plus MIME-style preamble and epilogue.

## Spec Notes
Expand Down Expand Up @@ -51,15 +51,14 @@ Node.js:
import { parse } from "./fastmulp.node";

const parts = parse(bodyBuffer, boundary);
const fileBytes = bodyBuffer.subarray(parts[0].body_start, parts[0].body_end);
const fileBytes = bodyBuffer.subarray(parts[0].bodyStart, parts[0].bodyEnd);
```

Browser:

```ts
import init, { parse } from "./fastmulp_wasm.js";
import { parse } from "fastmulp-wasm";

await init();
const parts = parse(formBytes, boundary);
const fieldBytes = formBytes.subarray(parts[0].body_start, parts[0].body_end);
```
Expand All @@ -68,14 +67,36 @@ The wasm target still needs one JS-to-wasm copy at the ABI boundary, but it avoi

Older nested `multipart/mixed` payloads can be handled by recursively calling `parse` on a part body after extracting the nested boundary from that part's `Content-Type`.

### Parser Limits

Use `parse_with_limits` or `MultipartParser::new_with_limits` when parsing untrusted uploads at a server boundary. `ParseLimits` can cap the number of parts, the number of headers per part, and the number of header bytes per part. The plain `parse` API remains unlimited, both for backward compatibility and for small trusted payloads.

### Security Notes

Treat `filename` as display metadata only. Strip path separators and platform-specific path components before using it in storage, generate your own server-side object names, and keep the original value only as untrusted metadata. Preserve duplicate field names in order unless your application explicitly defines a merge rule. For nested `multipart/mixed`, extract the nested boundary from that part's `Content-Type` and parse the nested body with its own limits.

### Binding Artifacts

Tag pushes build npm-compatible release artifacts for the JS targets:

- `fastmulp-node-linux-x64-*.tgz`: platform-specific Node.js native addon containing `fastmulp.node`, a CommonJS entrypoint, and TypeScript declarations.
- `fastmulp-wasm-*.tgz`: browser wasm package containing the `wasm-bindgen`-generated JavaScript glue, the wasm binary, and TypeScript declarations.

The Node binding uses camelCase object fields such as `bodyStart`, `bodyEnd`, `fileName`, and `contentType`. The wasm binding preserves the existing snake_case field names generated by `wasm-bindgen`.

```ts
import { parse as parseNode } from "fastmulp-node-linux-x64";
import { parse as parseWasm } from "fastmulp-wasm";
```

## Release

- `vp run release:patch`
- `vp run release:minor`
- `vp run release:alpha`
- `vp run release:beta`

Each release command updates the workspace version in `Cargo.toml`, creates a release commit, and creates the matching `v...` git tag. Tag pushes publish `fastmulp-core` through GitHub Actions trusted publishing.
Each release command updates the workspace version in `Cargo.toml`, creates a release commit, and creates the matching `v...` git tag. Tag pushes publish `fastmulp-core` through GitHub Actions trusted publishing and attach Node/wasm binding artifacts to the matching GitHub Release.

## Shared Tasks

Expand Down
47 changes: 29 additions & 18 deletions crates/fastmulp_core/src/boundary.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::borrow::Cow;

use smallvec::SmallVec;

use crate::{
Expand All @@ -11,8 +13,12 @@ pub struct Boundary<'a> {
opening: SmallVec<[u8; 72]>,
}

pub fn boundary_from_content_type(_content_type: &str) -> Option<&str> {
let bytes = _content_type.as_bytes();
/// Extracts the `boundary` parameter from a multipart `Content-Type` header value.
///
/// The returned value is borrowed for ordinary parameters and owned when a
/// quoted-string contains MIME quoted-pair escapes that need to be decoded.
pub fn boundary_from_content_type(content_type: &str) -> Option<Cow<'_, str>> {
let bytes = content_type.as_bytes();
let mut cursor = skip_ascii_whitespace(bytes, 0);
let media_start = cursor;

Expand Down Expand Up @@ -52,17 +58,13 @@ pub fn boundary_from_content_type(_content_type: &str) -> Option<&str> {
cursor += 1;
cursor = skip_ascii_whitespace(bytes, cursor);

let value = parse_content_type_value(_content_type, cursor)?;
let value = parse_content_type_value(content_type, cursor)?;
cursor = skip_ascii_whitespace(bytes, value.next);
if cursor < bytes.len() && bytes[cursor] != b';' {
return None;
}

if eq_ignore_ascii_case(name, b"boundary") {
if value.requires_unescape {
return None;
}

return Some(value.raw);
}
}
Expand Down Expand Up @@ -106,9 +108,8 @@ impl<'a> Boundary<'a> {
}

struct ContentTypeValue<'a> {
raw: &'a str,
raw: Cow<'a, str>,
next: usize,
requires_unescape: bool,
}

fn parse_content_type_value(content_type: &str, start: usize) -> Option<ContentTypeValue<'_>> {
Expand All @@ -120,26 +121,37 @@ fn parse_content_type_value(content_type: &str, start: usize) -> Option<ContentT
if bytes[start] == b'"' {
let inner_start = start + 1;
let mut cursor = inner_start;
let mut requires_unescape = false;
let mut decoded = None::<Vec<u8>>;
let mut copied_from = inner_start;
while cursor < bytes.len() {
match bytes[cursor] {
b'"' => {
if let Some(mut decoded) = decoded {
decoded.extend_from_slice(&bytes[copied_from..cursor]);
let raw = String::from_utf8(decoded).ok()?;
return Some(ContentTypeValue {
raw: Cow::Owned(raw),
next: cursor + 1,
});
}

return content_type
.get(inner_start..cursor)
.map(|raw| ContentTypeValue {
raw,
raw: Cow::Borrowed(raw),
next: cursor + 1,
requires_unescape,
});
}
b'\\' => {
requires_unescape = true;
cursor += 1;
if cursor == bytes.len() {
if cursor + 1 == bytes.len() {
return None;
}

cursor += 1;
let decoded = decoded.get_or_insert_with(|| Vec::with_capacity(bytes.len()));
decoded.extend_from_slice(&bytes[copied_from..cursor]);
decoded.push(bytes[cursor + 1]);
cursor += 2;
copied_from = cursor;
}
_ => {
cursor += 1;
Expand All @@ -157,9 +169,8 @@ fn parse_content_type_value(content_type: &str, start: usize) -> Option<ContentT

let end = trim_ascii_end(bytes, cursor);
content_type.get(start..end).map(|raw| ContentTypeValue {
raw,
raw: Cow::Borrowed(raw),
next: cursor,
requires_unescape: false,
})
}

Expand Down
15 changes: 15 additions & 0 deletions crates/fastmulp_core/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use core::fmt;

#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
EmptyBoundary,
Expand All @@ -17,6 +18,9 @@ pub enum Error {
InvalidContentDisposition { offset: usize },
MissingClosingBoundary { offset: usize },
TrailingData { offset: usize },
PartLimitExceeded { limit: usize },
HeaderCountLimitExceeded { limit: usize, offset: usize },
HeaderBytesLimitExceeded { limit: usize, offset: usize },
}

impl fmt::Display for Error {
Expand Down Expand Up @@ -87,6 +91,17 @@ impl fmt::Display for Error {
f,
"multipart body has trailing data after byte offset {offset}"
),
Self::PartLimitExceeded { limit } => {
write!(f, "multipart part count exceeded configured limit {limit}")
}
Self::HeaderCountLimitExceeded { limit, offset } => write!(
f,
"multipart header count exceeded configured limit {limit} at byte offset {offset}"
),
Self::HeaderBytesLimitExceeded { limit, offset } => write!(
f,
"multipart header bytes exceeded configured limit {limit} at byte offset {offset}"
),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/fastmulp_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mod util;
pub use boundary::{Boundary, boundary_from_content_type};
pub use error::{Error, Result};
pub use header::Header;
pub use parser::{Multipart, MultipartParser, parse};
pub use parser::{Multipart, MultipartParser, ParseLimits, parse, parse_with_limits};
pub use part::Part;
pub use text::TextValue;

Expand Down
Loading