From 6e3fcd6283f9b8b1fc385485d2fc4a465d53f079 Mon Sep 17 00:00:00 2001 From: Ryan Rauh Date: Sat, 9 May 2026 09:16:40 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20add=20text=20measurement=20helpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- measure.ts | 177 +++++++++++++++++++++++++++++++++++++++++ mod.ts | 1 + ops.ts | 24 +++++- specs/renderer-spec.md | 65 +++++++++++++++ test/measure.test.ts | 159 ++++++++++++++++++++++++++++++++++++ 5 files changed, 422 insertions(+), 4 deletions(-) create mode 100644 measure.ts create mode 100644 test/measure.test.ts diff --git a/measure.ts b/measure.ts new file mode 100644 index 0000000..dcc5e2d --- /dev/null +++ b/measure.ts @@ -0,0 +1,177 @@ +export interface WrapTextOptions { + mode?: "words" | "newlines" | "none"; +} + +export interface WrappedLine { + text: string; + width: number; +} + +export function measureCellWidth(text: string): number { + let width = 0; + for (let char of text) width += cellWidth(char); + return width; +} + +export function wrapText( + text: string, + width: number, + options: WrapTextOptions = {}, +): WrappedLine[] { + assertValidWidth(width); + if (text.length === 0) return []; + + let mode = options.mode ?? "words"; + switch (mode) { + case "none": { + let collapsed = text.replaceAll("\n", ""); + return collapsed === "" ? [] : line(collapsed); + } + case "newlines": + return text.split("\n").flatMap((part) => part === "" ? [] : line(part)); + case "words": + return wrapWords(text, width); + } +} + +export function measureWrappedHeight( + text: string, + width: number, + options: WrapTextOptions = {}, +): number { + assertValidWidth(width); + if (text.length === 0) return 0; + + let mode = options.mode ?? "words"; + switch (mode) { + case "none": + return text.replaceAll("\n", "") === "" ? 0 : 1; + case "newlines": + return countNonEmptyNewlineParts(text); + case "words": + return countWrappedWords(text, width); + } +} + +function line(text: string): WrappedLine[] { + return [{ text, width: measureCellWidth(text) }]; +} + +function assertValidWidth(width: number): void { + if (!Number.isFinite(width) || width < 0) { + throw new RangeError( + `width must be a finite non-negative number: ${width}`, + ); + } +} + +function wrapWords(text: string, maxWidth: number): WrappedLine[] { + let out: WrappedLine[] = []; + for (let paragraph of text.split("\n")) { + if (paragraph === "") continue; + let current = ""; + let currentWidth = 0; + + for (let token of tokens(paragraph)) { + let tokenWidth = measureCellWidth(token); + if (current !== "" && currentWidth + tokenWidth > maxWidth) { + out.push({ + text: current.trimEnd(), + width: measureCellWidth(current.trimEnd()), + }); + current = token.trimStart(); + currentWidth = measureCellWidth(current); + } else { + current += token; + currentWidth += tokenWidth; + } + + if (current !== "" && currentWidth > maxWidth && token.trim() !== "") { + out.push({ + text: current.trimEnd(), + width: measureCellWidth(current.trimEnd()), + }); + current = ""; + currentWidth = 0; + } + } + + if (current !== "") { + let text = current.trimEnd(); + if (text !== "") out.push({ text, width: measureCellWidth(text) }); + } + } + return out; +} + +function countWrappedWords(text: string, maxWidth: number): number { + let count = 0; + for (let paragraph of text.split("\n")) { + if (paragraph === "") continue; + let current = ""; + let currentWidth = 0; + + for (let token of tokens(paragraph)) { + let tokenWidth = measureCellWidth(token); + if (current !== "" && currentWidth + tokenWidth > maxWidth) { + let trimmed = current.trimEnd(); + if (trimmed !== "") count++; + current = token.trimStart(); + currentWidth = measureCellWidth(current); + } else { + current += token; + currentWidth += tokenWidth; + } + + if (current !== "" && currentWidth > maxWidth && token.trim() !== "") { + count++; + current = ""; + currentWidth = 0; + } + } + + if (current.trimEnd() !== "") count++; + } + return count; +} + +function countNonEmptyNewlineParts(text: string): number { + let count = 0; + for (let part of text.split("\n")) if (part !== "") count++; + return count; +} + +function* tokens(text: string): IterableIterator { + let re = /\S+\s*/g; + for (let match of text.matchAll(re)) yield match[0]; +} + +function cellWidth(char: string): number { + let code = char.codePointAt(0)!; + if (code === 0) return 0; + if (code < 32 || (code >= 0x7F && code < 0xA0)) return 0; + if (isCombining(code)) return 0; + return isWide(code) ? 2 : 1; +} + +function isCombining(code: number): boolean { + return (code >= 0x0300 && code <= 0x036F) || + (code >= 0x1AB0 && code <= 0x1AFF) || + (code >= 0x1DC0 && code <= 0x1DFF) || + (code >= 0x20D0 && code <= 0x20FF) || + (code >= 0xFE20 && code <= 0xFE2F); +} + +function isWide(code: number): boolean { + return (code >= 0x1100 && code <= 0x115F) || + code === 0x2329 || code === 0x232A || + (code >= 0x2E80 && code <= 0xA4CF && code !== 0x303F) || + (code >= 0xAC00 && code <= 0xD7A3) || + (code >= 0xF900 && code <= 0xFAFF) || + (code >= 0xFE10 && code <= 0xFE19) || + (code >= 0xFE30 && code <= 0xFE6F) || + (code >= 0xFF00 && code <= 0xFF60) || + (code >= 0xFFE0 && code <= 0xFFE6) || + (code >= 0x1F300 && code <= 0x1FAFF) || + (code >= 0x20000 && code <= 0x3FFFD); +} diff --git a/mod.ts b/mod.ts index 8862d13..a8d1dbd 100644 --- a/mod.ts +++ b/mod.ts @@ -3,3 +3,4 @@ export * from "./term.ts"; export * from "./input.ts"; export * from "./settings.ts"; export * from "./termcodes.ts"; +export * from "./measure.ts"; diff --git a/ops.ts b/ops.ts index 3344eea..c76d584 100644 --- a/ops.ts +++ b/ops.ts @@ -52,11 +52,27 @@ function packAxis(view: DataView, offset: number, axis: SizingAxis): number { return o; } -function packString(view: DataView, bytes: Uint8Array, o: number): number { +function packString( + view: DataView, + bytes: Uint8Array, + o: number, + end: number, + context: string, +): number { + let paddedLength = Math.ceil(bytes.length / 4) * 4; + let next = o + 4 + paddedLength; + if (next > end) { + throw new RangeError( + `clayterm transfer buffer capacity exceeded while packing ${context} ` + + `(${next} byte offset, ${end} byte limit). ` + + `Render a smaller visible slice or reduce frame content.`, + ); + } + view.setUint32(o, bytes.length, true); o += 4; new Uint8Array(view.buffer).set(bytes, o); - o += Math.ceil(bytes.length / 4) * 4; + o += paddedLength; return o; } @@ -82,7 +98,7 @@ export function pack( o += 4; let bytes = encoder.encode(op.id); - o = packString(view, bytes, o); + o = packString(view, bytes, o, end, "element id"); let mask = 0; if (op.layout) mask |= PROP_LAYOUT; @@ -192,7 +208,7 @@ export function pack( o += 4; let str = encoder.encode(op.content); - o = packString(view, str, o); + o = packString(view, str, o, end, "text content"); break; } } diff --git a/specs/renderer-spec.md b/specs/renderer-spec.md index fa4276a..22bb928 100644 --- a/specs/renderer-spec.md +++ b/specs/renderer-spec.md @@ -721,6 +721,71 @@ and used in tests. array into the transfer encoding described in Section 12.1. Currently exported but not public API; its exposure is incidental to the module structure. +### 12.6 Text measurement helpers + +The module may also expose pure TypeScript text-measurement helpers for callers +that need pre-layout estimates without instantiating a `Term`: + +```ts +interface WrapTextOptions { + mode?: "words" | "newlines" | "none"; +} + +interface WrappedLine { + text: string; + width: number; +} + +measureCellWidth(text: string): number; +wrapText( + text: string, + width: number, + options?: WrapTextOptions, +): WrappedLine[]; +measureWrappedHeight( + text: string, + width: number, + options?: WrapTextOptions, +): number; +``` + +The current intended behavior is: + +- `measureCellWidth()` returns the terminal cell width of the full string using + the same Unicode-width model described in Section 13. +- `wrapText()` returns line records with both the emitted text and its measured + width. +- `measureWrappedHeight()` returns the number of wrapped lines that `wrapText()` + would produce for the same inputs. +- `mode: "words"` wraps on token boundaries while preserving explicit newline + breaks. +- `mode: "newlines"` splits only on explicit `\n` characters and does not + perform width-based wrapping. +- `mode: "none"` collapses explicit newlines and returns at most one line. +- The helpers operate on JavaScript strings directly. They do not require the + caller's text to be copied into WASM linear memory or encoded into a full + UTF-8 byte buffer as a precondition for measurement. +- Large-input behavior is bounded by host JavaScript memory, not by Clayterm's + WASM linear-memory capacity. Inputs materially larger than the renderer's + initial WASM memory footprint are intended to remain valid helper inputs. +- `measureCellWidth()` and `measureWrappedHeight()` are intended to process + large inputs in a single pass over the string without allocating auxiliary + storage proportional to the UTF-8 byte length of the entire input. + `wrapText()` necessarily allocates output proportional to the number of + wrapped lines it returns, but it likewise should not require a second + full-input UTF-8 buffer. +- Rendering oversized whole-document input remains constrained by the renderer's + transfer buffer. If a frame exceeds transfer-buffer capacity while packing + text, Clayterm MUST throw a descriptive `RangeError` identifying the capacity + failure and SHOULD direct callers to render a smaller visible slice or reduce + frame content. Clayterm MUST NOT expose only the raw host-level TypedArray + message `"offset is out of bounds"` for this condition. + +These helpers are independent of the renderer's frame lifecycle and perform no +IO or WASM interaction. They exist as convenience APIs for higher-level +frameworks and virtualized views that need width and height estimation before +building directive arrays. + --- ## 13. Implementation Notes diff --git a/test/measure.test.ts b/test/measure.test.ts new file mode 100644 index 0000000..584235a --- /dev/null +++ b/test/measure.test.ts @@ -0,0 +1,159 @@ +import { describe, expect, it } from "./suite.ts"; +import { + close, + createTerm, + fixed, + grow, + measureCellWidth, + measureWrappedHeight, + open, + text, + wrapText, +} from "../mod.ts"; +import { createTermNative } from "../term-native.ts"; +import { print } from "./print.ts"; + +const decode = (bytes: Uint8Array) => new TextDecoder().decode(bytes); + +function makeOversizedDocument(memoryBytes: number) { + let targetBytes = memoryBytes * 2 + 65_536; + let encoder = new TextEncoder(); + let lines: string[] = []; + let bytes = 0; + for (let i = 0; bytes < targetBytes; i++) { + let line = `line-${i.toString().padStart(6, "0")} 🙂🙂🙂🙂🙂🙂🙂🙂🙂🙂`; + lines.push(line); + bytes += encoder.encode(line + "\n").length; + } + return { content: lines.join("\n"), lines, bytes }; +} + +describe("text measurement helpers", () => { + it("exports helpers that can be called without creating a Term", () => { + expect(measureCellWidth("hello")).toBe(5); + expect(wrapText("hello world", 5).map((l) => l.text)).toEqual([ + "hello", + "world", + ]); + expect(measureWrappedHeight("hello world", 5)).toBe(2); + }); + + it("measures ASCII, combining marks, and wide characters", () => { + expect(measureCellWidth("hello")).toBe(5); + expect(measureCellWidth("e\u0301")).toBe(1); + expect(measureCellWidth("文字")).toBe(4); + expect(measureCellWidth("🙂")).toBe(2); + }); + + it("supports words, newlines, and none wrap modes", () => { + expect(wrapText("hello world", 5)).toEqual([ + { text: "hello", width: 5 }, + { text: "world", width: 5 }, + ]); + expect(wrapText("hello world", 5, { mode: "words" })).toEqual( + wrapText("hello world", 5), + ); + + expect(wrapText("hello world\nwide", 5, { mode: "newlines" })).toEqual([ + { text: "hello world", width: 11 }, + { text: "wide", width: 4 }, + ]); + + expect(wrapText("hello\nworld", 5, { mode: "none" })).toEqual([ + { text: "helloworld", width: 10 }, + ]); + }); + + it("keeps measureWrappedHeight equal to wrapText length", () => { + for (let mode of ["words", "newlines", "none"] as const) { + let input = "one two three\nfour five"; + expect(measureWrappedHeight(input, 7, { mode })).toBe( + wrapText(input, 7, { mode }).length, + ); + } + }); + + it("handles empty input deterministically", () => { + expect(measureCellWidth("")).toBe(0); + expect(wrapText("", 10)).toEqual([]); + expect(measureWrappedHeight("", 10)).toBe(0); + }); + + it("rejects invalid widths", () => { + for (let width of [-1, Number.NaN, Number.POSITIVE_INFINITY]) { + expect(() => wrapText("x", width)).toThrow(RangeError); + expect(() => measureWrappedHeight("x", width)).toThrow(RangeError); + } + }); + + it("does not instantiate WebAssembly or UTF-8 encode as a precondition for measurement", () => { + let instantiate = WebAssembly.instantiate; + let encode = TextEncoder.prototype.encode; + try { + WebAssembly.instantiate = (() => { + throw new Error("unexpected wasm instantiate"); + }) as typeof WebAssembly.instantiate; + TextEncoder.prototype.encode = function () { + throw new Error("unexpected text encode"); + }; + + expect(measureCellWidth("hello 🙂")).toBe(8); + expect(measureWrappedHeight("hello world", 5)).toBe(2); + expect(wrapText("hello world", 5).length).toBe(2); + } finally { + WebAssembly.instantiate = instantiate; + TextEncoder.prototype.encode = encode; + } + }); + + it("renders a visible window from a UTF-8 document larger than 2x renderer memory", async () => { + let native = await createTermNative(80, 24); + let document = makeOversizedDocument(native.memory.buffer.byteLength); + expect(document.bytes).toBeGreaterThan(native.memory.buffer.byteLength * 2); + + let term = await createTerm({ width: 80, height: 24 }); + let error: unknown; + try { + term.render([ + open("root", { layout: { width: grow(), height: grow() } }), + text(document.content), + close(), + ]); + } catch (caught) { + error = caught; + } + expect(error).toBeInstanceOf(RangeError); + expect((error as Error).message).toMatch( + /transfer buffer|capacity|packing/, + ); + expect((error as Error).message).not.toBe("offset is out of bounds"); + expect((error as Error).message).toMatch( + /smaller visible slice|reduce frame content/, + ); + + expect(measureWrappedHeight(document.content, 80, { mode: "newlines" })) + .toBe(document.lines.length); + + let start = Math.floor(document.lines.length / 2); + let visible = document.lines.slice(start, start + 3).join("\n"); + let out = print( + decode( + term.render([ + open("root", { + layout: { width: fixed(80), height: fixed(24), direction: "ttb" }, + }), + text(visible), + close(), + ]).output, + ), + 80, + 24, + ); + + let marker = (index: number) => document.lines[index].slice(0, 11); + expect(out).toContain(marker(start)); + expect(out).toContain(marker(start + 2)); + expect(out).not.toContain(marker(start - 1)); + expect(out).not.toContain(marker(start + 3)); + }); +});