diff --git a/docs/.docker/Dockerfile b/docs/.docker/Dockerfile index fd354d22..a9900af7 100644 --- a/docs/.docker/Dockerfile +++ b/docs/.docker/Dockerfile @@ -1,56 +1,103 @@ +# markup-antora — one Antora image to use everywhere. +# +# Consolidates the best of the ~9 Antora images that grew across the +# projects tree. Design priorities, in order: +# 1. Mermaid rendered FULLY OFFLINE — no Kroki server, no kroki.io, +# no CDN at build time and none at view time. Diagrams are baked +# to inline SVG by mermaid-cli (Alpine Chromium + Puppeteer) +# through the local-mermaid-extension.js Asciidoctor block +# processor, with content-hash caching so repeated diagrams render +# once per build. +# 2. Works under `--user $(id -u):$(id -g)` (rootless) without the +# Chromium crashpad / cosmiconfig EACCES failures. +# 3. Offline extras available but not forced: lunr full-text search, +# a pre-baked Antora UI bundle, and MathJax es5 for LaTeX. +# 4. asciidoctor-kroki installed-but-unused as an escape hatch. FROM node:20-alpine -LABEL org.opencontainers.image.title="SKaiNET Antora" \ - org.opencontainers.image.description="Antora site generator with direct local Mermaid rendering (no Kroki round trip)" \ +LABEL org.opencontainers.image.title="markup-antora" \ + org.opencontainers.image.description="Universal Antora site generator with offline Mermaid (mermaid-cli), offline search (lunr), pre-baked UI bundle + MathJax. No Kroki, no CDN." \ org.opencontainers.image.source="https://github.com/SKaiNET-developers/SKaiNET" -# Chromium for mermaid-cli (puppeteer) -RUN apk add --no-cache chromium font-noto +# Chromium for mermaid-cli (Puppeteer). Full font set so diagram labels, +# emoji and CJK render correctly (merged from the Daily-StandAPP image). 
+RUN apk add --no-cache \ + chromium \ + nss \ + freetype \ + harfbuzz \ + ttf-freefont \ + font-noto \ + font-noto-emoji \ + ca-certificates \ + git -# HOME=/tmp: chromium's crashpad handler writes its database under $HOME and -# aborts with `chrome_crashpad_handler: --database is required` when the -# container runs as `--user $(id -u):$(id -g)` and $HOME falls back to `/` -# (no passwd entry, not writable). Same motivation as runtime.cache_dir in -# antora-playbook.yml. +# HOME=/tmp: Chromium's crashpad handler writes its database under $HOME +# and aborts with `--database is required` when the container runs as a +# non-root --user and $HOME falls back to `/` (no passwd entry, not +# writable). Same motivation as runtime.cache_dir in the playbook. ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser \ PUPPETEER_SKIP_DOWNLOAD=true \ HOME=/tmp -# Install Antora + mermaid-cli into /opt/antora (not /antora which gets -# volume-mounted at run time). asciidoctor-kroki is intentionally NOT -# installed — it depends on a Kroki HTTP server (kroki.io or local) -# which returns 400 for large diagrams when using GET and has no -# offline fallback. We render mermaid directly via mermaid-cli through -# the local-mermaid-extension.js asciidoctor block processor. +# Install Antora + tooling into /opt/antora (NOT /antora, which is where +# the project gets volume-mounted at run time). +# - @mermaid-js/mermaid-cli : offline diagram rendering (the point) +# - @antora/lunr-extension : offline full-text search +# - asciidoctor-kroki : escape hatch only; the playbook should +# use the local mermaid extension instead. 
WORKDIR /opt/antora RUN npm init -y && npm i --save-exact \ @antora/cli@3.1 \ @antora/site-generator@3.1 \ + @antora/lunr-extension@1.0.0-alpha.8 \ @mermaid-js/mermaid-cli@11 \ + asciidoctor-kroki@0.18 \ && npm cache clean --force -# Make installed modules visible when workdir is the mounted project +# Make installed modules resolvable even when the workdir is the mounted +# project (which has no node_modules of its own). ENV NODE_PATH=/opt/antora/node_modules -# Mermaid-cli config — used by the local-mermaid-extension to drive -# Puppeteer against the pre-installed Alpine Chromium. -RUN echo '{ \ - "executablePath": "/usr/bin/chromium-browser", \ - "args": ["--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage"] \ -}' > /opt/antora/puppeteer-config.json - -# Bake the local mermaid extension in at an absolute path so the -# Antora playbook can reference it without any volume-mount gymnastics. +# Mermaid-cli / Puppeteer config and the offline block processor, baked +# in at absolute paths the playbook can reference without mount gymnastics. +COPY puppeteer-config.json /opt/antora/puppeteer-config.json COPY local-mermaid-extension.js /opt/antora/local-mermaid-extension.js -# Verify mermaid-cli works end to end at image build time. The cleanup -# also removes mode-0700 root-owned dirs (e.g. /tmp/.config/puppeteer, -# /tmp/.local/share/chromium) that puppeteer/chromium drop into $HOME -# during this run — leaving them in place would make cosmiconfig EACCES -# when the container is later launched with a non-root --user. +# --- Offline assets (available, not forced) ------------------------------- + +# Pre-download the default Antora UI bundle so sites build without hitting +# gitlab.com. 
Reference it from a playbook with: +# ui: +# bundle: +# url: /opt/antora-ui/ui-bundle.zip +# snapshot: true +RUN mkdir -p /opt/antora-ui \ + && wget -q -O /opt/antora-ui/ui-bundle.zip \ + "https://gitlab.com/antora/antora-ui-default/-/jobs/artifacts/HEAD/raw/build/ui-bundle.zip?job=bundle-stable" + +# Pre-download MathJax es5 for offline LaTeX. Copy /opt/mathjax/es5 into a +# supplemental UI or reference it from your UI template for client-side math. +RUN mkdir -p /opt/mathjax \ + && npm pack mathjax@3 --pack-destination /tmp \ + && tar -xzf /tmp/mathjax-*.tgz -C /tmp \ + && cp -r /tmp/package/es5 /opt/mathjax/es5 \ + && rm -rf /tmp/mathjax-* /tmp/package + +# --- Build-time smoke test + rootless cleanup ----------------------------- + +# Verify mermaid-cli works end to end so a broken image fails the build, +# not the user's first run. The cleanup also removes the mode-0700 +# root-owned dirs (/tmp/.config/puppeteer, /tmp/.local/share/chromium, +# /tmp/.cache, /tmp/.npm) that Puppeteer/Chromium drop into $HOME during +# this run — leaving them would make cosmiconfig EACCES when the container +# is later launched with a non-root --user. RUN echo 'graph TD; A-->B;' > /tmp/test.mmd \ - && npx mmdc -i /tmp/test.mmd -o /tmp/test.svg -p /opt/antora/puppeteer-config.json \ + && /opt/antora/node_modules/.bin/mmdc \ + -i /tmp/test.mmd -o /tmp/test.svg \ + -p /opt/antora/puppeteer-config.json --quiet \ && rm -rf /tmp/test.mmd /tmp/test.svg /tmp/.config /tmp/.local /tmp/.npm /tmp/.cache +WORKDIR /antora ENTRYPOINT ["/opt/antora/node_modules/.bin/antora"] CMD ["--stacktrace", "antora-playbook.yml"] diff --git a/docs/.docker/README.md b/docs/.docker/README.md new file mode 100644 index 00000000..f5589dda --- /dev/null +++ b/docs/.docker/README.md @@ -0,0 +1,72 @@ +# Docs Antora image (`docs/.docker`) + +Self-contained Antora image used to build this repo's documentation site. 
+It is the consolidated "markup-antora" image — one definition shared across +the SKaiNET docs projects — vendored here until the public registry image +is published (after which this `Dockerfile` collapses to a single `FROM`). + +## Features + +- **Offline Mermaid** — every `[mermaid]` block is rendered to **inline SVG** + at build time by `mermaid-cli` (Alpine Chromium + Puppeteer) via the baked-in + `local-mermaid-extension.js` Asciidoctor block processor. No Kroki server, + no `kroki.io`, no network — at build time *or* view time. Removes the + asciidoctor-kroki 4 KB GET-URL limit that rejected large diagrams. +- **Diagram caching** — content-hash, in-memory + optional on-disk + (`MERMAID_CACHE_DIR`); identical diagrams render once. +- **Rootless-safe** — runs under `--user $(id -u):$(id -g)` without the + Chromium crashpad / cosmiconfig `EACCES` failures (`HOME=/tmp`, build-time + cleanup of root-owned `/tmp` dirs). +- **Build-time smoke test** — a broken image fails `docker build`, not your + first render. +- **Offline extras** — `@antora/lunr-extension` (search), a pre-baked Antora + UI bundle, and MathJax es5 for LaTeX are available in the image. +- **Kroki escape hatch** — `asciidoctor-kroki` is installed (unused here) for + other diagram types if ever needed. +- Full Alpine font set (`font-noto`, `font-noto-emoji`, `ttf-freefont`, …) so + diagram labels, emoji and CJK render correctly. + +## Files + +| File | Purpose | +|---|---| +| `Dockerfile` | The consolidated image definition (build context = this dir). | +| `local-mermaid-extension.js` | Offline Mermaid block processor; baked to `/opt/antora/`. | +| `puppeteer-config.json` | Chromium flags for mermaid-cli; baked to `/opt/antora/`. | + +The playbook wires the extension via +`asciidoc.extensions: [ /opt/antora/local-mermaid-extension.js ]`. 
+ +## Usage + +Build the image (context is this directory): + +```bash +docker build -t skainet-antora:local -f docs/.docker/Dockerfile docs/.docker/ +``` + +Render the site (run from the repo root; mount the repo at `/antora`, run as +your user so output isn't root-owned): + +```bash +docker run --rm \ + --user "$(id -u):$(id -g)" \ + -v "$PWD:/antora" \ + --workdir /antora/docs \ + skainet-antora:local \ + --stacktrace antora-playbook.yml + +# Output: docs/build/site/index.html +``` + +This is exactly what `.github/workflows/docs.yml` does in CI — it builds the +image from this directory and runs the container the same way. + +Write diagrams as normal Asciidoctor blocks: + +```adoc +[mermaid] +---- +graph TD; A-->B; B-->C; +---- +``` diff --git a/docs/.docker/local-mermaid-extension.js b/docs/.docker/local-mermaid-extension.js index 35b4c776..a7163e90 100644 --- a/docs/.docker/local-mermaid-extension.js +++ b/docs/.docker/local-mermaid-extension.js @@ -1,45 +1,78 @@ 'use strict' /* - * Local mermaid block processor for Asciidoctor.js. + * Offline Mermaid block processor for Asciidoctor.js / Antora. * - * Replaces the asciidoctor-kroki dependency on kroki.io (and its - * GET URL length limit / 400 rejections on large diagrams) with a - * direct invocation of `mmdc` — the @mermaid-js/mermaid-cli binary - * that the SKaiNET Antora Docker image already bakes in for its - * Chromium-backed Puppeteer rendering path. + * Replaces asciidoctor-kroki's dependency on a Kroki HTTP server + * (kroki.io or a local container — GET URL length limits, 400s on + * large diagrams, no offline fallback) with a direct, synchronous + * invocation of `mmdc` (@mermaid-js/mermaid-cli) baked into the + * markup-antora image at /opt/antora. Nothing leaves the container. * - * The extension is registered via the Antora playbook's - * `asciidoc.extensions` list and gets passed the Asciidoctor.js - * `registry` object. 
For every `[mermaid]\n----\n...\n----` block - * in any page, we: + * For every `[mermaid]\n----\n...\n----` (or literal `....`) block: + * 1. hash the source (md5) and consult the cache + * 2. on miss: write source to a temp file, run mmdc to produce SVG, + * read it back, store in the cache + * 3. inline the SVG via a `pass` block so Asciidoctor emits raw SVG + * straight into the HTML output * - * 1. write the source to a temp file - * 2. exec `mmdc -i in.mmd -o out.svg -p puppeteer-config.json` - * (synchronous — Antora processes one page at a time and the - * mermaid-cli call is fast enough that sync is fine) - * 3. read the produced SVG - * 4. inline it via a `pass` block so Asciidoctor emits the raw - * SVG markup straight into the HTML output + * Caching (merged from the Daily-StandAPP pipeline extension): + * - In-memory Map, keyed by content hash, dedupes identical diagrams + * across every page in a single build run. + * - If MERMAID_CACHE_DIR is set, the SVG is also persisted there as + * {hash}.svg, so unchanged diagrams survive across build runs + * (point it at a host-mounted, writable dir to benefit). * - * On render failure we fall back to a literal block containing - * the original source plus the error message, matching the - * degradation mode asciidoctor-kroki uses. + * On render failure we degrade to a literal block containing the + * original source plus the error — matching asciidoctor-kroki's mode. + * + * Register it from the Antora playbook: + * asciidoc: + * extensions: + * - /opt/antora/local-mermaid-extension.js */ const { execSync } = require('child_process') -const { mkdtempSync, writeFileSync, readFileSync, rmSync } = require('fs') +const { mkdtempSync, writeFileSync, readFileSync, rmSync, + existsSync, mkdirSync } = require('fs') const { tmpdir } = require('os') const { join } = require('path') +const { createHash } = require('crypto') -// Absolute paths baked into /opt/antora at image build time. 
-// These have to match the Dockerfile that installs mermaid-cli and -// writes the puppeteer config. +// Absolute paths baked into /opt/antora at image build time. These must +// match the Dockerfile that installs mermaid-cli and the puppeteer config. const MMDC_BIN = '/opt/antora/node_modules/.bin/mmdc' const PUPPETEER_CONFIG = '/opt/antora/puppeteer-config.json' +// Optional cross-run disk cache. +const DISK_CACHE_DIR = process.env.MERMAID_CACHE_DIR || null + +// In-process cache: hash -> svg. Dedupes within a single build run. +const memCache = new Map() + +function hashOf (source) { + return createHash('md5').update(source).digest('hex').slice(0, 16) +} + function renderMermaidToSvg (source) { - const dir = mkdtempSync(join(tmpdir(), 'skainet-mm-')) + const key = hashOf(source) + + // 1. in-memory hit + if (memCache.has(key)) return memCache.get(key) + + // 2. disk hit + let diskPath = null + if (DISK_CACHE_DIR) { + diskPath = join(DISK_CACHE_DIR, `${key}.svg`) + if (existsSync(diskPath)) { + const cached = readFileSync(diskPath, 'utf8') + memCache.set(key, cached) + return cached + } + } + + // 3. 
miss — render with mermaid-cli + const dir = mkdtempSync(join(tmpdir(), 'markup-antora-mm-')) const inputPath = join(dir, 'in.mmd') const outputPath = join(dir, 'out.svg') writeFileSync(inputPath, source, 'utf8') @@ -48,7 +81,15 @@ function renderMermaidToSvg (source) { `${MMDC_BIN} -i ${inputPath} -o ${outputPath} -p ${PUPPETEER_CONFIG} --quiet`, { stdio: ['ignore', 'ignore', 'pipe'] } ) - return readFileSync(outputPath, 'utf8') + const svg = readFileSync(outputPath, 'utf8') + memCache.set(key, svg) + if (diskPath) { + try { + if (!existsSync(DISK_CACHE_DIR)) mkdirSync(DISK_CACHE_DIR, { recursive: true }) + writeFileSync(diskPath, svg, 'utf8') + } catch (_) { /* cache is best-effort */ } + } + return svg } finally { try { rmSync(dir, { recursive: true, force: true }) } catch (_) { /* noop */ } } diff --git a/docs/.docker/puppeteer-config.json b/docs/.docker/puppeteer-config.json new file mode 100644 index 00000000..1d9366e8 --- /dev/null +++ b/docs/.docker/puppeteer-config.json @@ -0,0 +1,4 @@ +{ + "executablePath": "/usr/bin/chromium-browser", + "args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"] +}