From 41a2da731e4b0cc1bad2093596ee853a05c19d50 Mon Sep 17 00:00:00 2001 From: Michal Harakal Date: Mon, 29 Jun 2026 13:37:58 +0200 Subject: [PATCH] docs(antora): switch Mermaid to offline rendering via "markup-antora" image Migrate the docs image to the consolidated "markup-antora" definition (one image shared across the SKaiNET docs projects, vendored here until the public registry image is published) and switch Mermaid rendering from the external Kroki round-trip to fully offline. Playbook: drop asciidoctor-kroki + kroki-fetch-diagram in favour of /opt/antora/local-mermaid-extension.js; add runtime.cache_dir so the container runs rootless. Matches mainline SKaiNET. Features of the merged image: - Offline Mermaid: every [mermaid] block is rendered to inline SVG at build time by mermaid-cli (Alpine Chromium + Puppeteer). No Kroki, no kroki.io, no network at build or view time. Removes the asciidoctor-kroki 4 KB GET-URL limit that rejected large diagrams. - Diagram caching: content-hash, in-memory + optional on-disk (MERMAID_CACHE_DIR); identical diagrams render once. - Rootless-safe under --user $(id -u):$(id -g) (HOME=/tmp + cleanup of root-owned /tmp dirs; fixes Chromium crashpad / cosmiconfig EACCES). - Build-time mermaid smoke test (a broken image fails docker build). - Offline extras available: @antora/lunr-extension, pre-baked Antora UI bundle, MathJax es5; asciidoctor-kroki kept as an escape hatch. - Full Alpine font set for diagram labels, emoji and CJK. CI is unchanged: docs.yml already builds from docs/.docker. Usage and details: see docs/.docker/README.md. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/.docker/Dockerfile | 100 +++++++++++++++--- docs/.docker/README.md | 76 ++++++++++++++ docs/.docker/local-mermaid-extension.js | 132 ++++++++++++++++++++++++ docs/.docker/puppeteer-config.json | 4 + docs/antora-playbook.yml | 18 +++- 5 files changed, 308 insertions(+), 22 deletions(-) create mode 100644 docs/.docker/README.md create mode 100644 docs/.docker/local-mermaid-extension.js create mode 100644 docs/.docker/puppeteer-config.json diff --git a/docs/.docker/Dockerfile b/docs/.docker/Dockerfile index 67c21ba6..a9900af7 100644 --- a/docs/.docker/Dockerfile +++ b/docs/.docker/Dockerfile @@ -1,37 +1,103 @@ +# markup-antora — one Antora image to use everywhere. +# +# Consolidates the best of the ~9 Antora images that grew across the +# projects tree. Design priorities, in order: +# 1. Mermaid rendered FULLY OFFLINE — no Kroki server, no kroki.io, +# no CDN at build time and none at view time. Diagrams are baked +# to inline SVG by mermaid-cli (Alpine Chromium + Puppeteer) +# through the local-mermaid-extension.js Asciidoctor block +# processor, with content-hash caching so repeated diagrams render +# once per build. +# 2. Works under `--user $(id -u):$(id -g)` (rootless) without the +# Chromium crashpad / cosmiconfig EACCES failures. +# 3. Offline extras available but not forced: lunr full-text search, +# a pre-baked Antora UI bundle, and MathJax es5 for LaTeX. +# 4. asciidoctor-kroki installed-but-unused as an escape hatch. FROM node:20-alpine -LABEL org.opencontainers.image.title="SKaiNET Antora" \ - org.opencontainers.image.description="Antora site generator with built-in Mermaid rendering" \ - org.opencontainers.image.source="https://github.com/SKaiNET-developers/SKaiNET-transformers" +LABEL org.opencontainers.image.title="markup-antora" \ + org.opencontainers.image.description="Universal Antora site generator with offline Mermaid (mermaid-cli), offline search (lunr), pre-baked UI bundle + MathJax. 
No Kroki, no CDN." \ + org.opencontainers.image.source="https://github.com/SKaiNET-developers/SKaiNET" -# Chromium for mermaid-cli (puppeteer) -RUN apk add --no-cache chromium font-noto +# Chromium for mermaid-cli (Puppeteer). Full font set so diagram labels, +# emoji and CJK render correctly (merged from the Daily-StandAPP image). +RUN apk add --no-cache \ + chromium \ + nss \ + freetype \ + harfbuzz \ + ttf-freefont \ + font-noto \ + font-noto-emoji \ + ca-certificates \ + git +# HOME=/tmp: Chromium's crashpad handler writes its database under $HOME +# and aborts with `--database is required` when the container runs as a +# non-root --user and $HOME falls back to `/` (no passwd entry, not +# writable). Same motivation as runtime.cache_dir in the playbook. ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser \ - PUPPETEER_SKIP_DOWNLOAD=true + PUPPETEER_SKIP_DOWNLOAD=true \ + HOME=/tmp -# Install Antora + extensions to /opt/antora (not /antora which gets volume-mounted) +# Install Antora + tooling into /opt/antora (NOT /antora, which is where +# the project gets volume-mounted at run time). +# - @mermaid-js/mermaid-cli : offline diagram rendering (the point) +# - @antora/lunr-extension : offline full-text search +# - asciidoctor-kroki : escape hatch only; the playbook should +# use the local mermaid extension instead. WORKDIR /opt/antora RUN npm init -y && npm i --save-exact \ @antora/cli@3.1 \ @antora/site-generator@3.1 \ - asciidoctor-kroki@0.18 \ + @antora/lunr-extension@1.0.0-alpha.8 \ @mermaid-js/mermaid-cli@11 \ + asciidoctor-kroki@0.18 \ && npm cache clean --force -# Make installed modules visible when workdir is the mounted project +# Make installed modules resolvable even when the workdir is the mounted +# project (which has no node_modules of its own). 
ENV NODE_PATH=/opt/antora/node_modules -# Mermaid-cli config -RUN echo '{ \ - "executablePath": "/usr/bin/chromium-browser", \ - "args": ["--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage"] \ -}' > /opt/antora/puppeteer-config.json +# Mermaid-cli / Puppeteer config and the offline block processor, baked +# in at absolute paths the playbook can reference without mount gymnastics. +COPY puppeteer-config.json /opt/antora/puppeteer-config.json +COPY local-mermaid-extension.js /opt/antora/local-mermaid-extension.js + +# --- Offline assets (available, not forced) ------------------------------- + +# Pre-download the default Antora UI bundle so sites build without hitting +# gitlab.com. Reference it from a playbook with: +# ui: +# bundle: +# url: /opt/antora-ui/ui-bundle.zip +# snapshot: true +RUN mkdir -p /opt/antora-ui \ + && wget -q -O /opt/antora-ui/ui-bundle.zip \ + "https://gitlab.com/antora/antora-ui-default/-/jobs/artifacts/HEAD/raw/build/ui-bundle.zip?job=bundle-stable" + +# Pre-download MathJax es5 for offline LaTeX. Copy /opt/mathjax/es5 into a +# supplemental UI or reference it from your UI template for client-side math. +RUN mkdir -p /opt/mathjax \ + && npm pack mathjax@3 --pack-destination /tmp \ + && tar -xzf /tmp/mathjax-*.tgz -C /tmp \ + && cp -r /tmp/package/es5 /opt/mathjax/es5 \ + && rm -rf /tmp/mathjax-* /tmp/package + +# --- Build-time smoke test + rootless cleanup ----------------------------- -# Verify mermaid works +# Verify mermaid-cli works end to end so a broken image fails the build, +# not the user's first run. The cleanup also removes the mode-0700 +# root-owned dirs (/tmp/.config/puppeteer, /tmp/.local/share/chromium, +# /tmp/.cache, /tmp/.npm) that Puppeteer/Chromium drop into $HOME during +# this run — leaving them would make cosmiconfig EACCES when the container +# is later launched with a non-root --user. 
RUN echo 'graph TD; A-->B;' > /tmp/test.mmd \ - && npx mmdc -i /tmp/test.mmd -o /tmp/test.svg -p /opt/antora/puppeteer-config.json \ - && rm /tmp/test.mmd /tmp/test.svg + && /opt/antora/node_modules/.bin/mmdc \ + -i /tmp/test.mmd -o /tmp/test.svg \ + -p /opt/antora/puppeteer-config.json --quiet \ + && rm -rf /tmp/test.mmd /tmp/test.svg /tmp/.config /tmp/.local /tmp/.npm /tmp/.cache +WORKDIR /antora ENTRYPOINT ["/opt/antora/node_modules/.bin/antora"] CMD ["--stacktrace", "antora-playbook.yml"] diff --git a/docs/.docker/README.md b/docs/.docker/README.md new file mode 100644 index 00000000..9f608b09 --- /dev/null +++ b/docs/.docker/README.md @@ -0,0 +1,76 @@ +# Docs Antora image (`docs/.docker`) + +Self-contained Antora image used to build this repo's documentation site. +It is the consolidated "markup-antora" image — one definition shared across +the SKaiNET docs projects — vendored here until the public registry image +is published (after which this `Dockerfile` collapses to a single `FROM`). + +> **Migration note:** this repo previously rendered Mermaid through +> `asciidoctor-kroki` (an external Kroki round-trip). It now renders Mermaid +> **fully offline** via `local-mermaid-extension.js`, matching mainline SKaiNET. + +## Features + +- **Offline Mermaid** — every `[mermaid]` block is rendered to **inline SVG** + at build time by `mermaid-cli` (Alpine Chromium + Puppeteer) via the baked-in + `local-mermaid-extension.js` Asciidoctor block processor. No Kroki server, + no `kroki.io`, no network — at build time *or* view time. Removes the + asciidoctor-kroki 4 KB GET-URL limit that rejected large diagrams. +- **Diagram caching** — content-hash, in-memory + optional on-disk + (`MERMAID_CACHE_DIR`); identical diagrams render once. +- **Rootless-safe** — runs under `--user $(id -u):$(id -g)` without the + Chromium crashpad / cosmiconfig `EACCES` failures (`HOME=/tmp`, build-time + cleanup of root-owned `/tmp` dirs). 
+- **Build-time smoke test** — a broken image fails `docker build`, not your + first render. +- **Offline extras** — `@antora/lunr-extension` (search), a pre-baked Antora + UI bundle, and MathJax es5 for LaTeX are available in the image. +- **Kroki escape hatch** — `asciidoctor-kroki` is installed (unused here) for + other diagram types if ever needed. +- Full Alpine font set (`font-noto`, `font-noto-emoji`, `ttf-freefont`, …) so + diagram labels, emoji and CJK render correctly. + +## Files + +| File | Purpose | +|---|---| +| `Dockerfile` | The consolidated image definition (build context = this dir). | +| `local-mermaid-extension.js` | Offline Mermaid block processor; baked to `/opt/antora/`. | +| `puppeteer-config.json` | Chromium flags for mermaid-cli; baked to `/opt/antora/`. | + +The playbook wires the extension via +`asciidoc.extensions: [ /opt/antora/local-mermaid-extension.js ]`. + +## Usage + +Build the image (context is this directory): + +```bash +docker build -t skainet-antora:local -f docs/.docker/Dockerfile docs/.docker/ +``` + +Render the site (run from the repo root; mount the repo at `/antora`, run as +your user so output isn't root-owned): + +```bash +docker run --rm \ + --user "$(id -u):$(id -g)" \ + -v "$PWD:/antora" \ + --workdir /antora/docs \ + skainet-antora:local \ + --stacktrace antora-playbook.yml + +# Output: docs/build/site/index.html +``` + +This is exactly what `.github/workflows/docs.yml` does in CI — it builds the +image from this directory and runs the container the same way. + +Write diagrams as normal Asciidoctor blocks: + +```adoc +[mermaid] +---- +graph TD; A-->B; B-->C; +---- +``` diff --git a/docs/.docker/local-mermaid-extension.js b/docs/.docker/local-mermaid-extension.js new file mode 100644 index 00000000..a7163e90 --- /dev/null +++ b/docs/.docker/local-mermaid-extension.js @@ -0,0 +1,132 @@ +'use strict' + +/* + * Offline Mermaid block processor for Asciidoctor.js / Antora. 
+ *
+ * Replaces asciidoctor-kroki's dependency on a Kroki HTTP server
+ * (kroki.io or a local container — GET URL length limits, 400s on
+ * large diagrams, no offline fallback) with a direct, synchronous
+ * invocation of `mmdc` (@mermaid-js/mermaid-cli) baked into the
+ * markup-antora image at /opt/antora. Nothing leaves the container.
+ *
+ * For every `[mermaid]\n----\n...\n----` (or literal `....`) block:
+ *   1. hash the source (md5) and consult the cache
+ *   2. on miss: write source to a temp file, run mmdc to produce SVG,
+ *      read it back, store in the cache
+ *   3. inline the SVG via a `pass` block so Asciidoctor emits raw SVG
+ *      straight into the HTML output
+ *
+ * Caching (merged from the Daily-StandAPP pipeline extension):
+ *   - In-memory Map, keyed by content hash, dedupes identical diagrams
+ *     across every page in a single build run.
+ *   - If MERMAID_CACHE_DIR is set, the SVG is also persisted there as
+ *     <hash>.svg, so unchanged diagrams survive across build runs
+ *     (point it at a host-mounted, writable dir to benefit).
+ *
+ * On render failure we degrade to a literal block containing the
+ * original source plus the error — matching asciidoctor-kroki's mode.
+ *
+ * Register it from the Antora playbook:
+ *   asciidoc:
+ *     extensions:
+ *       - /opt/antora/local-mermaid-extension.js
+ */
+
+const { execFileSync } = require('child_process')
+const { mkdtempSync, writeFileSync, readFileSync, rmSync,
+  existsSync, mkdirSync } = require('fs')
+const { tmpdir } = require('os')
+const { join } = require('path')
+const { createHash } = require('crypto')
+
+// Absolute paths baked into /opt/antora at image build time. These must
+// match the Dockerfile that installs mermaid-cli and the puppeteer config.
+const MMDC_BIN = '/opt/antora/node_modules/.bin/mmdc'
+const PUPPETEER_CONFIG = '/opt/antora/puppeteer-config.json'
+
+// Optional cross-run disk cache.
+const DISK_CACHE_DIR = process.env.MERMAID_CACHE_DIR || null
+
+// In-process cache: hash -> svg. Dedupes within a single build run.
+const memCache = new Map()
+
+// Cache key: first 16 hex chars of the md5 of the diagram source.
+function hashOf (source) {
+  return createHash('md5').update(source).digest('hex').slice(0, 16)
+}
+
+// Returns the SVG for `source`: memory cache, then disk cache, then a fresh
+// mmdc render. Throws when mmdc fails (stderr is captured on the thrown error).
+function renderMermaidToSvg (source) {
+  const key = hashOf(source)
+
+  // 1. in-memory hit
+  if (memCache.has(key)) return memCache.get(key)
+
+  // 2. disk hit
+  let diskPath = null
+  if (DISK_CACHE_DIR) {
+    diskPath = join(DISK_CACHE_DIR, `${key}.svg`)
+    if (existsSync(diskPath)) {
+      const cached = readFileSync(diskPath, 'utf8')
+      memCache.set(key, cached)
+      return cached
+    }
+  }
+
+  // 3. miss — render with mermaid-cli
+  const dir = mkdtempSync(join(tmpdir(), 'markup-antora-mm-'))
+  const inputPath = join(dir, 'in.mmd')
+  const outputPath = join(dir, 'out.svg')
+  const args = ['-i', inputPath, '-o', outputPath, '-p', PUPPETEER_CONFIG, '--quiet']
+  try {
+    writeFileSync(inputPath, source, 'utf8')
+    // argv array, no shell: a TMPDIR with spaces or metacharacters is safe.
+    execFileSync(MMDC_BIN, args, { stdio: ['ignore', 'ignore', 'pipe'] })
+    const svg = readFileSync(outputPath, 'utf8')
+    memCache.set(key, svg)
+    if (diskPath) {
+      try {
+        if (!existsSync(DISK_CACHE_DIR)) mkdirSync(DISK_CACHE_DIR, { recursive: true })
+        writeFileSync(diskPath, svg, 'utf8')
+      } catch (_) { /* cache is best-effort */ }
+    }
+    return svg
+  } finally {
+    try { rmSync(dir, { recursive: true, force: true }) } catch (_) { /* noop */ }
+  }
+}
+
+// Builds the Asciidoctor block-processor body for [mermaid] listing/literal
+// blocks: inline SVG on success, a literal "mermaid-error" block on failure.
+function mermaidBlockFactory () {
+  return function () {
+    const self = this
+    self.named('mermaid')
+    self.onContext(['listing', 'literal'])
+    self.process((parent, reader, attrs) => {
+      const source = reader.$read()
+      try {
+        const svg = renderMermaidToSvg(source)
+        return self.createBlock(parent, 'pass', svg, attrs)
+      } catch (err) {
+        const logger = parent.getDocument().getLogger()
+        logger.warn(`local-mermaid-extension: failed to render block — ${err.message}`)
+        attrs.role = attrs.role ? `${attrs.role} mermaid-error` : 'mermaid-error'
+        const text = `Error rendering mermaid diagram:\n${err.message}\n\n${source}`
+        return self.createBlock(parent, 'literal', text, attrs)
+      }
+    })
+  }
+}
+
+// Extension entry point: works with registry.register(fn) or registry.block().
+module.exports.register = function register (registry) {
+  if (typeof registry.register === 'function') {
+    registry.register(function () {
+      this.block('mermaid', mermaidBlockFactory())
+    })
+  } else if (typeof registry.block === 'function') {
+    registry.block('mermaid', mermaidBlockFactory())
+  }
+}
diff --git a/docs/.docker/puppeteer-config.json b/docs/.docker/puppeteer-config.json
new file mode 100644
index 00000000..1d9366e8
--- /dev/null
+++ b/docs/.docker/puppeteer-config.json
@@ -0,0 +1,4 @@
+{
+  "executablePath": "/usr/bin/chromium-browser",
+  "args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"]
+}
diff --git a/docs/antora-playbook.yml b/docs/antora-playbook.yml
index 18daa79b..14d0a3d4 100644
--- a/docs/antora-playbook.yml
+++ b/docs/antora-playbook.yml
@@ -2,6 +2,12 @@ site:
   title: SKaiNET Transformers
   start_page: skainet-transformers::index.adoc
 
+# Keep Antora's content cache inside the project tree so the container
+# can be run as a non-root user (docker run --user $(id -u):$(id -g)).
+# Matches mainline SKaiNET's playbook.
+runtime:
+  cache_dir: ./.cache/antora
+
 content:
   sources:
   - url: /antora
@@ -10,11 +16,13 @@ content:
 
 asciidoc:
   extensions:
-    - asciidoctor-kroki
-  attributes:
-    # Use local mermaid-cli via Kroki (no external server needed when
-    # built with the custom Docker image in docs/.docker/Dockerfile)
-    kroki-fetch-diagram: true
+    # Local mermaid block processor — renders every `[mermaid]` block
+    # inline by invoking the @mermaid-js/mermaid-cli binary baked into
+    # the Docker image at /opt/antora/node_modules/.bin/mmdc. Replaces
+    # asciidoctor-kroki so builds don't depend on kroki.io at all
+    # (no GET-URL length limit, no external server). Same approach as
+    # mainline SKaiNET.
+ - /opt/antora/local-mermaid-extension.js ui: bundle: