Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 83 additions & 17 deletions docs/.docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,37 +1,103 @@
# markup-antora — one Antora image to use everywhere.
#
# Consolidates the best of the ~9 Antora images that grew across the
# projects tree. Design priorities, in order:
# 1. Mermaid rendered FULLY OFFLINE — no Kroki server, no kroki.io,
# no CDN at build time and none at view time. Diagrams are baked
# to inline SVG by mermaid-cli (Alpine Chromium + Puppeteer)
# through the local-mermaid-extension.js Asciidoctor block
# processor, with content-hash caching so repeated diagrams render
# once per build.
# 2. Works under `--user $(id -u):$(id -g)` (rootless) without the
# Chromium crashpad / cosmiconfig EACCES failures.
# 3. Offline extras available but not forced: lunr full-text search,
# a pre-baked Antora UI bundle, and MathJax es5 for LaTeX.
# 4. asciidoctor-kroki installed-but-unused as an escape hatch.
FROM node:20-alpine

LABEL org.opencontainers.image.title="SKaiNET Antora" \
org.opencontainers.image.description="Antora site generator with built-in Mermaid rendering" \
org.opencontainers.image.source="https://github.com/SKaiNET-developers/SKaiNET-transformers"
LABEL org.opencontainers.image.title="markup-antora" \
org.opencontainers.image.description="Universal Antora site generator with offline Mermaid (mermaid-cli), offline search (lunr), pre-baked UI bundle + MathJax. No Kroki, no CDN." \
org.opencontainers.image.source="https://github.com/SKaiNET-developers/SKaiNET"

# Chromium for mermaid-cli (puppeteer)
RUN apk add --no-cache chromium font-noto
# Headless Chromium is what mermaid-cli drives through Puppeteer. The font
# packages are installed alongside it so diagram labels, emoji and CJK text
# render correctly (package list merged from the Daily-StandAPP image).
# Packages are listed alphabetically, one per line, to keep diffs small.
RUN apk add --no-cache \
      ca-certificates \
      chromium \
      font-noto \
      font-noto-emoji \
      freetype \
      git \
      harfbuzz \
      nss \
      ttf-freefont

# HOME=/tmp: Chromium's crashpad handler writes its database under $HOME
# and aborts with `--database is required` when the container runs as a
# non-root --user and $HOME falls back to `/` (no passwd entry, not
# writable). Same motivation as runtime.cache_dir in the playbook.
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser \
PUPPETEER_SKIP_DOWNLOAD=true
PUPPETEER_SKIP_DOWNLOAD=true \
HOME=/tmp

# Install Antora + extensions to /opt/antora (not /antora which gets volume-mounted)
# Install Antora + tooling into /opt/antora (NOT /antora, which is where
# the project gets volume-mounted at run time).
# - @mermaid-js/mermaid-cli : offline diagram rendering (the point)
# - @antora/lunr-extension : offline full-text search
# - asciidoctor-kroki : escape hatch only; the playbook should
# use the local mermaid extension instead.
WORKDIR /opt/antora
RUN npm init -y && npm i --save-exact \
@antora/cli@3.1 \
@antora/site-generator@3.1 \
asciidoctor-kroki@0.18 \
@antora/lunr-extension@1.0.0-alpha.8 \
@mermaid-js/mermaid-cli@11 \
asciidoctor-kroki@0.18 \
&& npm cache clean --force

# Make installed modules visible when workdir is the mounted project
# Make installed modules resolvable even when the workdir is the mounted
# project (which has no node_modules of its own).
ENV NODE_PATH=/opt/antora/node_modules

# Mermaid-cli config
RUN echo '{ \
"executablePath": "/usr/bin/chromium-browser", \
"args": ["--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage"] \
}' > /opt/antora/puppeteer-config.json
# Bake the mermaid-cli / Puppeteer config and the offline block processor
# into /opt/antora, so a playbook can reference them by absolute path
# without any extra volume mounts.
COPY puppeteer-config.json local-mermaid-extension.js /opt/antora/

# --- Offline assets (available, not forced) -------------------------------

# Pre-download the default Antora UI bundle so sites build without hitting
# gitlab.com. Reference it from a playbook with:
#   ui:
#     bundle:
#       url: /opt/antora-ui/ui-bundle.zip
#       snapshot: true
#
# NOTE(review): the URL below requests the artifact at `HEAD` of the
# `bundle-stable` job, so the bundle baked into the image depends on when the
# image is built, and nothing here verifies a checksum. Consider pinning a
# specific ref and checking a digest if reproducible builds matter.
RUN mkdir -p /opt/antora-ui \
&& wget -q -O /opt/antora-ui/ui-bundle.zip \
"https://gitlab.com/antora/antora-ui-default/-/jobs/artifacts/HEAD/raw/build/ui-bundle.zip?job=bundle-stable"

# Pre-download MathJax es5 for offline LaTeX. Copy /opt/mathjax/es5 into a
# supplemental UI or reference it from your UI template for client-side math.
#
# `npm pack` downloads through the npm cache, which lives under $HOME/.npm
# (HOME is /tmp in this image). The cache is removed in this same RUN, next
# to the tarball and the unpacked package, so it is never committed to the
# layer; deleting it in a later RUN would hide it but not shrink the image.
RUN mkdir -p /opt/mathjax \
    && npm pack mathjax@3 --pack-destination /tmp \
    && tar -xzf /tmp/mathjax-*.tgz -C /tmp \
    && cp -r /tmp/package/es5 /opt/mathjax/es5 \
    && rm -rf /tmp/mathjax-* /tmp/package /tmp/.npm

# --- Build-time smoke test + rootless cleanup -----------------------------

# Verify mermaid works
# Verify mermaid-cli works end to end so a broken image fails the build,
# not the user's first run. The cleanup also removes the mode-0700
# root-owned dirs (/tmp/.config/puppeteer, /tmp/.local/share/chromium,
# /tmp/.cache, /tmp/.npm) that Puppeteer/Chromium drop into $HOME during
# this run — leaving them would make cosmiconfig fail with EACCES when the
# container is later launched with a non-root --user.
RUN echo 'graph TD; A-->B;' > /tmp/test.mmd \
&& npx mmdc -i /tmp/test.mmd -o /tmp/test.svg -p /opt/antora/puppeteer-config.json \
&& rm /tmp/test.mmd /tmp/test.svg
&& /opt/antora/node_modules/.bin/mmdc \
-i /tmp/test.mmd -o /tmp/test.svg \
-p /opt/antora/puppeteer-config.json --quiet \
&& rm -rf /tmp/test.mmd /tmp/test.svg /tmp/.config /tmp/.local /tmp/.npm /tmp/.cache

# Default working directory: /antora is where the project is expected to be
# volume-mounted at run time.
WORKDIR /antora
# Exec-form entrypoint: the container runs the Antora CLI that was installed
# under /opt/antora, with no intermediate shell.
ENTRYPOINT ["/opt/antora/node_modules/.bin/antora"]
# Default arguments for the entrypoint; arguments passed to `docker run`
# after the image name replace them.
CMD ["--stacktrace", "antora-playbook.yml"]
76 changes: 76 additions & 0 deletions docs/.docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Docs Antora image (`docs/.docker`)

Self-contained Antora image used to build this repo's documentation site.
It is the consolidated "markup-antora" image — one definition shared across
the SKaiNET docs projects — vendored here until the public registry image
is published (after which this `Dockerfile` collapses to a single `FROM`).

> **Migration note:** this repo previously rendered Mermaid through
> `asciidoctor-kroki` (an external Kroki round-trip). It now renders Mermaid
> **fully offline** via `local-mermaid-extension.js`, matching mainline SKaiNET.

## Features

- **Offline Mermaid** — every `[mermaid]` block is rendered to **inline SVG**
at build time by `mermaid-cli` (Alpine Chromium + Puppeteer) via the baked-in
`local-mermaid-extension.js` Asciidoctor block processor. No Kroki server,
no `kroki.io`, no network — at build time *or* view time. This also removes
the asciidoctor-kroki 4 KB GET-URL limit that rejected large diagrams.
- **Diagram caching** — content-hash, in-memory + optional on-disk
(`MERMAID_CACHE_DIR`); identical diagrams render once.
- **Rootless-safe** — runs under `--user $(id -u):$(id -g)` without the
Chromium crashpad / cosmiconfig `EACCES` failures (`HOME=/tmp`, build-time
cleanup of root-owned `/tmp` dirs).
- **Build-time smoke test** — a broken image fails `docker build`, not your
first render.
- **Offline extras** — `@antora/lunr-extension` (search), a pre-baked Antora
UI bundle, and MathJax es5 for LaTeX are available in the image.
- **Kroki escape hatch** — `asciidoctor-kroki` is installed (unused here) for
other diagram types if ever needed.
- **Full font set** — Alpine font packages (`font-noto`, `font-noto-emoji`,
`ttf-freefont`, …) are installed so diagram labels, emoji and CJK text render
correctly.

## Files

| File | Purpose |
|---|---|
| `Dockerfile` | The consolidated image definition (build context = this dir). |
| `local-mermaid-extension.js` | Offline Mermaid block processor; baked to `/opt/antora/`. |
| `puppeteer-config.json` | Chromium flags for mermaid-cli; baked to `/opt/antora/`. |

The playbook wires the extension via
`asciidoc.extensions: [ /opt/antora/local-mermaid-extension.js ]`.

## Usage

Build the image (context is this directory):

```bash
docker build -t skainet-antora:local -f docs/.docker/Dockerfile docs/.docker/
```

Render the site (run from the repo root; mount the repo at `/antora`, run as
your user so output isn't root-owned):

```bash
docker run --rm \
--user "$(id -u):$(id -g)" \
-v "$PWD:/antora" \
--workdir /antora/docs \
skainet-antora:local \
--stacktrace antora-playbook.yml

# Output: docs/build/site/index.html
```

This is exactly what `.github/workflows/docs.yml` does in CI — it builds the
image from this directory and runs the container the same way.

Write diagrams as normal Asciidoctor blocks:

```adoc
[mermaid]
----
graph TD; A-->B; B-->C;
----
```
132 changes: 132 additions & 0 deletions docs/.docker/local-mermaid-extension.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
'use strict'

/*
* Offline Mermaid block processor for Asciidoctor.js / Antora.
*
* Replaces asciidoctor-kroki's dependency on a Kroki HTTP server
* (kroki.io or a local container — GET URL length limits, 400s on
* large diagrams, no offline fallback) with a direct, synchronous
* invocation of `mmdc` (@mermaid-js/mermaid-cli) baked into the
* markup-antora image at /opt/antora. Nothing leaves the container.
*
* For every `[mermaid]\n----\n...\n----` (or literal `....`) block:
* 1. hash the source (md5) and consult the cache
* 2. on miss: write source to a temp file, run mmdc to produce SVG,
* read it back, store in the cache
* 3. inline the SVG via a `pass` block so Asciidoctor emits raw SVG
* straight into the HTML output
*
* Caching (merged from the Daily-StandAPP pipeline extension):
* - In-memory Map, keyed by content hash, dedupes identical diagrams
* across every page in a single build run.
* - If MERMAID_CACHE_DIR is set, the SVG is also persisted there as
* <hash>.svg, so unchanged diagrams survive across build runs
* (point it at a host-mounted, writable dir to benefit).
*
* On render failure we degrade to a literal block containing the
* original source plus the error — matching asciidoctor-kroki's mode.
*
* Register it from the Antora playbook:
* asciidoc:
* extensions:
* - /opt/antora/local-mermaid-extension.js
*/

const { execFileSync, execSync } = require('child_process')
const { createHash } = require('crypto')
const { mkdtempSync, writeFileSync, readFileSync, rmSync,
  existsSync, mkdirSync } = require('fs')
const { tmpdir } = require('os')
const { join } = require('path')

// Absolute paths baked into /opt/antora at image build time. These must
// match the Dockerfile that installs mermaid-cli and the puppeteer config.
const MMDC_BIN = '/opt/antora/node_modules/.bin/mmdc'
const PUPPETEER_CONFIG = '/opt/antora/puppeteer-config.json'

// Optional cross-run disk cache.
const DISK_CACHE_DIR = process.env.MERMAID_CACHE_DIR || null

// In-process cache: hash -> svg. Dedupes within a single build run.
const memCache = new Map()

/**
 * Derive the cache key for a diagram.
 *
 * @param {string} source - Mermaid diagram source text.
 * @returns {string} The first 16 hex characters of the MD5 digest of `source`.
 */
function hashOf (source) {
  const md5 = createHash('md5')
  md5.update(source)
  const hex = md5.digest('hex')
  return hex.substring(0, 16)
}

/**
 * Render Mermaid source to an SVG string, consulting the caches first.
 *
 * Lookup order: in-memory cache, then the optional disk cache
 * (DISK_CACHE_DIR), then a fresh render with mermaid-cli (`mmdc`).
 *
 * @param {string} source - Mermaid diagram source text.
 * @returns {string} The SVG markup for the diagram.
 * @throws {Error} If the temp files cannot be written/read or `mmdc` exits
 *   with a failure; callers are expected to catch and degrade.
 */
function renderMermaidToSvg (source) {
  const key = hashOf(source)

  // 1. in-memory hit
  if (memCache.has(key)) return memCache.get(key)

  // 2. disk hit. A missing, unreadable or empty (e.g. truncated by an
  // interrupted earlier write) cache file is treated as a miss, so a bad
  // cache entry can never fail the render or be served as a blank diagram.
  let diskPath = null
  if (DISK_CACHE_DIR) {
    diskPath = join(DISK_CACHE_DIR, `${key}.svg`)
    try {
      const cached = readFileSync(diskPath, 'utf8')
      if (cached.trim() !== '') {
        memCache.set(key, cached)
        return cached
      }
    } catch (_) { /* cache is best-effort: fall through and render */ }
  }

  // 3. miss — render with mermaid-cli
  const dir = mkdtempSync(join(tmpdir(), 'markup-antora-mm-'))
  try {
    const inputPath = join(dir, 'in.mmd')
    const outputPath = join(dir, 'out.svg')
    // Written inside the try so the temp dir is removed even if this fails.
    writeFileSync(inputPath, source, 'utf8')
    // Argument vector, no shell: paths containing spaces or shell
    // metacharacters (e.g. from a custom TMPDIR) are passed through intact.
    execFileSync(
      MMDC_BIN,
      ['-i', inputPath, '-o', outputPath, '-p', PUPPETEER_CONFIG, '--quiet'],
      { stdio: ['ignore', 'ignore', 'pipe'] }
    )
    const svg = readFileSync(outputPath, 'utf8')
    memCache.set(key, svg)
    if (diskPath) {
      try {
        // recursive mkdir is a no-op when the directory already exists
        mkdirSync(DISK_CACHE_DIR, { recursive: true })
        writeFileSync(diskPath, svg, 'utf8')
      } catch (_) { /* cache is best-effort */ }
    }
    return svg
  } finally {
    try { rmSync(dir, { recursive: true, force: true }) } catch (_) { /* noop */ }
  }
}

/**
 * Build the block-processor definition for `[mermaid]` blocks.
 *
 * The returned function is meant to be invoked with the processor as `this`.
 * It names the block `mermaid`, accepts listing and literal contexts, and
 * installs a handler that replaces the block with a `pass` block holding the
 * rendered SVG. If rendering throws, the handler logs a warning, appends the
 * `mermaid-error` role, and emits a literal block with the error message
 * followed by the original source.
 *
 * @returns {Function} Processor definition function.
 */
function mermaidBlockFactory () {
  return function () {
    const processor = this

    // Fallback used when rendering throws: warn, tag the block, show source.
    const renderFailure = (parent, attrs, source, err) => {
      parent.getDocument().getLogger()
        .warn(`local-mermaid-extension: failed to render block — ${err.message}`)
      attrs.role = [attrs.role, 'mermaid-error'].filter(Boolean).join(' ')
      const text = `Error rendering mermaid diagram:\n${err.message}\n\n${source}`
      return processor.createBlock(parent, 'literal', text, attrs)
    }

    processor.named('mermaid')
    processor.onContext(['listing', 'literal'])
    processor.process((parent, reader, attrs) => {
      const source = reader.$read()
      let svg
      try {
        svg = renderMermaidToSvg(source)
      } catch (err) {
        return renderFailure(parent, attrs, source, err)
      }
      try {
        return processor.createBlock(parent, 'pass', svg, attrs)
      } catch (err) {
        // The original kept createBlock inside the same try; preserve that.
        return renderFailure(parent, attrs, source, err)
      }
    })
  }
}

/**
 * Extension entry point.
 *
 * Accepts either an object exposing `register(fn)` (the block is then
 * registered from inside the callback, on its `this`) or an object that
 * exposes `block(name, processor)` directly. Anything else is ignored.
 *
 * @param {object} registry - Extension registry handed in by the host.
 */
module.exports.register = function register (registry) {
  const install = (target) => { target.block('mermaid', mermaidBlockFactory()) }

  if (typeof registry.register === 'function') {
    registry.register(function () { install(this) })
    return
  }
  if (typeof registry.block === 'function') install(registry)
}
4 changes: 4 additions & 0 deletions docs/.docker/puppeteer-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"executablePath": "/usr/bin/chromium-browser",
"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-gpu", "--disable-dev-shm-usage"]
}
18 changes: 13 additions & 5 deletions docs/antora-playbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ site:
title: SKaiNET Transformers
start_page: skainet-transformers::index.adoc

# Keep Antora's content cache inside the project tree so the container
# can be run as a non-root user (docker run --user $(id -u):$(id -g)).
# Matches mainline SKaiNET's playbook.
runtime:
cache_dir: ./.cache/antora

content:
sources:
- url: /antora
Expand All @@ -10,11 +16,13 @@ content:

asciidoc:
extensions:
- asciidoctor-kroki
attributes:
# Use local mermaid-cli via Kroki (no external server needed when
# built with the custom Docker image in docs/.docker/Dockerfile)
kroki-fetch-diagram: true
# Local mermaid block processor — renders every `[mermaid]` block
# inline by invoking the @mermaid-js/mermaid-cli binary baked into
# the Docker image at /opt/antora/node_modules/.bin/mmdc. Replaces
# asciidoctor-kroki so builds don't depend on kroki.io at all
# (no GET-URL length limit, no external server). Same approach as
# mainline SKaiNET.
- /opt/antora/local-mermaid-extension.js

ui:
bundle:
Expand Down
Loading