From 0b91b06d6afbc1dda0f116a066a6d923094c25a0 Mon Sep 17 00:00:00 2001 From: ambujsingh Date: Mon, 22 Jun 2026 16:59:02 +0530 Subject: [PATCH] refactor(plantuml): replace linker with idmap-based cross-diagram linking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the separate linker tool with idmap sidecar files emitted directly by the parser. Key changes: - Add puml_idmap crate: emits *.idmap.json sidecars with defines/references roles detected from diagram structure (component children, class methods/ variables, sequence participants) - Extend puml_cli with --source-name and --idmap-output-dir args; use the workspace-relative path identity instead of the basename for a stable, path-unique source identifier - Rewrite clickable_plantuml Sphinx extension: load idmaps at builder-inited, resolve references via FQN→alias lookup with a proximity tiebreak and a tie→no-link guard; emit correct relative URLs for svg_obj mode; and percent-encode injected PlantUML URLs - Update architectural_design.bzl: parser now emits 3 outputs (fbs, lobster, idmap); remove linker action and _linker attr - Delete plantuml/linker (replaced by idmap approach) --- bazel/rules/rules_score/README.md | 2 + .../rules_score/docs/tooling_architecture.rst | 10 +- .../sample_fmea_failure_modes.trlc | 2 +- .../private/architectural_design.bzl | 177 +++--- plantuml/linker/README.md | 12 - plantuml/linker/src/main.rs | 266 --------- plantuml/parser/BUILD | 4 +- plantuml/parser/puml_cli/BUILD | 2 + plantuml/parser/puml_cli/src/main.rs | 56 +- plantuml/{linker => parser/puml_idmap}/BUILD | 28 +- plantuml/parser/puml_idmap/src/lib.rs | 497 ++++++++++++++++ plantuml/parser/puml_lobster/src/lib.rs | 17 +- plantuml/sphinx/clickable_plantuml/README.md | 250 +++++--- .../clickable_plantuml/clickable_plantuml.py | 532 ++++++++++++++---- .../sphinx/clickable_plantuml/tests/BUILD | 24 + .../tests/test_clickable_plantuml.py | 340 +++++++++++ 16 files changed, 1599 
insertions(+), 620 deletions(-) delete mode 100644 plantuml/linker/README.md delete mode 100644 plantuml/linker/src/main.rs rename plantuml/{linker => parser/puml_idmap}/BUILD (53%) create mode 100644 plantuml/parser/puml_idmap/src/lib.rs create mode 100644 plantuml/sphinx/clickable_plantuml/tests/BUILD create mode 100644 plantuml/sphinx/clickable_plantuml/tests/test_clickable_plantuml.py diff --git a/bazel/rules/rules_score/README.md b/bazel/rules/rules_score/README.md index aea4c605..ac266d91 100644 --- a/bazel/rules/rules_score/README.md +++ b/bazel/rules/rules_score/README.md @@ -92,6 +92,8 @@ architectural_design( - a `.lobster` traceability file (Interface elements only) — consumed by LOBSTER - a `plantuml_links.json` — consumed by the `clickable_plantuml` Sphinx extension - a `validation.log` from the `architectural-design` validation profile +- a `.idmap.json` sidecar — consumed by the `clickable_plantuml` Sphinx extension + to resolve cross-diagram links based on element *defines/references* roles Diagrams in `public_api` are classified separately so their lobster items flow through `public_api_lobster_files` for failure-mode traceability. diff --git a/bazel/rules/rules_score/docs/tooling_architecture.rst b/bazel/rules/rules_score/docs/tooling_architecture.rst index a5ce61e1..f515a02f 100644 --- a/bazel/rules/rules_score/docs/tooling_architecture.rst +++ b/bazel/rules/rules_score/docs/tooling_architecture.rst @@ -91,7 +91,7 @@ are rendered under :doc:`tool_reference/index`. - Converts RST requirement directives (``feat_req``, ``comp_req``, …) into ``.trlc`` records so requirements can be authored in either RST or TRLC. 
* - **PlantUML Parser** - - ``@score_tooling//plantuml/parser:parser`` (Rust) + ``:linker`` + - ``@score_tooling//plantuml/parser:parser`` (Rust) - ``architectural_design``, ``unit_design`` - Parses ``.puml`` diagrams into a FlatBuffers AST (``.fbs.bin``, one ``root_type`` per diagram kind) and extracts interface ``.lobster`` @@ -115,6 +115,14 @@ are rendered under :doc:`tool_reference/index`. * - **fmea_assembler** - ``//bazel/rules/rules_score:fmea_assembler`` (``src/fmea_assembler.py``, local; links the ``TRLCRST`` library) + ``root_type`` per diagram kind), extracts interface ``.lobster`` items, + and emits ``.idmap.json`` sidecars recording the *defines/references* + roles of each element. The ``clickable_plantuml`` Sphinx extension reads + these sidecars to resolve cross-diagram links without a separate linker + step. Rejects syntactically invalid diagrams with a non-zero exit code. + * - **safety_analysis_tools** + - ``//bazel/rules/rules_score:safety_analysis_tools`` + (``src/safety_analysis_tools.py``, local) - ``fmea`` - Assembles the failure-mode-centric ``fmea.rst`` from ``fta_chains.json`` plus the FailureMode / ControlMeasure records in one in-process TRLC diff --git a/bazel/rules/rules_score/examples/seooc/safety_analysis/sample_fmea_failure_modes.trlc b/bazel/rules/rules_score/examples/seooc/safety_analysis/sample_fmea_failure_modes.trlc index 5e4cef42..5c7135bc 100644 --- a/bazel/rules/rules_score/examples/seooc/safety_analysis/sample_fmea_failure_modes.trlc +++ b/bazel/rules/rules_score/examples/seooc/safety_analysis/sample_fmea_failure_modes.trlc @@ -24,7 +24,7 @@ ScoreReq.FailureMode SampleFailureMode{ } ScoreReq.FailureMode SampleFailureMode2{ - guideword = ScoreReq.GuideWord.TooLate + guidewords = [ScoreReq.guidewords.TooLate] description = "SampleFailureMode2 responds too late" failureeffect = "Downstream consumers time out" version = 1 diff --git a/bazel/rules/rules_score/private/architectural_design.bzl 
b/bazel/rules/rules_score/private/architectural_design.bzl index b8a1b69c..7b8ee3fa 100644 --- a/bazel/rules/rules_score/private/architectural_design.bzl +++ b/bazel/rules/rules_score/private/architectural_design.bzl @@ -32,19 +32,23 @@ load("//bazel/rules/rules_score/private:verbosity.bzl", "VERBOSITY_ATTR", "get_l # ============================================================================ def _run_puml_parser(ctx, puml_file): - """Run the PlantUML parser on a single .puml file to produce a FlatBuffers binary - and a lobster traceability file. + """Run the PlantUML parser on a single .puml file. - The diagram type is auto-detected by the parser and encoded in the - FlatBuffers schema (each diagram type uses its own root_type). - Lobster output is produced in-process for component diagrams. + Produces three output files: + - a FlatBuffers binary (``.fbs.bin``), + - a LOBSTER traceability file (``.lobster``), and + - an idmap sidecar (``.idmap.json``) used by the + ``clickable_plantuml`` Sphinx extension to resolve cross-diagram links. + + ``puml_file.short_path`` (workspace-relative) is passed as ``--source-name`` + so the idmap ``source`` field is a stable, path-unique identifier. Args: ctx: Rule context puml_file: The .puml File object to parse Returns: - Tuple of (fbs_output, lobster_output) declared output Files. + Tuple of (fbs_output, lobster_output, idmap_output) declared output Files. 
""" file_stem = puml_file.basename.rsplit(".", 1)[0] fbs_output = ctx.actions.declare_file( @@ -53,25 +57,32 @@ def _run_puml_parser(ctx, puml_file): lobster_output = ctx.actions.declare_file( "{}/{}.lobster".format(ctx.label.name, file_stem), ) + idmap_output = ctx.actions.declare_file( + "{}/{}.idmap.json".format(ctx.label.name, file_stem), + ) ctx.actions.run( inputs = [puml_file], - outputs = [fbs_output, lobster_output], + outputs = [fbs_output, lobster_output, idmap_output], executable = ctx.executable._puml_parser, arguments = [ "--file", puml_file.path, + "--source-name", + puml_file.short_path, "--fbs-output-dir", fbs_output.dirname, "--lobster-output-dir", lobster_output.dirname, + "--idmap-output-dir", + idmap_output.dirname, "--log-level", get_log_level(ctx), ], progress_message = "Parsing PlantUML diagram: %s" % puml_file.short_path, ) - return fbs_output, lobster_output + return fbs_output, lobster_output, idmap_output def _parse_puml_diagrams(ctx, files): """Run the PlantUML parser on all .puml/.plantuml files in a list. @@ -81,16 +92,18 @@ def _parse_puml_diagrams(ctx, files): files: List of File objects Returns: - Tuple of (fbs_outputs, lobster_outputs) lists of generated Files. + Tuple of (fbs_outputs, lobster_outputs, idmap_outputs) lists of generated Files. """ fbs_outputs = [] lobster_outputs = [] + idmap_outputs = [] for f in files: if f.extension in ("puml", "plantuml"): - fbs, lobster = _run_puml_parser(ctx, f) + fbs, lobster, idmap = _run_puml_parser(ctx, f) fbs_outputs.append(fbs) lobster_outputs.append(lobster) - return fbs_outputs, lobster_outputs + idmap_outputs.append(idmap) + return fbs_outputs, lobster_outputs, idmap_outputs def _run_validation(ctx, component_fbs_files, sequence_fbs_files, internal_api_fbs_files): """Run the architectural-design validation profile. 
@@ -138,46 +151,26 @@ def _architectural_design_impl(ctx): """ # Parse each architectural view separately so each provider field carries - # the flatbuffers for its own category. - static_fbs_list, static_lobster_list = _parse_puml_diagrams(ctx, ctx.files.static) - dynamic_fbs_list, dynamic_lobster_list = _parse_puml_diagrams(ctx, ctx.files.dynamic) - public_api_fbs_list, public_api_lobster_list = _parse_puml_diagrams(ctx, ctx.files.public_api) - internal_api_fbs_list, _internal_api_lobster_list = _parse_puml_diagrams(ctx, ctx.files.internal_api) + # the flatbuffers (and idmap sidecars) for its own category. + static_fbs_list, static_lobster_list, static_idmap_list = _parse_puml_diagrams(ctx, ctx.files.static) + dynamic_fbs_list, dynamic_lobster_list, dynamic_idmap_list = _parse_puml_diagrams(ctx, ctx.files.dynamic) + public_api_fbs_list, public_api_lobster_list, public_api_idmap_list = _parse_puml_diagrams(ctx, ctx.files.public_api) + internal_api_fbs_list, _internal_api_lobster_list, internal_api_idmap_list = _parse_puml_diagrams(ctx, ctx.files.internal_api) static_fbs = depset(static_fbs_list) dynamic_fbs = depset(dynamic_fbs_list) public_api_fbs = depset(public_api_fbs_list) internal_api_fbs = depset(internal_api_fbs_list) public_api_lobster = depset(public_api_lobster_list) + all_idmaps = depset(static_idmap_list + dynamic_idmap_list + public_api_idmap_list + internal_api_idmap_list) # Source files for SphinxSourcesInfo (sphinx documentation pipeline) all_source_files = depset( transitive = [depset(ctx.files.static), depset(ctx.files.dynamic), depset(ctx.files.public_api), depset(ctx.files.internal_api)], ) - # Run the linker on all generated .fbs.bin files to produce a - # plantuml_links.json for the clickable_plantuml Sphinx extension. 
- all_fbs_files = static_fbs.to_list() + dynamic_fbs.to_list() + public_api_fbs.to_list() + internal_api_fbs.to_list() - plantuml_links_json = ctx.actions.declare_file( - "{}/plantuml_links.json".format(ctx.label.name), - ) - if all_fbs_files: - ctx.actions.run( - inputs = all_fbs_files, - outputs = [plantuml_links_json], - executable = ctx.executable._linker, - arguments = ["--fbs-files"] + [f.path for f in all_fbs_files] + ["--output", plantuml_links_json.path, "--log-level", get_log_level(ctx)], - progress_message = "Generating PlantUML links JSON for %s" % ctx.label.name, - ) - else: - ctx.actions.write( - output = plantuml_links_json, - content = '{"links":[]}', - ) - sphinx_files = depset( - [plantuml_links_json], - transitive = [all_source_files], + transitive = [all_source_files, all_idmaps], ) # Generate a thin RST wrapper for every .puml diagram so it appears as a @@ -199,7 +192,9 @@ def _architectural_design_impl(ctx): sphinx_srcs = depset(rst_wrappers, transitive = [sphinx_files]) return [ - DefaultInfo(files = depset([validation_log.file], transitive = [all_source_files])), + # Expose source diagrams and generated idmap sidecars to generic + # consumers that read only DefaultInfo (for example sphinx_module srcs). + DefaultInfo(files = depset(transitive = [all_source_files, all_idmaps])), ArchitecturalDesignInfo( static = static_fbs, dynamic = dynamic_fbs, @@ -208,7 +203,8 @@ def _architectural_design_impl(ctx): public_api_lobster_files = public_api_lobster, validation_logs = [validation_log], ), - # Source diagram files + plantuml_links.json for the sphinx documentation build + # Source diagram files + .idmap.json sidecars for the sphinx documentation build. + # The clickable_plantuml extension reads *.idmap.json to resolve cross-diagram links. 
SphinxSourcesInfo( srcs = sphinx_srcs, deps = sphinx_srcs, @@ -220,65 +216,56 @@ def _architectural_design_impl(ctx): # Rule Definition # ============================================================================ -def _architectural_design_attrs(): - attrs = { - "static": attr.label_list( - allow_files = [".puml", ".plantuml", ".svg", ".rst", ".md"], - mandatory = False, - doc = "Static architecture diagrams (class diagrams, component diagrams, etc.)", - ), - "dynamic": attr.label_list( - allow_files = [".puml", ".plantuml", ".svg", ".rst", ".md"], - mandatory = False, - doc = "Dynamic architecture diagrams (sequence diagrams, activity diagrams, etc.)", - ), - "public_api": attr.label_list( - allow_files = [".puml", ".plantuml"], - mandatory = False, - doc = "Public API diagrams (parsed identically to static/dynamic). " + - "Classified separately so their lobster items are exposed via " + - "public_api_lobster_files, enabling failure-mode-to-interface " + - "traceability at the dependable element level.", - ), - "internal_api": attr.label_list( - allow_files = [".puml", ".plantuml"], - mandatory = False, - doc = "Internal API diagrams (class diagrams). " + - "Classified separately so their FlatBuffers outputs are exposed via " + - "ArchitecturalDesignInfo.internal_api for downstream validation.", - ), - "maturity": attr.string( - default = "release", - values = ["release", "development"], - doc = "Maturity level of the architectural design. 
'release' treats validation findings as errors; 'development' emits warnings and continues.", - ), - "_puml_parser": attr.label( - default = Label("@score_tooling//plantuml/parser:parser"), - executable = True, - cfg = "exec", - doc = "PlantUML parser tool that generates FlatBuffers from .puml files", - ), - "_linker": attr.label( - default = Label("@score_tooling//plantuml/parser:linker"), - executable = True, - cfg = "exec", - doc = "Tool that generates plantuml_links.json from FlatBuffers diagram outputs", - ), - "_puml_rst_template": attr.label( - default = Label("//bazel/rules/rules_score:templates/puml_diagram.template.rst"), - allow_single_file = True, - doc = "RST template for PlantUML diagram wrapper pages.", - ), - } - attrs.update(VALIDATION_ATTRS) - attrs.update(VERBOSITY_ATTR) - return attrs - _architectural_design = rule( implementation = _architectural_design_impl, doc = "Collects architectural design documents and diagrams for S-CORE process compliance. " + "Automatically parses PlantUML files to produce FlatBuffers binary representations.", - attrs = _architectural_design_attrs(), + attrs = dict( + { + "static": attr.label_list( + allow_files = [".puml", ".plantuml", ".svg", ".rst", ".md"], + mandatory = False, + doc = "Static architecture diagrams (class diagrams, component diagrams, etc.)", + ), + "dynamic": attr.label_list( + allow_files = [".puml", ".plantuml", ".svg", ".rst", ".md"], + mandatory = False, + doc = "Dynamic architecture diagrams (sequence diagrams, activity diagrams, etc.)", + ), + "public_api": attr.label_list( + allow_files = [".puml", ".plantuml"], + mandatory = False, + doc = "Public API diagrams (parsed identically to static/dynamic). 
" + + "Classified separately so their lobster items are exposed via " + + "public_api_lobster_files, enabling failure-mode-to-interface " + + "traceability at the dependable element level.", + ), + "internal_api": attr.label_list( + allow_files = [".puml", ".plantuml"], + mandatory = False, + doc = "Internal API diagrams (class diagrams). " + + "Classified separately so their FlatBuffers outputs are exposed via " + + "ArchitecturalDesignInfo.internal_api for downstream validation.", + ), + "maturity": attr.string( + default = "release", + values = ["release", "development"], + doc = "Maturity level of the architectural design. 'release' treats validation findings as errors; 'development' emits warnings and continues.", + ), + "_puml_parser": attr.label( + default = Label("@score_tooling//plantuml/parser:puml_cli"), + executable = True, + cfg = "exec", + doc = "PlantUML parser tool that generates FlatBuffers from .puml files", + ), + "_puml_rst_template": attr.label( + default = Label("//bazel/rules/rules_score:templates/puml_diagram.template.rst"), + allow_single_file = True, + doc = "RST template for PlantUML diagram wrapper pages.", + ), + }, + **dict(VALIDATION_ATTRS, **VERBOSITY_ATTR) + ), ) # ============================================================================ diff --git a/plantuml/linker/README.md b/plantuml/linker/README.md deleted file mode 100644 index 710adbc9..00000000 --- a/plantuml/linker/README.md +++ /dev/null @@ -1,12 +0,0 @@ - diff --git a/plantuml/linker/src/main.rs b/plantuml/linker/src/main.rs deleted file mode 100644 index 87ac1c88..00000000 --- a/plantuml/linker/src/main.rs +++ /dev/null @@ -1,266 +0,0 @@ -// ******************************************************************************* -// Copyright (c) 2026 Contributors to the Eclipse Foundation -// -// See the NOTICE file(s) distributed with this work for additional -// information regarding copyright ownership. 
-// -// This program and the accompanying materials are made available under the -// terms of the Apache License Version 2.0 which is available at -// -// -// SPDX-License-Identifier: Apache-2.0 -// ******************************************************************************* - -//! PlantUML Linker -//! -//! Reads FlatBuffers `.fbs.bin` files produced by the PlantUML parser and -//! generates `plantuml_links.json` for the `clickable_plantuml` Sphinx extension. -//! -//! The tool correlates components across multiple diagrams: when a component -//! alias in diagram A matches a top-level component alias in diagram B, a -//! clickable link is created from A → B. - -use std::collections::HashMap; -use std::fs; - -use clap::{Parser, ValueEnum}; -use env_logger::Builder; - -use component_fbs::component as fb_component; - -// --------------------------------------------------------------------------- -// Log level -// --------------------------------------------------------------------------- - -/// CLI-visible log level (mirrors the parser's convention). 
-#[derive(Copy, Clone, ValueEnum, Debug)] -enum CliLogLevel { - Error, - Warn, - Info, - Debug, - Trace, -} - -impl CliLogLevel { - fn to_level_filter(self) -> log::LevelFilter { - match self { - CliLogLevel::Error => log::LevelFilter::Error, - CliLogLevel::Warn => log::LevelFilter::Warn, - CliLogLevel::Info => log::LevelFilter::Info, - CliLogLevel::Debug => log::LevelFilter::Debug, - CliLogLevel::Trace => log::LevelFilter::Trace, - } - } -} - -// --------------------------------------------------------------------------- -// CLI -// --------------------------------------------------------------------------- - -#[derive(Parser, Debug)] -#[command(name = "linker")] -#[command(version = "1.0")] -#[command( - about = "Generate plantuml_links.json from FlatBuffers diagram outputs", - long_about = "Reads .fbs.bin files from the PlantUML parser and produces a \ - plantuml_links.json file mapping component aliases to their \ - detailed diagrams for the clickable_plantuml Sphinx extension." -)] -struct Args { - /// FlatBuffers binary files to process (.fbs.bin) - #[arg(long, num_args = 1..)] - fbs_files: Vec, - - /// Output JSON file path - #[arg(long, default_value = "plantuml_links.json")] - output: String, - - /// Log level: error, warn, info, debug, trace - #[arg(long, value_enum, default_value = "warn")] - log_level: CliLogLevel, -} - -// --------------------------------------------------------------------------- -// Data model -// --------------------------------------------------------------------------- - -/// A component extracted from a FlatBuffers diagram. -#[derive(Debug)] -struct DiagramComponent { - alias: String, - parent_id: Option, -} - -/// All components from a single diagram file. -#[derive(Debug)] -struct DiagramInfo { - source_file: String, - components: Vec, -} - -/// One entry in the output JSON `links` array. 
-#[derive(Debug, serde::Serialize)] -struct LinkEntry { - source_file: String, - source_id: String, - target_file: String, -} - -/// Root structure of the output JSON. -#[derive(Debug, serde::Serialize)] -struct LinksJson { - links: Vec, -} - -// --------------------------------------------------------------------------- -// FlatBuffers reading -// --------------------------------------------------------------------------- - -fn read_diagram(path: &str) -> Result { - let data = fs::read(path).map_err(|e| format!("Failed to read {path}: {e}"))?; - - if data.is_empty() { - return Err(format!("Empty file (placeholder): {path}")); - } - - let graph = flatbuffers::root::(&data) - .map_err(|e| format!("Failed to parse FlatBuffer {path}: {e}"))?; - - let source_file = graph - .source_file() - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .ok_or_else(|| format!("Missing source_file in FlatBuffer: {path}"))?; - - let mut components = Vec::new(); - if let Some(entries) = graph.components() { - for entry in entries.iter() { - let Some(comp) = entry.value() else { - continue; - }; - let alias = comp.alias().or(comp.name()).unwrap_or_default().to_string(); - if alias.is_empty() { - continue; - } - components.push(DiagramComponent { - alias, - parent_id: comp.parent_id().map(|s| s.to_string()), - }); - } - } - - Ok(DiagramInfo { - source_file, - components, - }) -} - -// --------------------------------------------------------------------------- -// Link generation -// --------------------------------------------------------------------------- - -/// Build links by matching component aliases across diagrams. -/// -/// For each component alias in diagram A, if a top-level component (no parent) -/// with the same alias exists in diagram B, we create a link: -/// source_file = A, source_id = alias, target_file = B -/// -/// A component is considered "top-level" if its `parent_id` is `None`. 
-fn generate_links(diagrams: &[DiagramInfo]) -> Vec { - // Index: alias → list of diagrams where that alias is a top-level component - let mut top_level_index: HashMap> = HashMap::new(); - for diagram in diagrams { - for comp in &diagram.components { - if comp.parent_id.is_none() { - top_level_index - .entry(comp.alias.clone()) - .or_default() - .push(&diagram.source_file); - } - } - } - - let mut links = Vec::new(); - - for diagram in diagrams { - for comp in &diagram.components { - if let Some(target_diagrams) = top_level_index.get(&comp.alias) { - for &target_file in target_diagrams { - // Don't link a component to its own diagram. - if target_file == diagram.source_file { - continue; - } - links.push(LinkEntry { - source_file: diagram.source_file.clone(), - source_id: comp.alias.clone(), - target_file: target_file.to_string(), - }); - } - } - } - } - - // Deduplicate: same (source_file, source_id, target_file) may appear - // when a component is nested inside multiple parent scopes. - links.sort_by(|a, b| { - (&a.source_file, &a.source_id, &a.target_file).cmp(&( - &b.source_file, - &b.source_id, - &b.target_file, - )) - }); - links.dedup_by(|a, b| { - a.source_file == b.source_file - && a.source_id == b.source_id - && a.target_file == b.target_file - }); - - // PlantUML supports only one URL per alias — keep the first target - // (alphabetically) for each (source_file, source_id) pair. - links.dedup_by(|a, b| a.source_file == b.source_file && a.source_id == b.source_id); - - links -} - -// --------------------------------------------------------------------------- -// Main -// --------------------------------------------------------------------------- - -fn main() -> Result<(), Box> { - let args = Args::parse(); - Builder::new() - .filter_level(args.log_level.to_level_filter()) - .init(); - - if args.fbs_files.is_empty() { - return Err("No .fbs.bin files provided. 
Use --fbs-files ...".into()); - } - - let mut diagrams = Vec::new(); - for fbs_path in &args.fbs_files { - match read_diagram(fbs_path) { - Ok(diagram) => { - log::info!( - "Read {} components from {}", - diagram.components.len(), - diagram.source_file - ); - diagrams.push(diagram); - } - Err(e) => { - log::warn!("Skipping {}: {}", fbs_path, e); - } - } - } - - let links = generate_links(&diagrams); - log::info!("Generated {} link(s)", links.len()); - - let output = LinksJson { links }; - let json = serde_json::to_string_pretty(&output)?; - fs::write(&args.output, &json)?; - log::debug!("Written to {}", args.output); - - Ok(()) -} diff --git a/plantuml/parser/BUILD b/plantuml/parser/BUILD index 197ce9f6..bc9b5c3e 100644 --- a/plantuml/parser/BUILD +++ b/plantuml/parser/BUILD @@ -23,7 +23,7 @@ alias( ) alias( - name = "linker", - actual = "//plantuml/linker:linker", + name = "puml_cli", + actual = "//plantuml/parser/puml_cli:puml_cli", visibility = ["//visibility:public"], ) diff --git a/plantuml/parser/puml_cli/BUILD b/plantuml/parser/puml_cli/BUILD index ea703163..f8cef2da 100644 --- a/plantuml/parser/puml_cli/BUILD +++ b/plantuml/parser/puml_cli/BUILD @@ -19,6 +19,7 @@ rust_binary( visibility = ["//visibility:public"], deps = [ "//plantuml/parser/puml_fta", + "//plantuml/parser/puml_idmap", "//plantuml/parser/puml_lobster", "//plantuml/parser/puml_parser", "//plantuml/parser/puml_resolver", @@ -45,6 +46,7 @@ rust_test( crate_root = "src/main.rs", deps = [ "//plantuml/parser/puml_fta", + "//plantuml/parser/puml_idmap", "//plantuml/parser/puml_lobster", "//plantuml/parser/puml_parser", "//plantuml/parser/puml_resolver", diff --git a/plantuml/parser/puml_cli/src/main.rs b/plantuml/parser/puml_cli/src/main.rs index 774b76f8..817849d4 100644 --- a/plantuml/parser/puml_cli/src/main.rs +++ b/plantuml/parser/puml_cli/src/main.rs @@ -27,6 +27,7 @@ use component_serializer::ComponentSerializer; use sequence_serializer::SequenceSerializer; use puml_fta::{lobster_document, 
FtaChain, FtaModel}; +use puml_idmap::{write_idmap_to_file, IdMapModel}; use puml_lobster::{write_lobster_to_file, LobsterModel}; use puml_parser::{ DiagramParser, ErrorLocation, Preprocessor, ProcedureParserService, PumlActivityParser, @@ -109,6 +110,19 @@ struct Args { /// processing is performed in this mode. #[arg(long)] fta_output_dir: Option, + + /// Output directory for generated idmap sidecar files (optional). + /// When set, a .idmap.json is written for each resolved diagram, + /// recording the defines/references used by the clickable_plantuml + /// Sphinx extension to resolve cross-diagram links. + #[arg(long)] + idmap_output_dir: Option, + + /// Stable workspace-relative source name baked into generated artifacts + /// (FlatBuffers/lobster/idmap ``source`` field). When omitted, the + /// filesystem basename is used as a fallback. + #[arg(long)] + source_name: Option, } #[derive(Copy, Clone, ValueEnum, Debug)] @@ -167,13 +181,20 @@ fn run() -> Result<(), Box> { None }; - let lobster_output_dir: Option = match &args.lobster_output_dir { - Some(dir) => { - let p = PathBuf::from(dir); - fs::create_dir_all(&p)?; - Some(p) - } - None => None, + let lobster_output_dir: Option = if let Some(dir) = &args.lobster_output_dir { + let p = PathBuf::from(dir); + fs::create_dir_all(&p)?; + Some(p) + } else { + None + }; + + let idmap_output_dir: Option = if let Some(dir) = &args.idmap_output_dir { + let p = PathBuf::from(dir); + fs::create_dir_all(&p)?; + Some(p) + } else { + None }; let file_list = collect_files_from_args(&args)?; @@ -209,9 +230,12 @@ fn run() -> Result<(), Box> { } } - let source_file = path - .file_name() - .and_then(|n| n.to_str()) + // Prefer the stable workspace-relative --source-name when + // provided; fall back to the filesystem basename (legacy). 
+ let source_file: &str = args + .source_name + .as_deref() + .or_else(|| path.file_name().and_then(|n| n.to_str())) .unwrap_or_default(); let fbs_buffer = serialize_resolved_diagram(&logic_result, source_file); if let Some(ref dir) = fbs_output_dir { @@ -225,7 +249,17 @@ fn run() -> Result<(), Box> { ResolvedDiagram::Activity(_) => LobsterModel::Empty, ResolvedDiagram::Sequence(_) => LobsterModel::Empty, }; - write_lobster_to_file(lobster_model, path, ldir)?; + write_lobster_to_file(lobster_model, path, Some(source_file), ldir)?; + } + + if let Some(idir) = &idmap_output_dir { + let idmap_model = match &logic_result { + ResolvedDiagram::Component(model) => IdMapModel::Component(model), + ResolvedDiagram::Class(model) => IdMapModel::Class(model), + ResolvedDiagram::Activity(_) => IdMapModel::Empty, + ResolvedDiagram::Sequence(model) => IdMapModel::Sequence(model), + }; + write_idmap_to_file(idmap_model, path, Some(source_file), idir)?; } } Err(e) => { diff --git a/plantuml/linker/BUILD b/plantuml/parser/puml_idmap/BUILD similarity index 53% rename from plantuml/linker/BUILD rename to plantuml/parser/puml_idmap/BUILD index 38337849..7cf6f15f 100644 --- a/plantuml/linker/BUILD +++ b/plantuml/parser/puml_idmap/BUILD @@ -10,18 +10,17 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_test") -rust_binary( - name = "linker", - srcs = ["src/main.rs"], - crate_root = "src/main.rs", - visibility = ["//visibility:public"], +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +rust_library( + name = "puml_idmap", + srcs = ["src/lib.rs"], + visibility = ["//plantuml/parser:__subpackages__"], deps = [ - "//tools/serialization/flatbuffers/component:component_fbs", - "@crates//:clap", - "@crates//:env_logger", - "@crates//:flatbuffers", + "//tools/metamodel/class:class_diagram", + "//tools/metamodel/component:component_diagram", + 
"//tools/metamodel/sequence:sequence_diagram", "@crates//:log", "@crates//:serde", "@crates//:serde_json", @@ -29,6 +28,11 @@ rust_binary( ) rust_test( - name = "linker_test", - crate = ":linker", + name = "puml_idmap_test", + crate = ":puml_idmap", + deps = [ + "//tools/metamodel/class:class_diagram", + "//tools/metamodel/component:component_diagram", + "//tools/metamodel/sequence:sequence_diagram", + ], ) diff --git a/plantuml/parser/puml_idmap/src/lib.rs b/plantuml/parser/puml_idmap/src/lib.rs new file mode 100644 index 00000000..0ed058b7 --- /dev/null +++ b/plantuml/parser/puml_idmap/src/lib.rs @@ -0,0 +1,497 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +//! Converts the resolved PlantUML logical model into an `.idmap.json` file +//! consumed by the `clickable_plantuml` Sphinx extension. +//! +//! The idmap separates each diagram's elements into two roles: +//! +//! * **defines** – elements that are *elaborated* in this diagram (they have +//! child elements, class members, or this diagram is the detail view). +//! * **references** – leaf mentions and relation endpoints (elements that +//! should link *away* to wherever they are elaborated). +//! +//! This mirrors the structure of `puml_lobster` but produces idmap JSON +//! rather than LOBSTER trace JSON. 
+ +use class_diagram::ClassDiagram; +use component_diagram::LogicComponent; +use sequence_logic::SequenceTree; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::ffi::OsStr; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +// --------------------------------------------------------------------------- +// Data model +// --------------------------------------------------------------------------- + +/// A single element entry in the idmap. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct IdMapEntry { + /// PlantUML alias used in `url of <alias> is [[url]]` injection. + pub alias: String, + /// Fully-qualified identifier (FQN) for matching across diagrams. + pub id: String, + /// `true` when this diagram elaborates the element (i.e. it is listed + /// under `defines`). Omitted from the JSON for plain references. + #[serde(default, skip_serializing_if = "is_not_elaborated")] + pub elaborated: bool, +} + +/// `skip_serializing_if` predicate: omit `elaborated` when it is `false`. +fn is_not_elaborated(elaborated: &bool) -> bool { + !*elaborated +} + +/// Root structure of an `.idmap.json` file. +#[derive(Debug, Serialize, Deserialize)] +pub struct IdMapFile { + /// Workspace-relative source path, e.g. `score/mw/com/proxy_detail.puml`. + pub source: String, + /// Elements elaborated (defined) in this diagram. + pub defines: Vec<IdMapEntry>, + /// Elements referenced (leaf/relation endpoint) in this diagram. + pub references: Vec<IdMapEntry>, +} + +// --------------------------------------------------------------------------- +// Model wrapper +// --------------------------------------------------------------------------- + +/// Union of the resolved diagram models accepted by the idmap writer. +pub enum IdMapModel<'a> { + Component(&'a HashMap<String, LogicComponent>), + Class(&'a ClassDiagram), + Sequence(&'a SequenceTree), + /// Diagrams with no cross-linkable elements (e.g.
activity); produces an + /// empty idmap so the parser's always-declared sidecar output is written. + Empty, +} + +// --------------------------------------------------------------------------- +// Model converters +// --------------------------------------------------------------------------- + +/// Produce an [`IdMapFile`] from a resolved component diagram. +/// +/// An element is a **define** when at least one other element lists it as its +/// `parent_id` (i.e. it has children and is therefore elaborated here). +/// All remaining elements are **references** (leaves, nested or top-level, +/// that mention something that may be detailed in another diagram). +fn comp_model_to_idmap(model: &HashMap<String, LogicComponent>, source: &str) -> IdMapFile { + // Collect the set of IDs that are listed as parent by at least one child. + let has_children: HashSet<&str> = model + .values() + .filter_map(|c| c.parent_id.as_deref()) + .collect(); + + let mut defines = Vec::new(); + let mut references = Vec::new(); + + for comp in model.values() { + let alias = comp + .alias + .as_deref() + .or(comp.name.as_deref()) + .unwrap_or(&comp.id) + .to_string(); + let is_define = has_children.contains(comp.id.as_str()); + let entry = IdMapEntry { + alias, + id: comp.id.clone(), + elaborated: is_define, + }; + if is_define { + defines.push(entry); + } else { + references.push(entry); + } + } + + // Deterministic output order. + defines.sort_by(|a, b| a.id.cmp(&b.id)); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines, + references, + } +} + +/// Produce an [`IdMapFile`] from a resolved class diagram. +/// +/// A class entity is a **define** when it has any members (methods or +/// variables), making this diagram the elaboration site. Entities without +/// members are **references**.
+fn class_model_to_idmap(model: &ClassDiagram, source: &str) -> IdMapFile { + let mut defines = Vec::new(); + let mut references = Vec::new(); + + for entity in &model.entities { + let has_members = !entity.methods.is_empty() || !entity.variables.is_empty(); + let entry = IdMapEntry { + alias: entity.name.clone(), + id: entity.id.clone(), + elaborated: has_members, + }; + if has_members { + defines.push(entry); + } else { + references.push(entry); + } + } + + defines.sort_by(|a, b| a.id.cmp(&b.id)); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines, + references, + } +} + +/// Collect the unique participant names from a sequence tree. +fn collect_participants(tree: &SequenceTree) -> HashSet<String> { + use sequence_logic::{Event, SequenceNode}; + + fn walk_nodes(nodes: &[SequenceNode], out: &mut HashSet<String>) { + for node in nodes { + match &node.event { + Event::Interaction(i) => { + out.insert(i.caller.clone()); + out.insert(i.callee.clone()); + } + Event::Return(r) => { + out.insert(r.caller.clone()); + out.insert(r.callee.clone()); + } + Event::Condition(_) => {} + } + walk_nodes(&node.branches_node, out); + } + } + + let mut participants = HashSet::new(); + walk_nodes(&tree.root_interactions, &mut participants); + participants +} + +/// Produce an [`IdMapFile`] from a resolved sequence diagram. +/// +/// Sequence diagrams have no "definition" elements — all participants are +/// references (each participant links away to the component diagram that +/// elaborates it).
+fn sequence_model_to_idmap(model: &SequenceTree, source: &str) -> IdMapFile { + let participants = collect_participants(model); + let mut references: Vec<IdMapEntry> = participants + .into_iter() + .map(|name| IdMapEntry { + alias: name.clone(), + id: name, + elaborated: false, + }) + .collect(); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines: Vec::new(), + references, + } +} + +/// Produce an empty [`IdMapFile`] for diagrams without cross-linkable elements. +fn empty_idmap(source: &str) -> IdMapFile { + IdMapFile { + source: source.to_string(), + defines: Vec::new(), + references: Vec::new(), + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Write an `.idmap.json` file for *model* into *output_dir*. +/// +/// The output filename is `<stem>.idmap.json` where `<stem>` is the file +/// stem of *input_path* (the original `.puml` source file). +/// +/// The `source` field embedded in the JSON is set to *source_name* when +/// provided (preferred: a stable workspace-relative path such as +/// `score/mw/com/proxy_detail.puml`), otherwise falls back to +/// `input_path.to_string_lossy()`.
+pub fn write_idmap_to_file( + model: IdMapModel<'_>, + input_path: &Path, + source_name: Option<&str>, + output_dir: &Path, +) -> io::Result<PathBuf> { + let source = source_name + .map(|s| s.to_string()) + .unwrap_or_else(|| input_path.to_string_lossy().into_owned()); + + let idmap = match model { + IdMapModel::Component(m) => comp_model_to_idmap(m, &source), + IdMapModel::Class(m) => class_model_to_idmap(m, &source), + IdMapModel::Sequence(m) => sequence_model_to_idmap(m, &source), + IdMapModel::Empty => empty_idmap(&source), + }; + + let file_stem = input_path + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or("output"); + let output_path = output_dir.join(format!("{file_stem}.idmap.json")); + + let json = serde_json::to_string_pretty(&idmap) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + fs::write(&output_path, json)?; + + log::debug!("idmap written to {}", output_path.display()); + Ok(output_path) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use class_diagram::{MemberVariable, SimpleEntity}; + use component_diagram::ComponentType; + use sequence_logic::{Event, Interaction, SequenceNode}; + + fn component( + id: &str, + alias: Option<&str>, + name: Option<&str>, + parent: Option<&str>, + ) -> LogicComponent { + LogicComponent { + id: id.to_string(), + name: name.map(str::to_string), + alias: alias.map(str::to_string), + parent_id: parent.map(str::to_string), + element_type: ComponentType::Component, + stereotype: None, + relations: Vec::new(), + } + } + + fn component_map(components: Vec<LogicComponent>) -> HashMap<String, LogicComponent> { + components.into_iter().map(|c| (c.id.clone(), c)).collect() + } + + #[test] + fn component_children_make_define_leaves_make_reference() { + // `Proxy` has a child `Handler` → Proxy is a define, Handler a reference.
+ let model = component_map(vec![ + component("Proxy", Some("Proxy"), None, None), + component("Handler", Some("Handler"), None, Some("Proxy")), + ]); + + let idmap = comp_model_to_idmap(&model, "pkg/proxy.puml"); + + assert_eq!(idmap.source, "pkg/proxy.puml"); + assert_eq!( + idmap + .defines + .iter() + .map(|e| e.id.as_str()) + .collect::<Vec<_>>(), + ["Proxy"] + ); + assert_eq!( + idmap + .references + .iter() + .map(|e| e.id.as_str()) + .collect::<Vec<_>>(), + ["Handler"] + ); + assert!(idmap.defines[0].elaborated); + assert!(!idmap.references[0].elaborated); + } + + #[test] + fn component_with_no_children_is_all_references() { + let model = component_map(vec![ + component("A", Some("A"), None, None), + component("B", Some("B"), None, None), + ]); + + let idmap = comp_model_to_idmap(&model, "pkg/overview.puml"); + + assert!(idmap.defines.is_empty()); + assert_eq!(idmap.references.len(), 2); + } + + #[test] + fn component_alias_falls_back_to_name_then_id() { + let model = component_map(vec![ + component("id.only", None, None, None), + component("id.named", None, Some("DisplayName"), None), + component("id.aliased", Some("AliasName"), Some("DisplayName"), None), + ]); + + let idmap = comp_model_to_idmap(&model, "pkg/aliases.puml"); + + let alias_of = |id: &str| -> String { + idmap + .references + .iter() + .find(|e| e.id == id) + .map(|e| e.alias.clone()) + .unwrap() + }; + assert_eq!(alias_of("id.only"), "id.only"); + assert_eq!(alias_of("id.named"), "DisplayName"); + assert_eq!(alias_of("id.aliased"), "AliasName"); + } + + #[test] + fn component_output_is_sorted_by_id() { + let model = component_map(vec![ + component("zeta", Some("zeta"), None, None), + component("alpha", Some("alpha"), None, None), + component("mu", Some("mu"), None, None), + ]); + + let idmap = comp_model_to_idmap(&model, "pkg/sorted.puml"); + + let ids: Vec<&str> = idmap.references.iter().map(|e| e.id.as_str()).collect(); + assert_eq!(ids, ["alpha", "mu", "zeta"]); + } + + #[test] + fn
class_entities_with_members_are_defines() { + let with_members = SimpleEntity { + id: "pkg.WithMembers".to_string(), + name: "WithMembers".to_string(), + variables: vec![MemberVariable::default()], + ..Default::default() + }; + let without_members = SimpleEntity { + id: "pkg.Empty".to_string(), + name: "Empty".to_string(), + ..Default::default() + }; + let model = ClassDiagram { + name: "d".to_string(), + entities: vec![with_members, without_members], + relationships: Vec::new(), + source_files: Vec::new(), + version: None, + }; + + let idmap = class_model_to_idmap(&model, "pkg/classes.puml"); + + assert_eq!( + idmap + .defines + .iter() + .map(|e| e.id.as_str()) + .collect::<Vec<_>>(), + ["pkg.WithMembers"] + ); + assert_eq!( + idmap + .references + .iter() + .map(|e| e.id.as_str()) + .collect::<Vec<_>>(), + ["pkg.Empty"] + ); + } + + #[test] + fn class_output_is_sorted_by_id_for_defines_and_references() { + let with_members_z = SimpleEntity { + id: "pkg.Z".to_string(), + name: "Z".to_string(), + variables: vec![MemberVariable::default()], + ..Default::default() + }; + let with_members_a = SimpleEntity { + id: "pkg.A".to_string(), + name: "A".to_string(), + variables: vec![MemberVariable::default()], + ..Default::default() + }; + let ref_m = SimpleEntity { + id: "pkg.M".to_string(), + name: "M".to_string(), + ..Default::default() + }; + let ref_b = SimpleEntity { + id: "pkg.B".to_string(), + name: "B".to_string(), + ..Default::default() + }; + + let model = ClassDiagram { + name: "sorted".to_string(), + entities: vec![with_members_z, ref_m, with_members_a, ref_b], + relationships: Vec::new(), + source_files: Vec::new(), + version: None, + }; + + let idmap = class_model_to_idmap(&model, "pkg/class_sorted.puml"); + + let define_ids: Vec<&str> = idmap.defines.iter().map(|e| e.id.as_str()).collect(); + let ref_ids: Vec<&str> = idmap.references.iter().map(|e| e.id.as_str()).collect(); + + assert_eq!(define_ids, ["pkg.A", "pkg.Z"]); + assert_eq!(ref_ids, ["pkg.B", "pkg.M"]); + } + +
#[test] + fn sequence_participants_become_sorted_references() { + let interaction = |caller: &str, callee: &str| SequenceNode { + event: Event::Interaction(Interaction { + caller: caller.to_string(), + callee: callee.to_string(), + method: "call".to_string(), + }), + branches_node: Vec::new(), + }; + let tree = SequenceTree { + name: None, + root_interactions: vec![interaction("Zebra", "Alpha"), interaction("Alpha", "Mango")], + }; + + let idmap = sequence_model_to_idmap(&tree, "pkg/seq.puml"); + + assert!(idmap.defines.is_empty()); + let ids: Vec<&str> = idmap.references.iter().map(|e| e.id.as_str()).collect(); + assert_eq!(ids, ["Alpha", "Mango", "Zebra"]); + } + + #[test] + fn empty_model_yields_empty_idmap() { + let idmap = empty_idmap("pkg/activity.puml"); + + assert_eq!(idmap.source, "pkg/activity.puml"); + assert!(idmap.defines.is_empty()); + assert!(idmap.references.is_empty()); + } +} diff --git a/plantuml/parser/puml_lobster/src/lib.rs b/plantuml/parser/puml_lobster/src/lib.rs index e1136757..87b2c04b 100644 --- a/plantuml/parser/puml_lobster/src/lib.rs +++ b/plantuml/parser/puml_lobster/src/lib.rs @@ -131,20 +131,21 @@ fn map_entity_type_to_kind(entity_type: EntityType) -> &'static str { /// /// The output filename is `.lobster` where `` is the file stem of /// `input_path` (the original `.puml` source file). +/// +/// When `source_name` is provided, it is embedded as the stable source path in +/// the emitted JSON. Otherwise the filesystem path of `input_path` is used.
pub fn write_lobster_to_file( model: LobsterModel<'_>, input_path: &Path, + source_name: Option<&str>, output_dir: &Path, ) -> io::Result { + let source = source_name + .map(str::to_owned) + .unwrap_or_else(|| input_path.to_string_lossy().into_owned()); let lobster = match model { - LobsterModel::Component(component_model) => { - let source_str = input_path.to_string_lossy().into_owned(); - comp_model_to_lobster(component_model, &source_str) - } - LobsterModel::Class(class_model) => { - let source_str = input_path.to_string_lossy().into_owned(); - class_model_to_lobster(class_model, &source_str) - } + LobsterModel::Component(component_model) => comp_model_to_lobster(component_model, &source), + LobsterModel::Class(class_model) => class_model_to_lobster(class_model, &source), LobsterModel::Empty => empty_lobster_document(), }; diff --git a/plantuml/sphinx/clickable_plantuml/README.md b/plantuml/sphinx/clickable_plantuml/README.md index 9d767037..315c41de 100644 --- a/plantuml/sphinx/clickable_plantuml/README.md +++ b/plantuml/sphinx/clickable_plantuml/README.md @@ -16,19 +16,22 @@ Sphinx extension that makes PlantUML diagrams clickable by injecting hyperlinks ## Sphinx Integration -The extension hooks into the native Sphinx build lifecycle. URLs are computed by -`app.builder.get_relative_uri()`, which works for any builder and -output directory layout. +The extension hooks into the native Sphinx build lifecycle. URL computation +depends on the configured `plantuml_output_format`: in `svg_obj` mode the +rendered SVG lives in `_images/`, so links are made relative to that directory +(`os.path.relpath(target_uri, imagedir)`); for inline `svg`/`png` the link is +relative to the containing HTML page via +`app.builder.get_relative_uri(from_docname, to_docname)`. 
``` Sphinx build lifecycle clickable_plantuml hooks ═══════════════════════════════════ ═══════════════════════════════════════ builder-inited ───► on_builder_inited() - │ (one-time setup) Load all *plantuml_links.json files - │ from srcdir (recursive). - │ Store {puml_basename → alias_map} - │ in app.env. + │ (one-time setup) Load all *.idmap.json files from + │ srcdir (recursive). + │ Build definition index: + │ {alias|id → [definer source paths]}. │ ├─ READ PHASE ────────────────────────────────────────────────────────────── │ for each document: @@ -40,10 +43,9 @@ Sphinx build lifecycle clickable_plantuml hooks │ │ │ doctree-read ───► on_doctree_read() │ (per document) Traverse the parsed doctree. - │ For every plantuml node that has a - │ filename attribute, record - │ {puml_basename → docname} in app.env. - │ Warn on basename collisions. + │ For every plantuml node, record + │ {normalized_source_path → docname} + │ in app.env (path identity, not basename). │ │ env-merge-info ───► on_env_merge_info() │ (parallel builds only) Merge puml→docname maps gathered @@ -55,119 +57,181 @@ Sphinx build lifecycle clickable_plantuml hooks │ post-transform / resolve │ │ │ doctree-resolved ───► on_doctree_resolved() - │ (per document) For each plantuml node, look up the - │ alias_map from app.env. - │ Resolve target .puml → docname, then - │ call app.builder.get_relative_uri() - │ to get the correct relative URL. - │ Append url of is [[url]] - │ directives to node['uml'] before - │ sphinxcontrib-plantuml renders it. + │ (per document) For each plantuml node, load its idmap. + │ For each reference entry, look up the + │ definition index (FQN first, then alias). + │ Apply proximity tiebreak on ambiguity. + │ Build the URL (relative to _images/ in + │ svg_obj mode, else page-relative via + │ get_relative_uri), then append + │ url of <alias> is [[url]] directives to + │ node['uml'] before rendering. │ build-finished ``` ## How It Works -1.
**Link discovery** (`builder-inited`) – Scans for `*plantuml_links.json` files in the Sphinx source directory. -2. **Diagram location mapping** (`doctree-read`) – As Sphinx reads each document, the extension traverses the parsed doctree to record which `docname` contains which `.puml` diagram (keyed by basename). Basename collisions across documents are reported as warnings. -3. **URL resolution & link injection** (`doctree-resolved`) – For each plantuml node, resolves target `.puml` references to the docname that contains the target diagram, generates a relative URL via `app.builder.get_relative_uri()`, and appends `url of is [[url]]` directives to the PlantUML source before rendering. -4. **Incremental / parallel support** – `env-purge-doc` removes stale entries when a document is re-read; `env-merge-info` merges state from parallel worker processes. +1. **idmap discovery** (`builder-inited`) – Scans for `*.idmap.json` files in + the Sphinx source directory. Each sidecar records *defines* (elements + elaborated in that diagram, i.e. with children/members) and *references* + (leaf mentions and relation endpoints). A global definition index maps + each alias/FQN to the set of diagrams that elaborate it. + +2. **Diagram location mapping** (`doctree-read`) – Records which `docname` + contains which `.puml` diagram, keyed by the canonical workspace-relative + path. A node's identity is recovered by normalising its absolute path + (`srcdir` + the node's `incdir` + `filename`) and matching it against the + idmap `source` keys by exact *full-path suffix* — so two same-basename + diagrams in different packages never collide, and neither a `srcdir` that + is a workspace sub-directory nor symlinked staging can break the mapping. + +3. **URL resolution & link injection** (`doctree-resolved`) – For each + reference in a diagram's idmap, resolves the unique definer via the index. 
+ When multiple diagrams define the same element, a *proximity tiebreak* + selects the definer sharing the longest common path prefix with the source + diagram. On a genuine tie, no link is emitted (safe over wrong). URLs are + built relative to `_images/` in `svg_obj` mode (else page-relative via + `app.builder.get_relative_uri()`) and percent-encoded before injection. + +4. **Incremental / parallel support** – `env-purge-doc` removes stale entries + when a document is re-read; `env-merge-info` merges state from parallel + worker processes. + +## Invariants + +The extension relies on two invariants held by the idmap producer +(`architectural_design()` / `puml_cli`): + +1. **The idmap `source` must be the diagram's stable, unique + workspace-relative path.** This value is the canonical key used for all + matching: a plantuml node resolves to the idmap whose `source` is an exact + full-path suffix of the node's absolute path. In Bazel this must match the + staged source's workspace-relative path under `srcdir`; the rule passes + `puml_file.short_path` to satisfy that invariant regardless of how Sphinx + roots `srcdir` or how Bazel symlinks the staged sources. Non-unique + `source` values fail the build: two idmaps normalising to the same + canonical key raise an `ExtensionError` rather than silently mislinking. + +2. **PlantUML basenames (file stems) must be unique within a single + `architectural_design` target.** Each `.idmap.json` is written as + `<stem>.idmap.json` under the target's output directory, so two + diagrams sharing a stem in one target would collide on output. Same + basenames across *different* targets/packages are fine — exact canonical-key + matching keeps them independent. + +## Automatic idmap Generation (Bazel) + +`.idmap.json` sidecars are produced by the `architectural_design()` rule. + +The rule passes `--source-name <workspace-relative-path>` and +`--idmap-output-dir` to `puml_cli` for every `.puml` file.
The +`source` field in the resulting idmap is a stable, workspace-relative path +(e.g. `score/mw/com/proxy_detail.puml`), which is used as the diagram's +identity key throughout the extension. + +### Role detection algorithm + +Given the resolved model of one `.puml` diagram: + +1. **defines** – An element is a *define* when: + - At least one other element lists it as its `parent_id` (component + diagrams); or it has member variables / methods (class diagrams). +2. **references** – All remaining elements: top-level leaf boxes, relation + endpoints (component), and sequence participants. + +### Concrete example -## Automatic JSON Generation (Bazel) +```text +' overview.puml — top-level leaves are REFERENCES +@startuml +[Gateway] --> [Proxy] +@enduml +``` -`plantuml_links.json` is generated by the `architectural_design()` rule. +```text +' proxy_detail.puml — Proxy has a child → DEFINE +@startuml +package Proxy { [RequestHandler] } +@enduml +``` -The `architectural_design()` rule invokes `//tools/plantuml/linker:linker` on all -`.fbs.bin` FlatBuffers files produced by the PlantUML parser. See -[Link Mapping Format](#link-mapping-format) for a detailed -description of which links are emitted. +`proxy_detail.idmap.json`: +```json +{ "source": "score/mw/com/proxy_detail.puml", + "defines": [{ "alias": "Proxy", "id": "Proxy" }], + "references": [{ "alias": "RequestHandler", "id": "Proxy.RequestHandler" }] } +``` -### Algorithm +`overview.idmap.json`: +```json +{ "source": "score/overview.puml", + "defines": [], + "references": [{ "alias": "Gateway", "id": "Gateway" }, + { "alias": "Proxy", "id": "Proxy" }] } +``` -Given the set of `.fbs.bin` files for one `architectural_design()` target: +Result: `Proxy` in `overview.puml` links to `proxy_detail.puml`. +`Gateway` has no definer → no link. -1. **Build a top-level index** – For each diagram, collect every component whose - `parent_id` is `None` (i.e. it is not nested inside another component). 
- The index maps `alias → diagram file`. +## idmap Format -2. **Emit links** – For every component in every diagram, look up its alias in - the top-level index. If a *different* diagram defines that alias as a - top-level component, emit a link entry: +`.idmap.json` files are written by the parser and read by this extension. +They are not intended to be authored manually. - ``` - source_file = diagram that contains the reference - source_id = alias of the component - target_file = diagram that defines it as a top-level component - ``` +```json +{ + "source": "path/to/diagram.puml", + "defines": [ + { "alias": "ComponentName", "id": "fully.qualified.Name" } + ], + "references": [ + { "alias": "OtherComponent", "id": "OtherComponent" } + ] +} +``` -3. **Deduplicate** – Sort and deduplicate so that each `(source_file, source_id)` - pair has exactly one target (first alphabetically). Duplicate `source_id` - entries within the same source diagram are removed because PlantUML's - `url of X is [[…]]` directive supports only one URL per alias. 
+## End-to-End Clickable Diagram Example -### Concrete Example +This minimal example shows what users should create in docs to get a clickable +diagram: +`docs/arch/overview.puml` ```text -' adas_overview.puml — subsystem context @startuml -component ADAS -component BrakeController -component LaneKeepAssist -ADAS --> BrakeController -ADAS --> LaneKeepAssist +[Gateway] --> [Proxy] @enduml ``` +`docs/arch/proxy_detail.puml` ```text -' brake_controller.puml — component detail @startuml -component BrakeController -interface BrakeDemandIF -interface WheelSpeedIF -BrakeController --> BrakeDemandIF -BrakeController <-- WheelSpeedIF +package Proxy { + [RequestHandler] +} @enduml ``` -Generated links — one in each direction: +`docs/arch/overview.rst` +```rst +Overview +======== -```json -{ - "links": [ - { - "source_file": "adas_overview.puml", - "source_id": "BrakeController", - "target_file": "brake_controller.puml" - }, - { - "source_file": "brake_controller.puml", - "source_id": "BrakeController", - "target_file": "adas_overview.puml" - } - ] -} +.. uml:: overview.puml ``` -Clicking `BrakeController` in the overview navigates to its detail diagram; -clicking it in the detail diagram navigates back to the overview. +`docs/arch/proxy_detail.rst` +```rst +Proxy Detail +============ -`ADAS` and `LaneKeepAssist` appear as top-level only in `adas_overview.puml` and -have no dedicated detail diagram, so **no links** are emitted for them. +.. uml:: proxy_detail.puml +``` -(link-mapping-format)= -## Link Mapping Format +When the idmaps contain: -Place one or more `*plantuml_links.json` filesinside the Sphinx source directory: +- `overview.puml` references `Proxy` +- `proxy_detail.puml` defines `Proxy` -```json -{ - "links": [ - { - "source_file": "my_diagram.puml", - "source_id": "ComponentA", - "target_file": "other_diagram.puml" - } - ] -} -``` +the rendered `Proxy` element in `overview.puml` becomes clickable and opens +the page containing `proxy_detail.puml`. 
diff --git a/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py b/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py index 7317a052..55014661 100644 --- a/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py +++ b/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py @@ -10,27 +10,66 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -"""Sphinx extension to make PlantUML diagrams clickable.""" +"""Sphinx extension to make PlantUML diagrams clickable. + +Design overview +--------------- +Link data is derived from ``.idmap.json`` sidecar files produced by the +PlantUML parser (``puml_cli --idmap-output-dir ...``). Each idmap file +records two roles for the elements in one ``.puml`` diagram: + +* **defines** – elements elaborated (given children / structure) in that + diagram. A component diagram that contains ``package Proxy { ... }`` is + the definition site of ``Proxy``. +* **references** – leaf mentions and relation endpoints. A top-level + ``[Proxy]`` box in an overview is a reference that should link to the + diagram that defines it. + +The matching algorithm: + +1. Build a *definition index*: ``{alias|id → [source_paths]}``. +2. For each reference ``(alias, id)`` in a diagram, look up the index (FQN + ``id`` first, then ``alias``) to find candidate definer diagrams. +3. If exactly one definer: emit the link. +4. If multiple definers: pick the one sharing the longest common workspace- + relative path prefix with the source diagram (proximity tiebreak). + On a tie: log a warning and emit no link (safe over wrong). +5. Never link a diagram to itself. 
+""" + +from __future__ import annotations import functools import json +import os import re -from pathlib import Path +import urllib.parse +from pathlib import Path, PurePosixPath from typing import Any from docutils import nodes from sphinx.application import Sphinx +from sphinx.errors import ExtensionError from sphinx.util import logging logger = logging.getLogger(__name__) -# Environment attribute names used by this extension. -_ENV_LINK_DATA = "clickable_plantuml_link_data" -# Stores {puml_basename: (docname, anchor_id_or_None)} +# --------------------------------------------------------------------------- +# Environment attribute names +# --------------------------------------------------------------------------- + +# {normalized_source_path: raw_idmap_dict} — loaded once in builder-inited. +_ENV_IDMAP_BY_SOURCE = "clickable_plantuml_idmap_by_source" +# {alias_or_id: [source_path, ...]} — definition index built in builder-inited. +_ENV_DEF_INDEX = "clickable_plantuml_def_index" +# {normalized_source_path: (docname, anchor_or_None)} — populated in doctree-read. _ENV_PUML_DOCNAMES = "clickable_plantuml_puml_docnames" +# Absolute prefix to strip from PlantUML node paths to obtain canonical source keys. +_ENV_WORKSPACE_OFFSET = "clickable_plantuml_workspace_offset" -# Characters allowed in PlantUML alias identifiers. 
-_ALIAS_SAFE_RE = re.compile(r"^[\w.]+$") +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- def _find_parent_section_id(node: nodes.Node) -> str | None: @@ -45,76 +84,225 @@ def _find_parent_section_id(node: nodes.Node) -> str | None: return None +def _normalize_source_path(raw: str) -> str: + """Normalise a source path to a forward-slash relative string.""" + return str(PurePosixPath(raw)).lstrip("/") + + +def _assert_canonical_source_key(source_key: str) -> None: + """Assert that *source_key* is a workspace-relative POSIX key.""" + if source_key != _normalize_source_path(source_key): + raise ValueError(f"non-canonical source key: {source_key!r}") + + +def _common_prefix_length(path_a: str, path_b: str) -> int: + """Return the number of shared path components between two canonical keys.""" + parts_a = PurePosixPath(path_a).parts + parts_b = PurePosixPath(path_b).parts + count = 0 + for a, b in zip(parts_a, parts_b): + if a == b: + count += 1 + else: + break + return count + + +def _proximity_tiebreak(source: str, candidates: list[str]) -> str | None: + """Pick the candidate with the longest common prefix with *source*. + + All inputs are canonical workspace-relative POSIX keys (guaranteed by the + exact-matching in P0-1); the assertions guard that invariant so a staging + path can never sneak into the comparison. Returns ``None`` when two or + more candidates score equally (tie → no link). 
+ """ + _assert_canonical_source_key(source) + for candidate in candidates: + _assert_canonical_source_key(candidate) + scored = sorted( + candidates, + key=lambda c: _common_prefix_length(source, c), + reverse=True, + ) + best = _common_prefix_length(source, scored[0]) + if len(scored) > 1 and _common_prefix_length(source, scored[1]) == best: + return None + return scored[0] + + +def _resolve_definer( + alias: str, + fqn: str, + source_key: str, + definition_index: dict[str, list[str]], +) -> str | None: + """Return the definer source key for one reference, or ``None``. + + Resolution rules: + + * FQN (``id``) lookup takes precedence over the ``alias`` lookup. + * A diagram never links to itself (self-links are dropped). + * A single remaining candidate wins outright; multiple candidates go + through the proximity tiebreak, and a genuine tie logs a warning and + returns ``None`` (safe over wrong). + """ + _assert_canonical_source_key(source_key) + candidates = definition_index.get(fqn) or definition_index.get(alias) or [] + for candidate in candidates: + _assert_canonical_source_key(candidate) + # Never link a diagram to itself. + candidates = [c for c in candidates if c != source_key] + if not candidates: + return None + if len(candidates) == 1: + return candidates[0] + target = _proximity_tiebreak(source_key, candidates) + if target is None: + logger.warning( + "clickable_plantuml: ambiguous definition for '%s' in '%s'" + " — tied candidates %s; no link emitted", + alias, + source_key, + candidates, + ) + return target + + +def _build_target_url( + builder: Any, + output_format: str, + imagedir: str, + docname: str, + target_docname: str, + anchor: str | None, +) -> str: + """Build the link URL for a resolved definer diagram. + + In ``svg_obj`` mode the rendered SVG lives in the ``_images/`` directory, + so URLs inside the SVG must be relative to ``_images/`` rather than the + containing HTML page. 
For inline ``svg``/``png`` the SVG is embedded in + the page, so a page-relative URL is correct. The optional section + *anchor* is appended as a fragment. + """ + if output_format == "svg_obj": + target_uri = builder.get_target_uri(target_docname) + url = os.path.relpath(target_uri, imagedir).replace("\\", "/") + else: + url = builder.get_relative_uri(docname, target_docname) + if anchor: + url += f"#{anchor}" + return url + + +def _escape_plantuml_url(url: str) -> str: + """Percent-encode characters significant in PlantUML URL syntax. + + PlantUML terminates ``url of X is [[...]]`` at the first ``]]``. We also + encode ``[``, spaces, and other characters that would confuse the PlantUML + lexer. The fragment (after ``#``) is encoded separately to preserve it. + """ + # Characters that are safe to leave unencoded in a URL context. ``#`` is + # deliberately excluded here and reintroduced only as the single fragment + # separator, so literal hash payload is always encoded. + _SAFE = "/:?&=@!$'()*+,;-._~" + fragment_sep = url.find("#") + if fragment_sep != -1: + base = urllib.parse.quote(url[:fragment_sep], safe=_SAFE) + frag = urllib.parse.quote(url[fragment_sep + 1 :], safe="-._~") + # Keep a real fragment separator so generated SVG href remains valid. + return f"{base}#{frag}" + return urllib.parse.quote(url, safe=_SAFE) + + # --------------------------------------------------------------------------- -# JSON loading +# idmap loading # --------------------------------------------------------------------------- -def _load_link_mappings( - search_dir: str, - pattern: str = "*plantuml_links.json", -) -> dict[str, dict[str, Any]]: - """Return ``{source_file: {source_id: {target_file, ...}}}``.""" - link_data: dict[str, dict[str, Any]] = {} - for json_file in Path(search_dir).rglob(pattern): +def _load_idmap_files( + source_dir: Path, +) -> tuple[dict[str, Any], dict[str, list[str]]]: + """Scan *source_dir* for ``*.idmap.json`` and build the lookup indices. 
+ + The canonical key is the workspace-relative POSIX path stored in each + idmap's ``source`` field (baked in by ``--source-name``). Matching is + exact — there is no basename fallback — so two same-basename diagrams in + different packages never mislink. + + Returns: + idmap_by_source: ``{canonical_source_key → raw idmap dict}`` + definition_index: ``{alias_or_fqn_id → [canonical_source_keys]}`` + + Raises: + ExtensionError: when two idmaps normalise to the same canonical key. + """ + idmap_by_source: dict[str, Any] = {} + definition_index: dict[str, list[str]] = {} + + for json_path in sorted(source_dir.rglob("*.idmap.json")): try: - json_data = json.loads(json_file.read_text(encoding="utf-8")) - if "links" not in json_data or not isinstance(json_data["links"], list): - logger.warning( - "Invalid format in %s: missing 'links' array", - json_file.name, - ) - continue - file_link_count = 0 - for link_entry in json_data["links"]: - source_file = link_entry.get("source_file") - source_id = link_entry.get("source_id") - target_file = link_entry.get("target_file") - if not (source_file and source_id and target_file): - continue - link_data.setdefault(source_file, {})[source_id] = { - "target_file": target_file, - "line": link_entry.get("source_line", 0), - "description": link_entry.get("description", ""), - } - file_link_count += 1 - logger.info( - "Loaded %d links from %s", - file_link_count, - json_file.relative_to(search_dir), - ) + data = json.loads(json_path.read_text(encoding="utf-8")) except (json.JSONDecodeError, OSError) as exc: - logger.warning("Failed to load %s: %s", json_file.name, exc) - return link_data + logger.warning("clickable_plantuml: failed to load %s: %s", json_path, exc) + continue + raw_source = data.get("source", "") + if not raw_source: + logger.warning( + "clickable_plantuml: idmap %s missing 'source' field — skipped", + json_path.name, + ) + continue -def _collect_link_data(source_dir: Path) -> dict[str, dict[str, Any]]: - """Load all 
``*plantuml_links.json`` files from *source_dir*.""" - if source_dir.exists(): - return _load_link_mappings(str(source_dir)) - return {} + source_key = _normalize_source_path(raw_source) + _assert_canonical_source_key(source_key) + if source_key in idmap_by_source: + raise ExtensionError( + "clickable_plantuml: duplicate idmap source key " + f"'{source_key}' (from {json_path.name}); each diagram's " + "--source-name must be a unique workspace-relative path." + ) + idmap_by_source[source_key] = data + + for entry in data.get("defines", []): + alias = entry.get("alias", "") + fqn = entry.get("id", "") + if alias: + definition_index.setdefault(alias, []).append(source_key) + if fqn and fqn != alias: + definition_index.setdefault(fqn, []).append(source_key) + + logger.info( + "clickable_plantuml: loaded %d idmap file(s), %d unique definition keys", + len(idmap_by_source), + len(definition_index), + ) + return idmap_by_source, definition_index # --------------------------------------------------------------------------- -# UML injection helper +# UML injection # --------------------------------------------------------------------------- +# Characters allowed in a PlantUML alias identifier. +_ALIAS_SAFE_RE = re.compile(r"^[\w.\-]+$") +# Matches the @enduml terminator line (used to inject url directives before it). 
+_ENDUML_RE = re.compile(r"^\s*@enduml\s*$", re.MULTILINE) + def _inject_links_into_uml(uml_content: str, links: dict[str, str]) -> str: """Append ``url of <alias> is [[url]]`` directives before ``@enduml``.""" if not links: return uml_content safe_links = { - alias: url - for alias, url in links.items() - if _ALIAS_SAFE_RE.match(alias) and "]]" not in url + alias: url for alias, url in links.items() if _ALIAS_SAFE_RE.match(alias) } if not safe_links: return uml_content url_directives = "\n".join( f"url of {alias} is [[{url}]]" for alias, url in safe_links.items() ) - enduml_match = re.search(r"^\s*@enduml\s*$", uml_content, re.MULTILINE) + enduml_match = _ENDUML_RE.search(uml_content) if enduml_match: prefix = uml_content[: enduml_match.start()] if not prefix.endswith("\n"): @@ -124,7 +312,7 @@ def _inject_links_into_uml(uml_content: str, links: dict[str, str]) -> str: # --------------------------------------------------------------------------- -# Sphinx event handlers +# plantuml node class (cached import) # --------------------------------------------------------------------------- @@ -139,117 +327,223 @@ def _get_plantuml_node_class() -> type | None: return None +# --------------------------------------------------------------------------- +# Node filename normalisation +# --------------------------------------------------------------------------- + + +def _compute_workspace_offset(srcdir: str, source_keys: set[str]) -> str: + """Compute the absolute prefix used to derive canonical source keys. + + This runs once during ``builder-inited``. The returned prefix is removed + from absolute PlantUML node paths to obtain exact workspace-relative keys.
+ """ + srcdir_posix = PurePosixPath(os.path.normpath(srcdir)).as_posix() + best_parent = "" + + for key in source_keys: + _assert_canonical_source_key(key) + parent = str(PurePosixPath(key).parent) + if parent in ("", "."): + continue + if srcdir_posix == parent or srcdir_posix.endswith("/" + parent): + if len(parent) > len(best_parent): + best_parent = parent + + if not best_parent: + return srcdir_posix + if srcdir_posix == best_parent: + return srcdir_posix + return srcdir_posix[: -(len(best_parent) + 1)] + + +def _node_source_key( + node: nodes.Node, srcdir: str, workspace_offset: str, source_keys: set[str] +) -> str | None: + """Return the canonical workspace-relative key for a plantuml *node*. + + ``sphinxcontrib.plantuml`` stores the diagram location on the node as + ``incdir`` (directory relative to Sphinx's source root) plus ``filename`` + (bare basename). We first try strict prefix stripping via + ``workspace_offset``. If Bazel staging causes that to fail, we fall back + to exact full-key suffix matching against canonical ``source_keys``. + + This remains collision-safe: we match full canonical keys only, never a + basename-only key. + + Returns ``None`` when the node carries no filename or matches no key. + """ + filename: str = node.get("filename", "") + if not filename: + return None + incdir: str = node.get("incdir", "") + node_abs = PurePosixPath( + os.path.normpath(os.path.join(srcdir, incdir, filename)) + ).as_posix() + + workspace_offset = workspace_offset.rstrip("/") + if node_abs.startswith(workspace_offset + "/"): + source_key = _normalize_source_path(node_abs[len(workspace_offset) + 1 :]) + if source_key in source_keys: + _assert_canonical_source_key(source_key) + return source_key + + # Bazel staging can relocate docs while preserving the tail workspace path. 
+ matches = [ + key for key in source_keys if node_abs == key or node_abs.endswith("/" + key) + ] + if not matches: + return None + source_key = max(matches, key=len) + _assert_canonical_source_key(source_key) + return source_key + + +# --------------------------------------------------------------------------- +# Sphinx event handlers +# --------------------------------------------------------------------------- + + def on_builder_inited(app: Sphinx) -> None: - """Load JSON link data once, before any documents are read.""" + """Load idmap files and build the definition index once.""" if app.builder.format != "html": return source_dir = Path(app.srcdir) - link_data = _collect_link_data(source_dir) - if not link_data: - logger.info("clickable_plantuml: no link mappings found") + if not source_dir.exists(): + logger.info("clickable_plantuml: srcdir does not exist — no idmaps loaded") return - # Normalise keys to basenames for consistent lookup. - normalized = {Path(k).name: v for k, v in link_data.items()} - setattr(app.env, _ENV_LINK_DATA, normalized) + idmap_by_source, definition_index = _load_idmap_files(source_dir) + if not idmap_by_source: + logger.info("clickable_plantuml: no *.idmap.json files found") + return - logger.info( - "clickable_plantuml: loaded links for %d source file(s)", len(normalized) - ) + workspace_offset = _compute_workspace_offset(app.srcdir, set(idmap_by_source)) + setattr(app.env, _ENV_WORKSPACE_OFFSET, workspace_offset) + setattr(app.env, _ENV_IDMAP_BY_SOURCE, idmap_by_source) + setattr(app.env, _ENV_DEF_INDEX, definition_index) def on_doctree_read(app: Sphinx, doctree: nodes.document) -> None: - """Record which docname (and section anchor) contains which ``.puml`` diagram. + """Record which docname (and section anchor) contains which diagram. - Traverses the parsed doctree. - The mapping is stored in ``app.env`` and consumed during ``doctree-resolved``. 
+ Each diagram is registered under its canonical workspace-relative key (the + idmap ``source`` matched to the node's absolute path), which is directly + comparable to the idmap ``source`` field. """ PlantumlNode = _get_plantuml_node_class() if PlantumlNode is None: return + idmap_by_source: dict[str, Any] = getattr(app.env, _ENV_IDMAP_BY_SOURCE, {}) + source_keys = set(idmap_by_source) + workspace_offset: str = getattr(app.env, _ENV_WORKSPACE_OFFSET, app.srcdir) puml_docnames: dict[str, tuple[str, str | None]] = getattr( app.env, _ENV_PUML_DOCNAMES, {} ) - for node in doctree.traverse(PlantumlNode): - filename = Path(node.get("filename", "")).name - if not filename: + for node in doctree.findall(PlantumlNode): + key = _node_source_key(node, app.srcdir, workspace_offset, source_keys) + if not key: + logger.warning( + "clickable_plantuml: plantuml node in '%s' has no resolvable" + " source path — skipped", + app.env.docname, + ) continue - if filename in puml_docnames: + if key in puml_docnames and puml_docnames[key][0] != app.env.docname: logger.warning( - "clickable_plantuml: diagram '%s' found in both '%s' and '%s' " - "(basename collision — last wins)", - filename, - puml_docnames[filename][0], + "clickable_plantuml: diagram '%s' found in both '%s' and '%s'" + " — last wins (path collision; check idmap source fields)", + key, + puml_docnames[key][0], app.env.docname, ) anchor = _find_parent_section_id(node) - puml_docnames[filename] = (app.env.docname, anchor) + puml_docnames[key] = (app.env.docname, anchor) setattr(app.env, _ENV_PUML_DOCNAMES, puml_docnames) def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> None: - """Inject ``url of <alias> is [[url]]`` into plantuml nodes before rendering. - - For each diagram, resolves target ``.puml`` references to the docname that - contains the target diagram and uses ``app.builder.get_relative_uri`` to - produce correct relative URLs. + """Inject ``url of <alias> is [[url]]`` into plantuml nodes.
+ + Resolves each reference in the diagram's idmap to its definer diagram, + applies a proximity tiebreak on ambiguity, and builds a URL whose base + depends on the configured ``plantuml_output_format``: + + * ``svg_obj`` – the rendered SVG lives in the ``_images/`` directory and is + embedded via ``<object>``; ``<a>`` targets inside the SVG resolve + relative to ``_images/``, so the URL is + ``os.path.relpath(target_uri, imagedir)``. + * inline ``svg`` / ``png`` – the link resolves relative to the containing + HTML page, so the URL is + ``app.builder.get_relative_uri(docname, target_docname)``. """ - link_data: dict[str, dict[str, Any]] = getattr(app.env, _ENV_LINK_DATA, {}) - if app.builder.format != "html" or not link_data: + idmap_by_source: dict[str, Any] = getattr(app.env, _ENV_IDMAP_BY_SOURCE, {}) + definition_index: dict[str, list[str]] = getattr(app.env, _ENV_DEF_INDEX, {}) + if app.builder.format != "html" or not idmap_by_source: return PlantumlNode = _get_plantuml_node_class() if PlantumlNode is None: return + source_keys = set(idmap_by_source) + workspace_offset: str = getattr(app.env, _ENV_WORKSPACE_OFFSET, app.srcdir) puml_docnames: dict[str, tuple[str, str | None]] = getattr( app.env, _ENV_PUML_DOCNAMES, {} ) - absolute_url_prefixes = ("http://", "https://", "/") + + # Loop-invariant for the whole build: resolve once instead of per reference.
+ output_format = getattr(app.config, "plantuml_output_format", "png") + imagedir = getattr(app.builder, "imagedir", "_images") modified_count = 0 - for node in doctree.traverse(PlantumlNode): - diagram_filename = Path(node.get("filename", "")).name - alias_map: dict[str, Any] = link_data.get(diagram_filename, {}) - if not alias_map: + for node in doctree.findall(PlantumlNode): + source_key = _node_source_key(node, app.srcdir, workspace_offset, source_keys) + if not source_key: + continue + + idmap = idmap_by_source.get(source_key) + if idmap is None: continue resolved_links: dict[str, str] = {} - for alias, info in alias_map.items(): - target_file: str = info["target_file"] - - if target_file.endswith(".puml"): - target_basename = Path(target_file).name - target_info = puml_docnames.get(target_basename) - if target_info is not None: - target_docname, target_anchor = target_info - # SVG files are stored in _images/ (one level below the - # HTML output root). Using get_relative_uri() would give a - # page-to-page relative URL, but that path is interpreted - # relative to the SVG file, not the parent HTML page — - # causing the browser to open the raw SVG. Instead, build - # the URL relative to _images/ by prepending "../" to the - # root-relative page URI returned by get_target_uri(). 
- page_uri = app.builder.get_target_uri(target_docname) - url = f"../{page_uri}" - if target_anchor: - url += f"#{target_anchor}" - resolved_links[alias] = url - else: - logger.debug( - "clickable_plantuml: target diagram '%s' for alias " - "'%s' not found in any document", - target_file, - alias, - ) - elif target_file.startswith(absolute_url_prefixes): - resolved_links[alias] = target_file - else: - resolved_links[alias] = target_file + seen_aliases_in_node: set[str] = set() + for ref in idmap.get("references", []): + alias: str = ref.get("alias", "") + fqn: str = ref.get("id", alias) + if not alias or alias in seen_aliases_in_node: + continue + + target_source = _resolve_definer(alias, fqn, source_key, definition_index) + if target_source is None: + continue + + target_info = puml_docnames.get(target_source) + if target_info is None: + logger.debug( + "clickable_plantuml: definer '%s' for alias '%s' not" + " found in any document — skipping", + target_source, + alias, + ) + continue + + target_docname, target_anchor = target_info + url = _build_target_url( + app.builder, + output_format, + imagedir, + docname, + target_docname, + target_anchor, + ) + + resolved_links[alias] = _escape_plantuml_url(url) + seen_aliases_in_node.add(alias) if resolved_links: node["uml"] = _inject_links_into_uml(node.get("uml", ""), resolved_links) @@ -294,7 +588,7 @@ def setup(app: Sphinx) -> dict[str, Any]: app.connect("env-merge-info", on_env_merge_info) return { - "version": "4.0", + "version": "5.0", "parallel_read_safe": True, "parallel_write_safe": True, } diff --git a/plantuml/sphinx/clickable_plantuml/tests/BUILD b/plantuml/sphinx/clickable_plantuml/tests/BUILD new file mode 100644 index 00000000..2fbed788 --- /dev/null +++ b/plantuml/sphinx/clickable_plantuml/tests/BUILD @@ -0,0 +1,24 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with 
this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +load("@pip_rules_score//:requirements.bzl", "requirement") +load("@score_tooling//python_basics:defs.bzl", "score_py_pytest") + +score_py_pytest( + name = "unit_tests", + srcs = ["test_clickable_plantuml.py"], + deps = [ + "//plantuml/sphinx/clickable_plantuml", + requirement("docutils"), + requirement("sphinx"), + ], +) diff --git a/plantuml/sphinx/clickable_plantuml/tests/test_clickable_plantuml.py b/plantuml/sphinx/clickable_plantuml/tests/test_clickable_plantuml.py new file mode 100644 index 00000000..836ef603 --- /dev/null +++ b/plantuml/sphinx/clickable_plantuml/tests/test_clickable_plantuml.py @@ -0,0 +1,340 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Unit tests for the clickable_plantuml Sphinx extension helpers.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from sphinx.errors import ExtensionError + +from clickable_plantuml import ( + _build_target_url, + _escape_plantuml_url, + _inject_links_into_uml, + _load_idmap_files, + _node_source_key, + _proximity_tiebreak, + _resolve_definer, +) + + +def _write_idmap( + directory: Path, + name: str, + source: str, + defines: list[dict[str, str]] | None = None, + references: list[dict[str, str]] | None = None, +) -> None: + directory.mkdir(parents=True, exist_ok=True) + (directory / name).write_text( + json.dumps( + { + "source": source, + "defines": defines or [], + "references": references or [], + } + ), + encoding="utf-8", + ) + + +# --------------------------------------------------------------------------- +# index build +# --------------------------------------------------------------------------- + + +def test_load_idmap_builds_source_and_definition_indices(tmp_path: Path) -> None: + _write_idmap( + tmp_path / "a", + "proxy.idmap.json", + "pkg/a/proxy.puml", + defines=[{"alias": "Proxy", "id": "pkg.Proxy"}], + ) + _write_idmap( + tmp_path / "b", + "overview.idmap.json", + "pkg/b/overview.puml", + references=[{"alias": "Proxy", "id": "pkg.Proxy"}], + ) + + idmap_by_source, definition_index = _load_idmap_files(tmp_path) + + assert set(idmap_by_source) == {"pkg/a/proxy.puml", "pkg/b/overview.puml"} + # Both the alias and the FQN point at the definer. 
+ assert definition_index["Proxy"] == ["pkg/a/proxy.puml"] + assert definition_index["pkg.Proxy"] == ["pkg/a/proxy.puml"] + + +def test_same_basename_in_different_dirs_are_distinct_keys(tmp_path: Path) -> None: + _write_idmap(tmp_path / "a", "proxy.idmap.json", "pkg/a/proxy.puml") + _write_idmap(tmp_path / "b", "proxy.idmap.json", "pkg/b/proxy.puml") + + idmap_by_source, _ = _load_idmap_files(tmp_path) + + # No basename collapse: two proxy.puml remain independently keyed. + assert set(idmap_by_source) == {"pkg/a/proxy.puml", "pkg/b/proxy.puml"} + + +def test_duplicate_canonical_key_raises_build_error(tmp_path: Path) -> None: + _write_idmap(tmp_path / "a", "one.idmap.json", "pkg/dup.puml") + _write_idmap(tmp_path / "b", "two.idmap.json", "pkg/dup.puml") + + with pytest.raises(ExtensionError, match="duplicate idmap source key"): + _load_idmap_files(tmp_path) + + +# --------------------------------------------------------------------------- +# node source-key resolution (workspace-offset + exact key match) +# --------------------------------------------------------------------------- + + +def test_node_source_key_matches_workspace_relative_key() -> None: + # sphinxcontrib.plantuml stores incdir + filename. After stripping the + # workspace offset, the remainder must match a canonical idmap key exactly. 
+ node = { + "filename": "overview.puml", + "incdir": "plantuml/sphinx/example", + } + key = _node_source_key( + node, + "/workspace/plantuml/sphinx/example", + "/workspace", + {"plantuml/sphinx/example/overview.puml", "plantuml/sphinx/example/other.puml"}, + ) + + assert key == "plantuml/sphinx/example/overview.puml" + + +def test_node_source_key_same_basename_stays_distinct() -> None: + node = {"filename": "proxy.puml", "incdir": "pkg/b"} + key = _node_source_key( + node, + "/workspace/pkg/b", + "/workspace", + {"pkg/a/proxy.puml", "pkg/b/proxy.puml"}, + ) + + assert key == "pkg/b/proxy.puml" + + +def test_node_source_key_returns_none_when_unmatched() -> None: + node = {"filename": "stray.puml", "incdir": "pkg/b"} + key = _node_source_key(node, "/workspace/pkg/b", "/workspace", {"pkg/a/proxy.puml"}) + + assert key is None + + +def test_node_source_key_returns_none_without_filename() -> None: + assert ( + _node_source_key( + {"incdir": "x"}, "/workspace/pkg", "/workspace", {"pkg/a.puml"} + ) + is None + ) + + +def test_node_source_key_matches_bazel_staged_suffix_path() -> None: + node = { + "filename": "overview.puml", + "incdir": "../../../src/plantuml/sphinx/example", + } + key = _node_source_key( + node, + "/build/out/doc/plantuml/sphinx/example", + "/build/out/doc/plantuml/sphinx/example", + {"plantuml/sphinx/example/overview.puml", "plantuml/sphinx/example/other.puml"}, + ) + + assert key == "plantuml/sphinx/example/overview.puml" + + +# --------------------------------------------------------------------------- +# reference resolution: FQN-before-alias, self-link, single vs tie +# --------------------------------------------------------------------------- + + +def test_resolve_definer_prefers_fqn_over_alias() -> None: + definition_index = { + "Proxy": ["pkg/alias_hit.puml"], + "pkg.Proxy": ["pkg/fqn_hit.puml"], + } + + target = _resolve_definer("Proxy", "pkg.Proxy", "pkg/src.puml", definition_index) + + assert target == "pkg/fqn_hit.puml" + + +def 
test_resolve_definer_skips_self_link() -> None: + definition_index = {"Proxy": ["pkg/src.puml"]} + + target = _resolve_definer("Proxy", "Proxy", "pkg/src.puml", definition_index) + + assert target is None + + +def test_resolve_definer_single_candidate() -> None: + definition_index = {"Proxy": ["pkg/definer.puml"]} + + target = _resolve_definer("Proxy", "Proxy", "pkg/src.puml", definition_index) + + assert target == "pkg/definer.puml" + + +def test_resolve_definer_tie_returns_none() -> None: + definition_index = {"Proxy": ["other/a/proxy.puml", "other/b/proxy.puml"]} + + target = _resolve_definer("Proxy", "Proxy", "pkg/src.puml", definition_index) + + assert target is None + + +def test_resolve_definer_proximity_breaks_tie() -> None: + definition_index = {"Proxy": ["pkg/near/proxy.puml", "far/proxy.puml"]} + + target = _resolve_definer("Proxy", "Proxy", "pkg/src.puml", definition_index) + + assert target == "pkg/near/proxy.puml" + + +def test_resolve_definer_rejects_non_canonical_candidate() -> None: + definition_index = {"Proxy": ["/abs/definer.puml"]} + + with pytest.raises(ValueError): + _resolve_definer("Proxy", "Proxy", "pkg/src.puml", definition_index) + + +def test_resolve_definer_rejects_non_canonical_source_key() -> None: + definition_index = {"Proxy": ["pkg/definer.puml"]} + + with pytest.raises(ValueError): + _resolve_definer("Proxy", "Proxy", "/abs/src.puml", definition_index) + + +# --------------------------------------------------------------------------- +# proximity tiebreak +# --------------------------------------------------------------------------- + + +def test_proximity_tiebreak_single_winner() -> None: + assert _proximity_tiebreak("a/b/c.puml", ["a/b/x.puml", "z/y.puml"]) == "a/b/x.puml" + + +def test_proximity_tiebreak_tie_returns_none() -> None: + assert _proximity_tiebreak("a/b/c.puml", ["x/one.puml", "y/two.puml"]) is None + + +def test_proximity_tiebreak_rejects_non_canonical_key() -> None: + with pytest.raises(ValueError): + 
_proximity_tiebreak("/abs/src.puml", ["a/b.puml"]) + + +# --------------------------------------------------------------------------- +# URL building: svg_obj vs svg, anchor +# --------------------------------------------------------------------------- + + +class _FakeBuilder: + def get_target_uri(self, docname: str) -> str: + return f"{docname}.html" + + def get_relative_uri(self, from_docname: str, to_docname: str) -> str: + return f"{to_docname}.html" + + +def test_build_target_url_svg_obj_is_relative_to_imagedir() -> None: + url = _build_target_url( + _FakeBuilder(), "svg_obj", "_images", "index", "design/proxy", None + ) + + # svg_obj links resolve relative to _images/, so climb out of it first. + assert url == "../design/proxy.html" + + +def test_build_target_url_inline_svg_is_page_relative() -> None: + url = _build_target_url( + _FakeBuilder(), "svg", "_images", "index", "design/proxy", None + ) + + assert url == "design/proxy.html" + + +def test_build_target_url_appends_anchor() -> None: + url = _build_target_url( + _FakeBuilder(), "svg", "_images", "index", "design/proxy", "section-1" + ) + + assert url == "design/proxy.html#section-1" + + +# --------------------------------------------------------------------------- +# URL escaping: percent-encode, anchor, no bare '#' +# --------------------------------------------------------------------------- + + +def test_escape_plantuml_url_percent_encodes_brackets_and_spaces() -> None: + escaped = _escape_plantuml_url("path/a b[c].html") + + assert "[" not in escaped and "]" not in escaped and " " not in escaped + + +def test_escape_plantuml_url_preserves_fragment_separator() -> None: + escaped = _escape_plantuml_url("design/proxy.html#my-section") + + assert escaped == "design/proxy.html#my-section" + + +def test_escape_plantuml_url_encodes_literal_hash_without_fragment() -> None: + # A '#' that is not the fragment separator must be encoded so it cannot + # break the PlantUML [[ ]] directive. 
+ escaped = _escape_plantuml_url("a#b#c") + + # Only the first '#' is treated as the fragment separator; the rest encoded. + assert escaped.count("#") == 1 + + +# --------------------------------------------------------------------------- +# UML injection + one-URL-per-alias dedup contract +# --------------------------------------------------------------------------- + + +def test_inject_links_inserts_directive_before_enduml() -> None: + uml = "@startuml\n[A] --> [B]\n@enduml\n" + + result = _inject_links_into_uml(uml, {"A": "a.html"}) + + assert "url of A is [[a.html]]" in result + assert result.index("url of A") < result.index("@enduml") + + +def test_inject_links_skips_unsafe_alias() -> None: + uml = "@startuml\n@enduml\n" + + result = _inject_links_into_uml(uml, {"bad alias!": "x.html"}) + + assert "url of" not in result + + +def test_one_url_per_alias_dedup_contract() -> None: + # The resolved_links dict in on_doctree_resolved keys by alias, so an + # alias maps to exactly one URL (last write wins). Emulate that contract. + resolved_links: dict[str, str] = {} + resolved_links["A"] = "first.html" + resolved_links["A"] = "second.html" + + uml = _inject_links_into_uml("@startuml\n@enduml\n", resolved_links) + + assert uml.count("url of A is") == 1 + assert "[[second.html]]" in uml