Wolfvin · Wolfvin · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/scripts/codelens.py b/scripts/codelens.py
@@ -331,7 +331,11 @@
 
 # ─── Post-Processing: --top N ──────────────────────────────────
 
-# Keys that commonly contain list results across all commands
+# Keys that commonly contain list results across all commands.
+# Used by _apply_lite to find the "primary result list" for lite mode output.
+# NOTE: _apply_top_n no longer relies on this list — it auto-discovers all
+# list-valued keys at runtime (issue #36). This list is kept solely for
+# _apply_lite's priority-ordered lookup.
 _LIST_KEYS = [
     "functions", "findings", "leaks", "hints", "issues", "matches", "results",
     "violations", "entrypoints", "routes", "stores", "callers", "callees",
@@ -342,12 +346,17 @@
     "risks", "drift",  # refactor-safe, config-drift
 ]
 
-# Nested containers that may contain category-keyed lists
-_NESTED_LIST_KEYS = [
-    "results",  # dead-code: results{category:[]}
-    "issues",   # missing-refs: issues{type:[]}
-    "cycles",   # circular: cycles{type:[]}
-]
+# Keys whose values should NEVER be truncated by --top, even if they're lists.
+# These are structural/metadata keys (command names, engine names, etc.) that
+# are not "result lists" in the --top N contract sense. Adding a key here is
+# an explicit opt-out — the default is to truncate all list-valued keys (issue #36).
+_NO_TOP_KEYS = frozenset({
+    "available_commands",  # help/list-commands: metadata, not results
+    "engines",             # analyze: engine list is structural
+    "supported_languages", # setup/info: metadata
+    "categories",          # category names are metadata
+    "tags",                # tag names are metadata
+})
 
 # Commands that return large lists — get smart default --top
 _LIST_COMMANDS = {
@@ -402,15 +411,31 @@
 
 
 def _apply_top_n(result: Dict[str, Any], top_n: int, command: str = "") -> Dict[str, Any]:
-    """Limit all list/array results to top N items. Sort by relevance first. Adds truncated flags."""
+    """Limit all list/array results to top N items. Sort by relevance first. Adds truncated flags.
+
+    Auto-discovers ALL list-valued keys at runtime — no allowlist needed (issue #36).
+    This ensures --top N is a universal contract: any command, present or future,
+    that returns a list under any key name will be truncated. Keys in _NO_TOP_KEYS
+    are exempt (structural/metadata keys like ``available_commands``).
+
+    Also handles category-keyed dicts (e.g., ``by_category{cat: [...]}``) and
+    nested containers (e.g., ``results{category: [...]}``) by scanning dict
+    values for lists.
+    """
     if not isinstance(result, dict) or top_n <= 0:
         return result
 
     # Get sort strategy for this command
     sort_key, sort_desc = _SORT_STRATEGIES.get(command, (None, False))
 
-    for key in _LIST_KEYS:
-        val = result.get(key)
+    # Auto-discover: iterate ALL top-level keys (snapshot to allow in-place additions).
+    # Truncate every list-valued key except those in _NO_TOP_KEYS or starting with '_'.
+    # Also scan dict-valued keys for category-keyed lists.
+    for key in list(result.keys()):
+        if key in _NO_TOP_KEYS or key.startswith("_"):
+            continue
+        val = result[key]
+
         if isinstance(val, list) and len(val) > top_n:
             # Sort by relevance before truncating
             if sort_key:
@@ -421,23 +446,8 @@
             result[f"{key}_truncated"] = True
             result[f"{key}_total"] = len(val)
         elif isinstance(val, dict):
-            # Handle category-keyed dicts like by_category{cat: [...]}
-            for sub_key, sub_val in val.items():
-                if isinstance(sub_val, list) and len(sub_val) > top_n:
-                    # Sort items within each category
-                    if sort_key:
-                        sub_val = _sort_items(sub_val, sort_key, sort_desc)
-                        val[sub_key] = sub_val[:top_n]
-                    else:
-                        val[sub_key] = sub_val[:top_n]
-                    if "_truncated_categories" not in result:
-                        result["_truncated_categories"] = {}
-                    result["_truncated_categories"][sub_key] = len(sub_val)
-
-    # Handle nested containers like dead-code's results{category:[]}
-    for key in _NESTED_LIST_KEYS:
-        val = result.get(key)
-        if isinstance(val, dict):
+            # Handle category-keyed dicts like by_category{cat: [...]} and
+            # nested containers like dead-code's results{category:[]}
             for sub_key, sub_val in val.items():
                 if isinstance(sub_val, list) and len(sub_val) > top_n:
                     if sort_key:
@@ -449,7 +459,8 @@
                         result["_truncated_categories"] = {}
                     result["_truncated_categories"][sub_key] = len(sub_val)
 
-    # Handle coverage_map in test-map (file:{fn:{...}})
+    # Handle coverage_map in test-map (file:{fn:{...}}) — dict-of-dicts, not
+    # dict-of-lists, so the auto-discover above doesn't catch it.
     if "coverage_map" in result and isinstance(result["coverage_map"], dict):
         cm = result["coverage_map"]
         if len(cm) > top_n:

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -15,6 +15,7 @@
 from commands.query import cmd_query
 from commands.list import cmd_list
 from commands.init import cmd_init
+from codelens import _apply_top_n, _NO_TOP_KEYS
 from commands.migrate import cmd_migrate
 from codelens import _registry_exists
 
@@ -335,8 +336,109 @@ def test_check_full_cli_invocation_with_positional(self):
             shutil.rmtree(ws, ignore_errors=True)
 
 
-# ─── _registry_exists after migrate (issue #35) ─────────────────────
+# ─── --top N universal truncation (issue #36) ──────────────────
+
+
+class TestTopNUniversalTruncation:
+    """Regression guard for issue #36: --top N must truncate ALL list-valued
+    keys in command output, not just those in a hardcoded allowlist.
+
+    Before the fix, ``_apply_top_n`` only truncated keys listed in ``_LIST_KEYS``
+    (28 names). A new command returning a list under a non-standard key (e.g.,
+    ``entities``, ``my_things``, ``nodes``) would silently ignore ``--top N``,
+    violating the documented *"Limit list results to top N items"* contract from
+    ``SKILL-QUICK.md`` and risking token overflow for MCP clients.
 
+    The fix replaces the allowlist with runtime auto-discovery: every top-level
+    list-valued key is truncated, except those in ``_NO_TOP_KEYS`` (structural/
+    metadata keys like ``available_commands``).
+    """
+
+    def test_non_standard_key_gets_truncated(self):
+        """A hypothetical key name not in any allowlist must still be truncated."""
+        result = {"status": "ok", "entities": [{"id": i} for i in range(100)]}
+        out = _apply_top_n(result, 5)
+        assert len(out["entities"]) == 5, (
+            f"Expected 5 items after truncation, got {len(out['entities'])}. "
+            "Non-standard key 'entities' was not truncated by --top."
+        )
+        assert out["entities_truncated"] is True
+        assert out["entities_total"] == 100
+
+    def test_multiple_non_standard_keys_truncated(self):
+        """Multiple non-standard list keys must all be truncated."""
+        result = {
+            "status": "ok",
+            "widgets": [{"id": i} for i in range(50)],
+            "gadgets": [{"id": i} for i in range(30)],
+        }
+        out = _apply_top_n(result, 10)
+        assert len(out["widgets"]) == 10
+        assert len(out["gadgets"]) == 10
+        assert out["widgets_truncated"] is True
+        assert out["gadgets_truncated"] is True
+
+    def test_no_top_keys_exempt(self):
+        """Keys in _NO_TOP_KEYS must NOT be truncated even if they're lists."""
+        result = {
+            "status": "ok",
+            "available_commands": [f"cmd_{i}" for i in range(50)],
+        }
+        out = _apply_top_n(result, 5)
+        assert len(out["available_commands"]) == 50, (
+            "available_commands should NOT be truncated (it's in _NO_TOP_KEYS)"
+        )
+        assert "available_commands_truncated" not in out
+
+    def test_standard_keys_still_truncated(self):
+        """Existing allowlisted keys (e.g., 'findings') must still be truncated."""
+        result = {"findings": [{"name": f"f_{i}"} for i in range(50)]}
+        out = _apply_top_n(result, 10)
+        assert len(out["findings"]) == 10
+        assert out["findings_truncated"] is True
+        assert out["findings_total"] == 50
+
+    def test_top_zero_means_no_truncation_for_non_standard_key(self):
+        """--top 0 (unlimited) must apply to non-standard keys too."""
+        result = {"entities": [{"id": i} for i in range(100)]}
+        out = _apply_top_n(result, 0)
+        assert len(out["entities"]) == 100
+
+    def test_nested_dict_non_standard_key_truncated(self):
+        """Non-standard dict-of-lists key must also be truncated."""
+        result = {
+            "groups": {
+                "alpha": [{"id": i} for i in range(30)],
+                "beta": [{"id": i} for i in range(5)],
+            }
+        }
+        out = _apply_top_n(result, 10)
+        assert len(out["groups"]["alpha"]) == 10
+        assert len(out["groups"]["beta"]) == 5  # under limit, no truncation
+
+    def test_underscore_prefixed_keys_skipped(self):
+        """Internal keys starting with '_' should not be processed."""
+        result = {
+            "_meta": [{"x": i} for i in range(50)],
+            "findings": [{"name": f"f_{i}"} for i in range(50)],
+        }
+        out = _apply_top_n(result, 5)
+        assert len(out["_meta"]) == 50, "_-prefixed keys should not be truncated"
+        assert len(out["findings"]) == 5
+
+    def test_repro_from_issue(self):
+        """Exact repro from issue #36: hypothetical command returning 'entities'."""
+        # Simulate what a new command might return
+        result = {"status": "ok", "entities": [{"id": i} for i in range(1000)]}
+        out = _apply_top_n(result, 5)
+        # Before fix: len == 1000 (silent --top bypass)
+        # After fix: len == 5 (universal truncation)
+        assert len(out["entities"]) == 5, (
+            "Issue #36 repro: --top 5 did not truncate 'entities' key. "
+            "This means a new command returning non-standard keys would "
+            "silently bypass --top, risking token overflow for MCP clients."
+        )
+# ─── _registry_exists after migrate (issue #35) ─────────────────────
 
 class TestRegistryExistsSqlite:
     """Regression guard for issue #35.