Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 39 additions & 28 deletions scripts/codelens.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,11 @@

# ─── Post-Processing: --top N ──────────────────────────────────

# Keys that commonly contain list results across all commands
# Keys that commonly contain list results across all commands.
# Used by _apply_lite to find the "primary result list" for lite mode output.
# NOTE: _apply_top_n no longer relies on this list — it auto-discovers all
# list-valued keys at runtime (issue #36). This list is kept solely for
# _apply_lite's priority-ordered lookup.
_LIST_KEYS = [
"functions", "findings", "leaks", "hints", "issues", "matches", "results",
"violations", "entrypoints", "routes", "stores", "callers", "callees",
Expand All @@ -342,12 +346,17 @@
"risks", "drift", # refactor-safe, config-drift
]

# Nested containers that may contain category-keyed lists
_NESTED_LIST_KEYS = [
"results", # dead-code: results{category:[]}
"issues", # missing-refs: issues{type:[]}
"cycles", # circular: cycles{type:[]}
]
# Keys whose values should NEVER be truncated by --top, even if they're lists.
# These are structural/metadata keys (command names, engine names, etc.) that
# are not "result lists" in the --top N contract sense. Adding a key here is
# an explicit opt-out — the default is to truncate all list-valued keys (issue #36).
_NO_TOP_KEYS = frozenset({
"available_commands", # help/list-commands: metadata, not results
"engines", # analyze: engine list is structural
"supported_languages", # setup/info: metadata
"categories", # category names are metadata
"tags", # tag names are metadata
})

# Commands that return large lists — get smart default --top
_LIST_COMMANDS = {
Expand Down Expand Up @@ -402,15 +411,31 @@


def _apply_top_n(result: Dict[str, Any], top_n: int, command: str = "") -> Dict[str, Any]:
"""Limit all list/array results to top N items. Sort by relevance first. Adds truncated flags."""
"""Limit all list/array results to top N items. Sort by relevance first. Adds truncated flags.

Auto-discovers ALL list-valued keys at runtime — no allowlist needed (issue #36).
This ensures --top N is a universal contract: any command, present or future,
that returns a list under any key name will be truncated. Keys in _NO_TOP_KEYS
are exempt (structural/metadata keys like ``available_commands``).

Also handles category-keyed dicts (e.g., ``by_category{cat: [...]}``) and
nested containers (e.g., ``results{category: [...]}``) by scanning dict
values for lists.
"""
if not isinstance(result, dict) or top_n <= 0:
return result

# Get sort strategy for this command
sort_key, sort_desc = _SORT_STRATEGIES.get(command, (None, False))

for key in _LIST_KEYS:
val = result.get(key)
# Auto-discover: iterate ALL top-level keys (snapshot to allow in-place additions).
# Truncate every list-valued key except those in _NO_TOP_KEYS or starting with '_'.
# Also scan dict-valued keys for category-keyed lists.
for key in list(result.keys()):

Check warning on line 434 in scripts/codelens.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this unnecessary `list()` call on an already iterable object.

See more on https://sonarcloud.io/project/issues?id=Wolfvin_CodeLens&issues=AZ8UAr-ACTodxBqR_3an&open=AZ8UAr-ACTodxBqR_3an&pullRequest=85
if key in _NO_TOP_KEYS or key.startswith("_"):
continue
val = result[key]

if isinstance(val, list) and len(val) > top_n:
# Sort by relevance before truncating
if sort_key:
Expand All @@ -421,23 +446,8 @@
result[f"{key}_truncated"] = True
result[f"{key}_total"] = len(val)
elif isinstance(val, dict):
# Handle category-keyed dicts like by_category{cat: [...]}
for sub_key, sub_val in val.items():
if isinstance(sub_val, list) and len(sub_val) > top_n:
# Sort items within each category
if sort_key:
sub_val = _sort_items(sub_val, sort_key, sort_desc)
val[sub_key] = sub_val[:top_n]
else:
val[sub_key] = sub_val[:top_n]
if "_truncated_categories" not in result:
result["_truncated_categories"] = {}
result["_truncated_categories"][sub_key] = len(sub_val)

# Handle nested containers like dead-code's results{category:[]}
for key in _NESTED_LIST_KEYS:
val = result.get(key)
if isinstance(val, dict):
# Handle category-keyed dicts like by_category{cat: [...]} and
# nested containers like dead-code's results{category:[]}
for sub_key, sub_val in val.items():
if isinstance(sub_val, list) and len(sub_val) > top_n:
if sort_key:
Expand All @@ -449,7 +459,8 @@
result["_truncated_categories"] = {}
result["_truncated_categories"][sub_key] = len(sub_val)

# Handle coverage_map in test-map (file:{fn:{...}})
# Handle coverage_map in test-map (file:{fn:{...}}) — dict-of-dicts, not
# dict-of-lists, so the auto-discover above doesn't catch it.
if "coverage_map" in result and isinstance(result["coverage_map"], dict):
cm = result["coverage_map"]
if len(cm) > top_n:
Expand Down
104 changes: 103 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from commands.query import cmd_query
from commands.list import cmd_list
from commands.init import cmd_init
from codelens import _apply_top_n, _NO_TOP_KEYS
from commands.migrate import cmd_migrate
from codelens import _registry_exists

Expand Down Expand Up @@ -335,8 +336,109 @@ def test_check_full_cli_invocation_with_positional(self):
shutil.rmtree(ws, ignore_errors=True)


# ─── _registry_exists after migrate (issue #35) ─────────────────────
# ─── --top N universal truncation (issue #36) ──────────────────


class TestTopNUniversalTruncation:
"""Regression guard for issue #36: --top N must truncate ALL list-valued
keys in command output, not just those in a hardcoded allowlist.

Before the fix, ``_apply_top_n`` only truncated keys listed in ``_LIST_KEYS``
(28 names). A new command returning a list under a non-standard key (e.g.,
``entities``, ``my_things``, ``nodes``) would silently ignore ``--top N``,
violating the documented *"Limit list results to top N items"* contract from
``SKILL-QUICK.md`` and risking token overflow for MCP clients.

The fix replaces the allowlist with runtime auto-discovery: every top-level
list-valued key is truncated, except those in ``_NO_TOP_KEYS`` (structural/
metadata keys like ``available_commands``).
"""

def test_non_standard_key_gets_truncated(self):
"""A hypothetical key name not in any allowlist must still be truncated."""
result = {"status": "ok", "entities": [{"id": i} for i in range(100)]}
out = _apply_top_n(result, 5)
assert len(out["entities"]) == 5, (
f"Expected 5 items after truncation, got {len(out['entities'])}. "
"Non-standard key 'entities' was not truncated by --top."
)
assert out["entities_truncated"] is True
assert out["entities_total"] == 100

def test_multiple_non_standard_keys_truncated(self):
"""Multiple non-standard list keys must all be truncated."""
result = {
"status": "ok",
"widgets": [{"id": i} for i in range(50)],
"gadgets": [{"id": i} for i in range(30)],
}
out = _apply_top_n(result, 10)
assert len(out["widgets"]) == 10
assert len(out["gadgets"]) == 10
assert out["widgets_truncated"] is True
assert out["gadgets_truncated"] is True

def test_no_top_keys_exempt(self):
"""Keys in _NO_TOP_KEYS must NOT be truncated even if they're lists."""
result = {
"status": "ok",
"available_commands": [f"cmd_{i}" for i in range(50)],
}
out = _apply_top_n(result, 5)
assert len(out["available_commands"]) == 50, (
"available_commands should NOT be truncated (it's in _NO_TOP_KEYS)"
)
assert "available_commands_truncated" not in out

def test_standard_keys_still_truncated(self):
"""Existing allowlisted keys (e.g., 'findings') must still be truncated."""
result = {"findings": [{"name": f"f_{i}"} for i in range(50)]}
out = _apply_top_n(result, 10)
assert len(out["findings"]) == 10
assert out["findings_truncated"] is True
assert out["findings_total"] == 50

def test_top_zero_means_no_truncation_for_non_standard_key(self):
"""--top 0 (unlimited) must apply to non-standard keys too."""
result = {"entities": [{"id": i} for i in range(100)]}
out = _apply_top_n(result, 0)
assert len(out["entities"]) == 100

def test_nested_dict_non_standard_key_truncated(self):
"""Non-standard dict-of-lists key must also be truncated."""
result = {
"groups": {
"alpha": [{"id": i} for i in range(30)],
"beta": [{"id": i} for i in range(5)],
}
}
out = _apply_top_n(result, 10)
assert len(out["groups"]["alpha"]) == 10
assert len(out["groups"]["beta"]) == 5 # under limit, no truncation

def test_underscore_prefixed_keys_skipped(self):
"""Internal keys starting with '_' should not be processed."""
result = {
"_meta": [{"x": i} for i in range(50)],
"findings": [{"name": f"f_{i}"} for i in range(50)],
}
out = _apply_top_n(result, 5)
assert len(out["_meta"]) == 50, "_-prefixed keys should not be truncated"
assert len(out["findings"]) == 5

def test_repro_from_issue(self):
"""Exact repro from issue #36: hypothetical command returning 'entities'."""
# Simulate what a new command might return
result = {"status": "ok", "entities": [{"id": i} for i in range(1000)]}
out = _apply_top_n(result, 5)
# Before fix: len == 1000 (silent --top bypass)
# After fix: len == 5 (universal truncation)
assert len(out["entities"]) == 5, (
"Issue #36 repro: --top 5 did not truncate 'entities' key. "
"This means a new command returning non-standard keys would "
"silently bypass --top, risking token overflow for MCP clients."
)
# ─── _registry_exists after migrate (issue #35) ─────────────────────

class TestRegistryExistsSqlite:
"""Regression guard for issue #35.
Expand Down
Loading