Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions scripts/codelens.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,38 @@ def resolve_workspace(workspace_arg: Optional[str] = None) -> str:
# ─── Auto-Setup: Registry Bootstrap ────────────────────────────

def _registry_exists(workspace: str) -> bool:
"""Check if a valid registry exists for the workspace."""
"""Check if a valid registry exists for the workspace.

A registry is considered valid when at least one of these is true:

1. A legacy JSON registry file exists (``backend.json`` or
``frontend.json``) — the pre-migration state used by older
workspaces. Preserved so unmigrated workspaces keep working.
2. A populated SQLite database exists at ``.codelens/codelens.db``
— the post-migration state. "Populated" means the file exists
AND the ``symbols`` table has at least one row, so an empty or
corrupt db is NOT treated as a valid registry (issue #35).

Without path 2, a workspace that ran ``migrate`` and then deleted
its JSON files was treated as having no registry, triggering an
unnecessary ``init + scan`` on every command and discarding the
migrated SQLite data.
"""
codelens_dir = os.path.join(workspace, ".codelens")
if not os.path.isdir(codelens_dir):
return False
# Check for at least one registry file

# Path 1: legacy JSON registry — still works for unmigrated workspaces.
backend_json = os.path.join(codelens_dir, "backend.json")
frontend_json = os.path.join(codelens_dir, "frontend.json")
return os.path.exists(backend_json) or os.path.exists(frontend_json)
if os.path.exists(backend_json) or os.path.exists(frontend_json):
return True

# Path 2: migrated SQLite registry — must exist AND be populated,
# so an empty/corrupt db does not falsely satisfy the check (issue #35).
from persistent_registry import db_is_populated
db_path = os.path.join(codelens_dir, "codelens.db")
return db_is_populated(db_path)


def _auto_setup(workspace: str) -> Dict[str, Any]:
Expand Down
12 changes: 9 additions & 3 deletions scripts/commands/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Dict, Any

from utils import logger
from persistent_registry import PersistentRegistry, db_exists, is_sqlite_available
from persistent_registry import PersistentRegistry, db_exists, db_is_populated, is_sqlite_available
from commands import register_command


Expand Down Expand Up @@ -51,10 +51,16 @@ def cmd_migrate(workspace: str, db_path: str = None, verify: bool = False) -> Di

# Step 2: Check if migration is needed
effective_db_path = db_path or os.path.join(workspace, ".codelens", "codelens.db")
if os.path.exists(effective_db_path):
# Only skip if the db is actually populated. ``scan`` creates an empty
# db shell via ``store_scan_result`` (which writes to ``analysis_cache``
# but not ``symbols``), so a bare ``os.path.exists`` check here would
# skip the real JSON→SQLite migration and leave ``symbols`` empty —
# which in turn makes ``_registry_exists`` return False after the JSON
# files are deleted (issue #35).
if db_is_populated(effective_db_path):
return {
"status": "ok",
"message": "SQLite database already exists. Use --verify to check integrity.",
"message": "SQLite database already exists and is populated. Use --verify to check integrity.",
"db_path": effective_db_path,
"hint": "To re-migrate, delete the .codelens/codelens.db file first.",
}
Expand Down
32 changes: 32 additions & 0 deletions scripts/persistent_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,38 @@ def db_exists(workspace: str, db_path: Optional[str] = None) -> bool:
return os.path.exists(path)


def db_is_populated(db_path: str) -> bool:
"""Return True if the SQLite db at ``db_path`` exists and has data.

"Populated" means the file exists, can be opened as SQLite, and the
``symbols`` table contains at least one row — the table populated by
both ``scan`` (via ``insert_symbols_batch``) and ``migrate_from_json``.

Used to distinguish a real migrated/scan-populated db from an empty
or corrupt one. Callers:

- ``codelens._registry_exists`` treats an empty/corrupt db as "no
valid registry" so auto-setup still runs (issue #35).
- ``commands.migrate.cmd_migrate`` only skips re-migration when the
db is actually populated — otherwise an empty db shell created by
``store_scan_result`` (which only writes to ``analysis_cache``,
not ``symbols``) would block the real JSON→SQLite migration.

Any error (missing file, corrupt db, missing table, locked) returns
False so callers fall through to the safe default.
"""
if not os.path.exists(db_path):
return False
try:
conn = sqlite3.connect(db_path, timeout=2.0)
try:
return conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0] > 0
finally:
conn.close()
except sqlite3.Error:
return False


def get_registry(workspace: str, db_path: Optional[str] = None) -> PersistentRegistry:
"""Get a PersistentRegistry instance for the workspace."""
return PersistentRegistry(workspace, db_path)
Expand Down
76 changes: 74 additions & 2 deletions scripts/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,50 @@ def save_config(workspace: str, config: Dict[str, Any]) -> None:


def load_frontend_registry(workspace: str) -> Dict[str, Any]:
"""Load frontend.json registry."""
"""Load frontend.json registry, falling back to SQLite if missing.

JSON is the primary source (pre-migrate workspaces). When the JSON
file is absent — e.g., a migrated workspace whose JSON files were
deleted post-migrate (issue #35) — fall back to the SQLite cache
populated by ``PersistentRegistry.store_frontend_registry`` during
``codelens migrate``. If neither source has data, return an empty
registry.
"""
path = os.path.join(get_codelens_dir(workspace), 'frontend.json')
if os.path.exists(path):
try:
with open(path, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError):
logger.debug("Failed to parse frontend registry, returning empty", exc_info=True)
# JSON missing or unreadable — try the SQLite cache (issue #35).
return _load_frontend_registry_from_sqlite(workspace)


def _load_frontend_registry_from_sqlite(workspace: str) -> Dict[str, Any]:
"""Load the frontend registry from the SQLite cache, or empty if unavailable.

Used as a fallback when ``frontend.json`` is missing (e.g., deleted
after ``migrate``). Returns an empty registry if SQLite is unavailable,
the db doesn't exist, or the cache row is absent — so callers always
get a well-formed dict.
"""
try:
from persistent_registry import PersistentRegistry, is_sqlite_available
if not is_sqlite_available():
return _empty_frontend_registry(workspace)
reg = PersistentRegistry(workspace)
cached = reg.load_frontend_registry()
reg.close()
if cached:
return cached
except Exception:
logger.debug("SQLite frontend registry fallback failed", exc_info=True)
return _empty_frontend_registry(workspace)


def _empty_frontend_registry(workspace: str) -> Dict[str, Any]:
"""Return an empty frontend registry with the expected schema."""
return {
"last_updated": "",
"workspace": workspace,
Expand All @@ -128,14 +164,50 @@ def save_frontend_registry(workspace: str, data: Dict[str, Any]) -> None:


def load_backend_registry(workspace: str) -> Dict[str, Any]:
"""Load backend.json registry."""
"""Load backend.json registry, falling back to SQLite if missing.

JSON is the primary source (pre-migrate workspaces). When the JSON
file is absent — e.g., a migrated workspace whose JSON files were
deleted post-migrate (issue #35) — fall back to the SQLite cache
populated by ``PersistentRegistry.store_backend_registry`` during
``codelens migrate``. If neither source has data, return an empty
registry.
"""
path = os.path.join(get_codelens_dir(workspace), 'backend.json')
if os.path.exists(path):
try:
with open(path, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError):
logger.debug("Failed to parse backend registry, returning empty", exc_info=True)
# JSON missing or unreadable — try the SQLite cache (issue #35).
return _load_backend_registry_from_sqlite(workspace)


def _load_backend_registry_from_sqlite(workspace: str) -> Dict[str, Any]:
"""Load the backend registry from the SQLite cache, or empty if unavailable.

Used as a fallback when ``backend.json`` is missing (e.g., deleted
after ``migrate``). Returns an empty registry if SQLite is unavailable,
the db doesn't exist, or the cache row is absent — so callers always
get a well-formed dict.
"""
try:
from persistent_registry import PersistentRegistry, is_sqlite_available
if not is_sqlite_available():
return _empty_backend_registry(workspace)
reg = PersistentRegistry(workspace)
cached = reg.load_backend_registry()
reg.close()
if cached:
return cached
except Exception:
logger.debug("SQLite backend registry fallback failed", exc_info=True)
return _empty_backend_registry(workspace)


def _empty_backend_registry(workspace: str) -> Dict[str, Any]:
"""Return an empty backend registry with the expected schema."""
return {
"last_updated": "",
"workspace": workspace,
Expand Down
154 changes: 154 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from commands.query import cmd_query
from commands.list import cmd_list
from commands.init import cmd_init
from commands.migrate import cmd_migrate
from codelens import _registry_exists


def _create_sample_workspace():
Expand Down Expand Up @@ -331,3 +333,155 @@ def test_check_full_cli_invocation_with_positional(self):
finally:
import shutil
shutil.rmtree(ws, ignore_errors=True)


# ─── _registry_exists after migrate (issue #35) ─────────────────────


class TestRegistryExistsSqlite:
"""Regression guard for issue #35.

Before the fix, ``_registry_exists`` only checked for
``backend.json`` / ``frontend.json``. A workspace that had been
migrated to SQLite (``codelens migrate``) and whose JSON files
were then deleted was always treated as having no registry, so
every subsequent command silently re-ran ``init + scan`` and threw
away the migrated data — negating the whole point of ``migrate``.

The fix adds a second path: a populated ``codelens.db`` also
counts as a valid registry. "Populated" means the ``symbols``
table has at least one row, so an empty or corrupt db is NOT
falsely treated as valid.
"""

def test_registry_exists_after_migrate_with_json_deleted(self):
"""migrate → delete JSON → ``_registry_exists`` must return True.

This is the exact repro from issue #35.
"""
ws = _create_sample_workspace()
try:
cmd_init(ws)
cmd_scan(ws)
# Sanity: JSON files exist before migrate.
assert os.path.exists(os.path.join(ws, ".codelens", "backend.json"))
assert os.path.exists(os.path.join(ws, ".codelens", "frontend.json"))

# Migrate JSON → SQLite.
migrate_result = cmd_migrate(ws)
assert migrate_result["status"] == "ok", migrate_result

# Delete the JSON files (post-migrate cleanup).
os.remove(os.path.join(ws, ".codelens", "backend.json"))
os.remove(os.path.join(ws, ".codelens", "frontend.json"))

# The fix: the migrated SQLite db must still satisfy
# _registry_exists so commands don't trigger auto-setup.
assert _registry_exists(ws) is True, (
"issue #35 regression: migrated workspace with deleted JSON "
"files is not recognized as having a valid registry"
)
finally:
import shutil
shutil.rmtree(ws, ignore_errors=True)

def test_registry_exists_false_for_empty_db(self):
"""An empty SQLite db (``symbols`` has 0 rows) must NOT be valid.

Issue #35 constraint: 'db kosong/corrupt tidak salah dianggap
registry valid'.
"""
import sqlite3

with tempfile.TemporaryDirectory() as ws:
codelens_dir = os.path.join(ws, ".codelens")
os.makedirs(codelens_dir)
db_path = os.path.join(codelens_dir, "codelens.db")
# Create a real SQLite db with the symbols table but no rows.
conn = sqlite3.connect(db_path)
conn.execute(
"CREATE TABLE symbols (id INTEGER PRIMARY KEY, name TEXT)"
)
conn.commit()
conn.close()

# No JSON files, empty db → must be False so auto-setup runs.
assert _registry_exists(ws) is False, (
"empty SQLite db should not be treated as a valid registry"
)

def test_registry_exists_false_for_corrupt_db(self):
"""A corrupt SQLite db (random bytes) must NOT be valid."""
with tempfile.TemporaryDirectory() as ws:
codelens_dir = os.path.join(ws, ".codelens")
os.makedirs(codelens_dir)
db_path = os.path.join(codelens_dir, "codelens.db")
with open(db_path, "wb") as f:
f.write(b"not a sqlite database file - corrupt bytes")

# No JSON files, corrupt db → must be False.
assert _registry_exists(ws) is False, (
"corrupt SQLite db should not be treated as a valid registry"
)

def test_registry_exists_true_for_json_only_workspace(self):
"""Legacy JSON-only workspace (no migrate) must still work.

Ensures the fix is purely additive — path 1 (JSON check) is
unchanged and still detects pre-migration workspaces, even
though ``scan`` may also create an empty ``codelens.db`` shell
via ``store_scan_result`` (which writes only to
``analysis_cache``, not ``symbols``).
"""
ws = _create_sample_workspace()
try:
cmd_init(ws)
cmd_scan(ws)
# Pre-migration state: JSON files exist.
assert os.path.exists(os.path.join(ws, ".codelens", "backend.json"))
assert os.path.exists(os.path.join(ws, ".codelens", "frontend.json"))

assert _registry_exists(ws) is True, (
"JSON-only workspace should still be detected as valid "
"(pre-existing behavior must not regress)"
)
finally:
import shutil
shutil.rmtree(ws, ignore_errors=True)

def test_query_uses_sqlite_fallback_after_json_deleted(self):
"""End-to-end: query must return real data from SQLite after
``migrate`` + JSON deletion — not just avoid auto-setup, but
actually serve the migrated data (issue #35: 'padahal data
lengkap sudah ada di codelens.db').

Verifies that ``load_backend_registry`` / ``load_frontend_registry``
fall back to the SQLite cache populated by ``migrate`` when the
JSON files are missing.
"""
ws = _create_sample_workspace()
try:
cmd_init(ws)
cmd_scan(ws)
# Sanity: the symbol we'll query exists in the JSON registry.
pre = cmd_query("verify_token", ws, domain="backend")
assert pre["found"] is True, (
"verify_token must exist in JSON registry before migrate"
)

# Migrate → delete JSON → query must still find the symbol.
mig = cmd_migrate(ws)
assert mig["status"] == "ok", mig
os.remove(os.path.join(ws, ".codelens", "backend.json"))
os.remove(os.path.join(ws, ".codelens", "frontend.json"))

post = cmd_query("verify_token", ws, domain="backend")
assert post["found"] is True, (
"query must return data from SQLite cache after JSON deletion "
"(issue #35: migrated data must be usable, not just present)"
)
assert post["type"] == "function"
assert post["node"]["fn"] == "verify_token"
finally:
import shutil
shutil.rmtree(ws, ignore_errors=True)
Loading
Loading