From e1fd383066c4e6794ad6445c84a9646b3aa2cfd4 Mon Sep 17 00:00:00 2001 From: Wolfvin Date: Mon, 29 Jun 2026 15:58:28 +0000 Subject: [PATCH] =?UTF-8?q?fix(registry):=20recognize=20migrated=20SQLite?= =?UTF-8?q?=20workspaces=20=E2=80=94=20closes=20#35?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem _registry_exists(workspace) in scripts/codelens.py only checked for .codelens/backend.json or .codelens/frontend.json. A workspace that had run migrate (JSON to SQLite) and then deleted the JSON files was always treated as having no registry — triggering init + scan on every subsequent command and discarding the migrated SQLite data. This was compounded by a second bug: cmd_migrate early-returned whenever codelens.db existed (even if empty), because scan now creates that file via store_scan_result (issue #31). So migrate silently no-op'd and never actually copied JSON into SQLite. ## Fix Three coupled changes, all gated on a new db_is_populated(db_path) helper in scripts/persistent_registry.py: 1. codelens._registry_exists — path 1 (JSON check) is unchanged; new path 2 returns True when codelens.db exists AND the symbols table has at least 1 row. Empty/corrupt dbs still return False so auto-setup runs (constraint: db kosong/corrupt tidak salah dianggap registry valid). 2. commands.migrate.cmd_migrate — only skips re-migration when the db is actually populated (was: os.path.exists). Without this, the empty db shell created by store_scan_result blocked the real JSON-to-SQLite migration. 3. registry.load_frontend_registry / load_backend_registry — added SQLite fallback. When JSON is missing, load from the analysis_cache rows written by PersistentRegistry.store_frontend_registry / store_backend_registry during migrate. Makes the migrated workspace actually usable (padahal data lengkap sudah ada di codelens.db). The helper is shared between (1) and (2) — no dead code, no duplication. ## Backward compatibility - JSON-only workspaces (no migrate) still work — path 1 returns True before any SQLite check, so pre-migration behavior is unchanged. - Empty/corrupt SQLite dbs are NOT treated as valid — auto-setup still runs, so users aren't stuck with a broken registry. - migrate still skips when the db is genuinely populated (the old 'already exists' behavior, just with a stricter condition). ## Files changed - scripts/codelens.py — _registry_exists: add SQLite path - scripts/persistent_registry.py — add db_is_populated() helper - scripts/commands/migrate.py — use db_is_populated for skip check - scripts/registry.py — SQLite fallback in load_*_registry - tests/test_cli.py — 5 new regression tests - tests/test_persistent_registry.py — update test_migrate_already_exists Closes https://github.com/Wolfvin/CodeLens/issues/35 --- scripts/codelens.py | 30 +++++- scripts/commands/migrate.py | 12 ++- scripts/persistent_registry.py | 32 +++++++ scripts/registry.py | 76 ++++++++++++++- tests/test_cli.py | 154 ++++++++++++++++++++++++++++++ tests/test_persistent_registry.py | 16 +++- 6 files changed, 309 insertions(+), 11 deletions(-) diff --git a/scripts/codelens.py b/scripts/codelens.py index 83b63a6..a5a15f8 100755 --- a/scripts/codelens.py +++ b/scripts/codelens.py @@ -232,14 +232,38 @@ def resolve_workspace(workspace_arg: Optional[str] = None) -> str: # ─── Auto-Setup: Registry Bootstrap ──────────────────────────── def _registry_exists(workspace: str) -> bool: - """Check if a valid registry exists for the workspace.""" + """Check if a valid registry exists for the workspace. + + A registry is considered valid when at least one of these is true: + + 1. A legacy JSON registry file exists (``backend.json`` or + ``frontend.json``) — the pre-migration state used by older + workspaces. Preserved so unmigrated workspaces keep working. + 2. A populated SQLite database exists at ``.codelens/codelens.db`` + — the post-migration state. "Populated" means the file exists + AND the ``symbols`` table has at least one row, so an empty or + corrupt db is NOT treated as a valid registry (issue #35). + + Without path 2, a workspace that ran ``migrate`` and then deleted + its JSON files was treated as having no registry, triggering an + unnecessary ``init + scan`` on every command and discarding the + migrated SQLite data. + """ codelens_dir = os.path.join(workspace, ".codelens") if not os.path.isdir(codelens_dir): return False - # Check for at least one registry file + + # Path 1: legacy JSON registry — still works for unmigrated workspaces. backend_json = os.path.join(codelens_dir, "backend.json") frontend_json = os.path.join(codelens_dir, "frontend.json") - return os.path.exists(backend_json) or os.path.exists(frontend_json) + if os.path.exists(backend_json) or os.path.exists(frontend_json): + return True + + # Path 2: migrated SQLite registry — must exist AND be populated, + # so an empty/corrupt db does not falsely satisfy the check (issue #35). + from persistent_registry import db_is_populated + db_path = os.path.join(codelens_dir, "codelens.db") + return db_is_populated(db_path) def _auto_setup(workspace: str) -> Dict[str, Any]: diff --git a/scripts/commands/migrate.py b/scripts/commands/migrate.py index bbb9882..d24b3fb 100644 --- a/scripts/commands/migrate.py +++ b/scripts/commands/migrate.py @@ -4,7 +4,7 @@ from typing import Dict, Any from utils import logger -from persistent_registry import PersistentRegistry, db_exists, is_sqlite_available +from persistent_registry import PersistentRegistry, db_exists, db_is_populated, is_sqlite_available from commands import register_command @@ -51,10 +51,16 @@ def cmd_migrate(workspace: str, db_path: str = None, verify: bool = False) -> Di # Step 2: Check if migration is needed effective_db_path = db_path or os.path.join(workspace, ".codelens", "codelens.db") - if os.path.exists(effective_db_path): + # Only skip if the db is actually populated. ``scan`` creates an empty + # db shell via ``store_scan_result`` (which writes to ``analysis_cache`` + # but not ``symbols``), so a bare ``os.path.exists`` check here would + # skip the real JSON→SQLite migration and leave ``symbols`` empty — + # which in turn makes ``_registry_exists`` return False after the JSON + # files are deleted (issue #35). + if db_is_populated(effective_db_path): return { "status": "ok", - "message": "SQLite database already exists. Use --verify to check integrity.", + "message": "SQLite database already exists and is populated. Use --verify to check integrity.", "db_path": effective_db_path, "hint": "To re-migrate, delete the .codelens/codelens.db file first.", } diff --git a/scripts/persistent_registry.py b/scripts/persistent_registry.py index 274f761..06b8015 100644 --- a/scripts/persistent_registry.py +++ b/scripts/persistent_registry.py @@ -1052,6 +1052,38 @@ def db_exists(workspace: str, db_path: Optional[str] = None) -> bool: return os.path.exists(path) +def db_is_populated(db_path: str) -> bool: + """Return True if the SQLite db at ``db_path`` exists and has data. + + "Populated" means the file exists, can be opened as SQLite, and the + ``symbols`` table contains at least one row — the table populated by + both ``scan`` (via ``insert_symbols_batch``) and ``migrate_from_json``. + + Used to distinguish a real migrated/scan-populated db from an empty + or corrupt one. Callers: + + - ``codelens._registry_exists`` treats an empty/corrupt db as "no + valid registry" so auto-setup still runs (issue #35). + - ``commands.migrate.cmd_migrate`` only skips re-migration when the + db is actually populated — otherwise an empty db shell created by + ``store_scan_result`` (which only writes to ``analysis_cache``, + not ``symbols``) would block the real JSON→SQLite migration. + + Any error (missing file, corrupt db, missing table, locked) returns + False so callers fall through to the safe default. + """ + if not os.path.exists(db_path): + return False + try: + conn = sqlite3.connect(db_path, timeout=2.0) + try: + return conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0] > 0 + finally: + conn.close() + except sqlite3.Error: + return False + + def get_registry(workspace: str, db_path: Optional[str] = None) -> PersistentRegistry: """Get a PersistentRegistry instance for the workspace.""" return PersistentRegistry(workspace, db_path) diff --git a/scripts/registry.py b/scripts/registry.py index 414f6c0..99925b2 100755 --- a/scripts/registry.py +++ b/scripts/registry.py @@ -99,7 +99,15 @@ def save_config(workspace: str, config: Dict[str, Any]) -> None: def load_frontend_registry(workspace: str) -> Dict[str, Any]: - """Load frontend.json registry.""" + """Load frontend.json registry, falling back to SQLite if missing. + + JSON is the primary source (pre-migrate workspaces). When the JSON + file is absent — e.g., a migrated workspace whose JSON files were + deleted post-migrate (issue #35) — fall back to the SQLite cache + populated by ``PersistentRegistry.store_frontend_registry`` during + ``codelens migrate``. If neither source has data, return an empty + registry. + """ path = os.path.join(get_codelens_dir(workspace), 'frontend.json') if os.path.exists(path): try: @@ -107,6 +115,34 @@ def load_frontend_registry(workspace: str) -> Dict[str, Any]: return json.load(f) except (json.JSONDecodeError, IOError): logger.debug("Failed to parse frontend registry, returning empty", exc_info=True) + # JSON missing or unreadable — try the SQLite cache (issue #35). + return _load_frontend_registry_from_sqlite(workspace) + + +def _load_frontend_registry_from_sqlite(workspace: str) -> Dict[str, Any]: + """Load the frontend registry from the SQLite cache, or empty if unavailable. + + Used as a fallback when ``frontend.json`` is missing (e.g., deleted + after ``migrate``). Returns an empty registry if SQLite is unavailable, + the db doesn't exist, or the cache row is absent — so callers always + get a well-formed dict. + """ + try: + from persistent_registry import PersistentRegistry, is_sqlite_available + if not is_sqlite_available(): + return _empty_frontend_registry(workspace) + reg = PersistentRegistry(workspace) + cached = reg.load_frontend_registry() + reg.close() + if cached: + return cached + except Exception: + logger.debug("SQLite frontend registry fallback failed", exc_info=True) + return _empty_frontend_registry(workspace) + + +def _empty_frontend_registry(workspace: str) -> Dict[str, Any]: + """Return an empty frontend registry with the expected schema.""" return { "last_updated": "", "workspace": workspace, @@ -128,7 +164,15 @@ def save_frontend_registry(workspace: str, data: Dict[str, Any]) -> None: def load_backend_registry(workspace: str) -> Dict[str, Any]: - """Load backend.json registry.""" + """Load backend.json registry, falling back to SQLite if missing. + + JSON is the primary source (pre-migrate workspaces). When the JSON + file is absent — e.g., a migrated workspace whose JSON files were + deleted post-migrate (issue #35) — fall back to the SQLite cache + populated by ``PersistentRegistry.store_backend_registry`` during + ``codelens migrate``. If neither source has data, return an empty + registry. + """ path = os.path.join(get_codelens_dir(workspace), 'backend.json') if os.path.exists(path): try: @@ -136,6 +180,34 @@ def load_backend_registry(workspace: str) -> Dict[str, Any]: return json.load(f) except (json.JSONDecodeError, IOError): logger.debug("Failed to parse backend registry, returning empty", exc_info=True) + # JSON missing or unreadable — try the SQLite cache (issue #35). + return _load_backend_registry_from_sqlite(workspace) + + +def _load_backend_registry_from_sqlite(workspace: str) -> Dict[str, Any]: + """Load the backend registry from the SQLite cache, or empty if unavailable. + + Used as a fallback when ``backend.json`` is missing (e.g., deleted + after ``migrate``). Returns an empty registry if SQLite is unavailable, + the db doesn't exist, or the cache row is absent — so callers always + get a well-formed dict. + """ + try: + from persistent_registry import PersistentRegistry, is_sqlite_available + if not is_sqlite_available(): + return _empty_backend_registry(workspace) + reg = PersistentRegistry(workspace) + cached = reg.load_backend_registry() + reg.close() + if cached: + return cached + except Exception: + logger.debug("SQLite backend registry fallback failed", exc_info=True) + return _empty_backend_registry(workspace) + + +def _empty_backend_registry(workspace: str) -> Dict[str, Any]: + """Return an empty backend registry with the expected schema.""" return { "last_updated": "", "workspace": workspace, diff --git a/tests/test_cli.py b/tests/test_cli.py index c572507..2de8d9d 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,6 +15,8 @@ from commands.query import cmd_query from commands.list import cmd_list from commands.init import cmd_init +from commands.migrate import cmd_migrate +from codelens import _registry_exists def _create_sample_workspace(): @@ -331,3 +333,155 @@ def test_check_full_cli_invocation_with_positional(self): finally: import shutil shutil.rmtree(ws, ignore_errors=True) + + +# ─── _registry_exists after migrate (issue #35) ───────────────────── + + +class TestRegistryExistsSqlite: + """Regression guard for issue #35. + + Before the fix, ``_registry_exists`` only checked for + ``backend.json`` / ``frontend.json``. A workspace that had been + migrated to SQLite (``codelens migrate``) and whose JSON files + were then deleted was always treated as having no registry, so + every subsequent command silently re-ran ``init + scan`` and threw + away the migrated data — negating the whole point of ``migrate``. + + The fix adds a second path: a populated ``codelens.db`` also + counts as a valid registry. "Populated" means the ``symbols`` + table has at least one row, so an empty or corrupt db is NOT + falsely treated as valid. + """ + + def test_registry_exists_after_migrate_with_json_deleted(self): + """migrate → delete JSON → ``_registry_exists`` must return True. + + This is the exact repro from issue #35. + """ + ws = _create_sample_workspace() + try: + cmd_init(ws) + cmd_scan(ws) + # Sanity: JSON files exist before migrate. + assert os.path.exists(os.path.join(ws, ".codelens", "backend.json")) + assert os.path.exists(os.path.join(ws, ".codelens", "frontend.json")) + + # Migrate JSON → SQLite. + migrate_result = cmd_migrate(ws) + assert migrate_result["status"] == "ok", migrate_result + + # Delete the JSON files (post-migrate cleanup). + os.remove(os.path.join(ws, ".codelens", "backend.json")) + os.remove(os.path.join(ws, ".codelens", "frontend.json")) + + # The fix: the migrated SQLite db must still satisfy + # _registry_exists so commands don't trigger auto-setup. + assert _registry_exists(ws) is True, ( + "issue #35 regression: migrated workspace with deleted JSON " + "files is not recognized as having a valid registry" + ) + finally: + import shutil + shutil.rmtree(ws, ignore_errors=True) + + def test_registry_exists_false_for_empty_db(self): + """An empty SQLite db (``symbols`` has 0 rows) must NOT be valid. + + Issue #35 constraint: 'db kosong/corrupt tidak salah dianggap + registry valid'. + """ + import sqlite3 + + with tempfile.TemporaryDirectory() as ws: + codelens_dir = os.path.join(ws, ".codelens") + os.makedirs(codelens_dir) + db_path = os.path.join(codelens_dir, "codelens.db") + # Create a real SQLite db with the symbols table but no rows. + conn = sqlite3.connect(db_path) + conn.execute( + "CREATE TABLE symbols (id INTEGER PRIMARY KEY, name TEXT)" + ) + conn.commit() + conn.close() + + # No JSON files, empty db → must be False so auto-setup runs. + assert _registry_exists(ws) is False, ( + "empty SQLite db should not be treated as a valid registry" + ) + + def test_registry_exists_false_for_corrupt_db(self): + """A corrupt SQLite db (random bytes) must NOT be valid.""" + with tempfile.TemporaryDirectory() as ws: + codelens_dir = os.path.join(ws, ".codelens") + os.makedirs(codelens_dir) + db_path = os.path.join(codelens_dir, "codelens.db") + with open(db_path, "wb") as f: + f.write(b"not a sqlite database file - corrupt bytes") + + # No JSON files, corrupt db → must be False. + assert _registry_exists(ws) is False, ( + "corrupt SQLite db should not be treated as a valid registry" + ) + + def test_registry_exists_true_for_json_only_workspace(self): + """Legacy JSON-only workspace (no migrate) must still work. + + Ensures the fix is purely additive — path 1 (JSON check) is + unchanged and still detects pre-migration workspaces, even + though ``scan`` may also create an empty ``codelens.db`` shell + via ``store_scan_result`` (which writes only to + ``analysis_cache``, not ``symbols``). + """ + ws = _create_sample_workspace() + try: + cmd_init(ws) + cmd_scan(ws) + # Pre-migration state: JSON files exist. + assert os.path.exists(os.path.join(ws, ".codelens", "backend.json")) + assert os.path.exists(os.path.join(ws, ".codelens", "frontend.json")) + + assert _registry_exists(ws) is True, ( + "JSON-only workspace should still be detected as valid " + "(pre-existing behavior must not regress)" + ) + finally: + import shutil + shutil.rmtree(ws, ignore_errors=True) + + def test_query_uses_sqlite_fallback_after_json_deleted(self): + """End-to-end: query must return real data from SQLite after + ``migrate`` + JSON deletion — not just avoid auto-setup, but + actually serve the migrated data (issue #35: 'padahal data + lengkap sudah ada di codelens.db'). + + Verifies that ``load_backend_registry`` / ``load_frontend_registry`` + fall back to the SQLite cache populated by ``migrate`` when the + JSON files are missing. + """ + ws = _create_sample_workspace() + try: + cmd_init(ws) + cmd_scan(ws) + # Sanity: the symbol we'll query exists in the JSON registry. + pre = cmd_query("verify_token", ws, domain="backend") + assert pre["found"] is True, ( + "verify_token must exist in JSON registry before migrate" + ) + + # Migrate → delete JSON → query must still find the symbol. + mig = cmd_migrate(ws) + assert mig["status"] == "ok", mig + os.remove(os.path.join(ws, ".codelens", "backend.json")) + os.remove(os.path.join(ws, ".codelens", "frontend.json")) + + post = cmd_query("verify_token", ws, domain="backend") + assert post["found"] is True, ( + "query must return data from SQLite cache after JSON deletion " + "(issue #35: migrated data must be usable, not just present)" + ) + assert post["type"] == "function" + assert post["node"]["fn"] == "verify_token" + finally: + import shutil + shutil.rmtree(ws, ignore_errors=True) diff --git a/tests/test_persistent_registry.py b/tests/test_persistent_registry.py index 7d14ee3..6f29927 100644 --- a/tests/test_persistent_registry.py +++ b/tests/test_persistent_registry.py @@ -464,12 +464,22 @@ def test_migrate_already_exists(self, workspace): from commands.migrate import cmd_migrate from persistent_registry import PersistentRegistry - # Create the DB first + # Create the DB first and populate it with at least one symbol. + # An empty db shell (created by ``scan`` via ``store_scan_result``) + # must NOT trigger the "already exists" early-return — otherwise + # the real JSON→SQLite migration is skipped and ``symbols`` stays + # empty, which is the root cause of issue #35. Only a populated + # db should be treated as "already migrated". reg = PersistentRegistry(workspace) - reg._connect() + conn = reg._connect() + conn.execute( + "INSERT INTO symbols (name, kind, file_path) VALUES (?, ?, ?)", + ("placeholder", "function", "/fake/path.py"), + ) + conn.commit() reg.close() - # Should return "already exists" message + # Should return "already exists" message because the db is populated. result = cmd_migrate(workspace) assert result['status'] == 'ok' assert 'already exists' in result.get('message', '')