Wolfvin · Wolfvin · Jun 29, 2026 · Jun 28, 2026
diff --git a/scripts/persistent_registry.py b/scripts/persistent_registry.py
@@ -728,7 +728,7 @@
        joined = "\n".join(rel_paths)
        return hashlib.sha256(joined.encode("utf-8")).hexdigest()

    def store_scan_result(self, result: Dict[str, Any]) -> None:
        """Persist a normalized subset of a scan result to ``analysis_cache``.

        Closes the P0 silent bug from issue #31: ``codelens.py`` calls
@@ -745,6 +745,17 @@
         Idempotent: re-scanning the same file set with the same result
         subset does not insert a duplicate row.
 
+        Closes the P1 idempotency bug from issue #82: the previous
+        implementation included ``scan_timestamp = time.time()`` in the
+        ``result_hash`` computation, so the hash changed on every call and
+        the idempotency check below never matched — every re-scan inserted
+        a fresh row, causing unbounded ``analysis_cache`` growth. The fix
+        splits the subset into a stable part (no timestamp) used for
+        hashing and a full part (with timestamp) used for storage. On a
+        cache hit, the existing row's ``timestamp`` and ``result_json``
+        are updated in place so trend tracking still reflects the latest
+        scan without inserting duplicates.
+
         Args:
             result: The full scan result dict returned by
                 ``commands.scan.cmd_scan``. Expected to contain the
@@ -768,7 +779,12 @@
             + int(backend.get("nodes", 0) or 0)
         )
 
-        subset = {
+        # Stable subset (no scan_timestamp) used for result_hash — issue #82.
+        # Excluding scan_timestamp from the hash makes re-scans of the same
+        # file set with the same counts produce an identical result_hash,
+        # so the idempotency check below finds the existing row instead of
+        # inserting a duplicate on every call (which caused cache bloat).
+        stable_subset = {
             "files_scanned": files_scanned,
             "frontend_counts": {
                 "classes": int(frontend.get("classes", 0) or 0),
@@ -779,10 +795,18 @@
                 "edges": int(backend.get("edges", 0) or 0),
             },
             "frameworks": frameworks,
-            "scan_timestamp": time.time(),
             "total_symbols": total_symbols,
         }
 
+        # Capture once per call and reuse for both the JSON scan_timestamp
+        # field and the analysis_cache.timestamp column — keeps the two
+        # consistent without paying for a second time.time() call.
+        now = time.time()
+        # Full subset (with scan_timestamp) stored in result_json for trend
+        # tracking. scan_timestamp is preserved in storage but excluded
+        # from the hash above so re-scans stay idempotent.
+        subset = {**stable_subset, "scan_timestamp": now}
+
         file_set_hash = self._compute_scan_file_set_hash(workspace)
         if not file_set_hash:
             logger.warning(
@@ -791,15 +815,16 @@
             )
             return
 
+        stable_json = json.dumps(stable_subset, ensure_ascii=False, default=str, sort_keys=True)
+        result_hash = hashlib.sha256(stable_json.encode("utf-8")).hexdigest()
         result_json = json.dumps(subset, ensure_ascii=False, default=str, sort_keys=True)
-        result_hash = hashlib.sha256(result_json.encode("utf-8")).hexdigest()
-        now = time.time()
 
         conn = self._connect()
-        # Idempotent: if the same (command, file_set_hash, result_hash)
-        # triple already exists, do nothing. This mirrors the "INSERT OR
-        # IGNORE" intent — re-scanning the same file set with the same
-        # outcome must not bloat the cache.
+        # Idempotent upsert: if the same (command, file_set_hash, result_hash)
+        # triple already exists, update its timestamp + result_json in place
+        # so trend tracking reflects the latest scan, instead of inserting a
+        # duplicate row. Mirrors the "INSERT OR IGNORE" intent — re-scanning
+        # the same file set with the same outcome must not bloat the cache.
         existing = conn.execute(
             """SELECT 1 FROM analysis_cache
                WHERE command = ? AND file_set_hash = ? AND result_hash = ?
@@ -808,11 +833,24 @@
             ("scan", file_set_hash, result_hash),
         ).fetchone()
         if existing:
-            logger.debug(
-                "store_scan_result: cache hit (command=scan, "
-                "file_set_hash=%s, result_hash=%s) — skipping insert",
-                file_set_hash[:12], result_hash[:12],
-            )
+            try:
+                conn.execute(
+                    """UPDATE analysis_cache
+                       SET result_json = ?, timestamp = ?
+                       WHERE command = ? AND file_set_hash = ? AND result_hash = ?
+                    """,
+                    (result_json, now, "scan", file_set_hash, result_hash),
+                )
+                conn.commit()
+                logger.debug(
+                    "store_scan_result: cache hit (command=scan, "
+                    "file_set_hash=%s, result_hash=%s) — updated timestamp",
+                    file_set_hash[:12], result_hash[:12],
+                )
+            except sqlite3.Error as e:
+                logger.warning(
+                    "store_scan_result: failed to update analysis_cache on hit: %s", e
+                )
             return
 
         try:

diff --git a/tests/test_persistent_registry.py b/tests/test_persistent_registry.py
@@ -476,13 +476,21 @@ def test_migrate_already_exists(self, workspace):
 
 
 class TestStoreScanResult:
-    """Test PersistentRegistry.store_scan_result — closes issue #31.
+    """Test PersistentRegistry.store_scan_result — closes issues #31 and #82.
 
-    The P0 bug: codelens.py calls pr.store_scan_result(result) after every
-    successful scan, but the method did not exist. The resulting
+    The P0 bug (#31): codelens.py calls pr.store_scan_result(result) after
+    every successful scan, but the method did not exist. The resulting
     AttributeError was swallowed by a bare except Exception, so the
     analysis_cache table was never populated and the sqlite_persisted
     flag was never set. These tests verify the fix.
+
+    The P1 bug (#82): the previous implementation included
+    ``scan_timestamp = time.time()`` in the ``result_hash`` computation,
+    so the hash changed on every call and the idempotency check never
+    matched — every re-scan inserted a fresh row (cache bloat). The fix
+    excludes ``scan_timestamp`` from the hash while still storing it in
+    ``result_json`` for trend tracking, and updates the existing row's
+    timestamp + result_json in place on cache hit.
     """
 
     def _make_scan_result(self, workspace, **overrides):
@@ -545,27 +553,29 @@ def test_store_scan_result_persists_to_analysis_cache(self, populated_workspace)
         assert stored["total_symbols"] == 8
 
     def test_store_scan_result_is_idempotent(self, populated_workspace):
-        """Calling store_scan_result twice with the same result inserts only one row."""
+        """Calling store_scan_result twice with the same result inserts only one row.
+
+        Issue #82: idempotency must hold with REAL time (no patching).
+        Previously this test only passed by patching ``time.time()`` to a
+        fixed value — meaningless in production where every scan has a
+        different timestamp. With ``scan_timestamp`` excluded from
+        ``result_hash``, two calls with real time still produce the same
+        hash (because the stable subset is identical) → the second call
+        hits the existing row and updates it in place instead of
+        inserting a duplicate.
+        """
         from persistent_registry import PersistentRegistry
         import sqlite3
 
         reg = PersistentRegistry(populated_workspace)
         reg._connect()
 
         result = self._make_scan_result(populated_workspace)
-        # Note: scan_timestamp is computed inside store_scan_result via
-        # time.time(), so the second call will produce a DIFFERENT subset
-        # unless we freeze the timestamp. To test idempotency, we patch
-        # time.time inside the module to return a fixed value.
-        import persistent_registry as pr_module
-
-        original_time = pr_module.time.time
-        pr_module.time.time = lambda: 1700000000.0
-        try:
-            reg.store_scan_result(result)
-            reg.store_scan_result(result)
-        finally:
-            pr_module.time.time = original_time
+        # Real time — no patching. Two consecutive calls will have
+        # different scan_timestamp values, but the stable subset used for
+        # result_hash is identical, so the idempotency check must fire.
+        reg.store_scan_result(result)
+        reg.store_scan_result(result)
 
         db_path = reg.db_path
         reg.close()
@@ -580,6 +590,59 @@ def test_store_scan_result_is_idempotent(self, populated_workspace):
             f"expected 1 row after duplicate store_scan_result, got {count}"
         )
 
+    def test_store_scan_result_updates_timestamp_on_rescan(self, populated_workspace):
+        """Re-scanning the same file set updates the existing row's timestamp.
+
+        Issue #82 acceptance criterion: idempotency means no duplicate row,
+        but the row's ``timestamp`` column must reflect the LATEST scan
+        (not the first) so trend tracking stays current. We query the
+        ``timestamp`` column after the first call, then again after the
+        second call, and assert the value changed.
+        """
+        from persistent_registry import PersistentRegistry
+        import sqlite3
+
+        reg = PersistentRegistry(populated_workspace)
+        reg._connect()
+
+        result = self._make_scan_result(populated_workspace)
+        reg.store_scan_result(result)
+
+        db_path = reg.db_path
+        reg.close()
+
+        conn = sqlite3.connect(db_path)
+        ts_after_first = conn.execute(
+            "SELECT timestamp FROM analysis_cache WHERE command = 'scan'"
+        ).fetchone()[0]
+        conn.close()
+
+        # Sleep briefly so the second call's time.time() is strictly
+        # greater than the first's (deterministic on any platform).
+        time.sleep(0.05)
+
+        reg2 = PersistentRegistry(populated_workspace)
+        reg2._connect()
+        reg2.store_scan_result(result)
+        reg2.close()
+
+        conn = sqlite3.connect(db_path)
+        row_count = conn.execute(
+            "SELECT COUNT(*) FROM analysis_cache WHERE command = 'scan'"
+        ).fetchone()[0]
+        ts_after_second = conn.execute(
+            "SELECT timestamp FROM analysis_cache WHERE command = 'scan'"
+        ).fetchone()[0]
+        conn.close()
+
+        assert row_count == 1, (
+            f"expected 1 row after re-scan (idempotent), got {row_count}"
+        )
+        assert ts_after_second > ts_after_first, (
+            f"timestamp must be updated on re-scan: "
+            f"first={ts_after_first!r}, second={ts_after_second!r}"
+        )
+
     def test_store_scan_result_different_file_sets(self, workspace):
         """Two different file sets produce two rows with different file_set_hash."""
         from persistent_registry import PersistentRegistry