From 987ea67451ca6278d3adb770629cc5e1390b87ff Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:47:40 +0800
Subject: [PATCH 01/11] feat(mind): Memory Protocol skeleton (PR6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The granular Protocol lets each backend opt in to whichever surface it
needs — in-process tools, MCP servers, or lifecycle hooks. reset() is
the only required side-effect method.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/mind/__init__.py         | 10 +++++++++
 quantmind/mind/memory/__init__.py  |  5 +++++
 quantmind/mind/memory/_protocol.py | 35 +++++++++++++++++++++++++++++
 tests/mind/__init__.py             |  0
 tests/mind/memory/__init__.py      |  0
 tests/mind/memory/test_protocol.py | 36 ++++++++++++++++++++++++++++++
 6 files changed, 86 insertions(+)
 create mode 100644 quantmind/mind/__init__.py
 create mode 100644 quantmind/mind/memory/__init__.py
 create mode 100644 quantmind/mind/memory/_protocol.py
 create mode 100644 tests/mind/__init__.py
 create mode 100644 tests/mind/memory/__init__.py
 create mode 100644 tests/mind/memory/test_protocol.py

diff --git a/quantmind/mind/__init__.py b/quantmind/mind/__init__.py
new file mode 100644
index 0000000..febb682
--- /dev/null
+++ b/quantmind/mind/__init__.py
@@ -0,0 +1,10 @@
+"""quantmind.mind — cognitive layer.
+
+PR6 introduces ``mind/memory/`` (Memory Protocol + filesystem backend).
+PR7+ will add ``mind/store/`` (knowledge store) and
+``mind/summarize_run`` (trajectory summariser).
+"""
+
+from quantmind.mind.memory import Memory
+
+__all__ = ["Memory"]
diff --git a/quantmind/mind/memory/__init__.py b/quantmind/mind/memory/__init__.py
new file mode 100644
index 0000000..d7d070a
--- /dev/null
+++ b/quantmind/mind/memory/__init__.py
@@ -0,0 +1,5 @@
+"""quantmind.mind.memory — Memory Protocol + filesystem MVP backend."""
+
+from quantmind.mind.memory._protocol import Memory
+
+__all__ = ["Memory"]
diff --git a/quantmind/mind/memory/_protocol.py b/quantmind/mind/memory/_protocol.py
new file mode 100644
index 0000000..6f0038f
--- /dev/null
+++ b/quantmind/mind/memory/_protocol.py
@@ -0,0 +1,35 @@
+"""Memory Protocol — granular cross-step working memory contract.
+
+Each method has its own narrow surface so concrete implementations can
+opt in to whichever channel(s) they need (in-process tools, MCP servers,
+lifecycle hooks). The Protocol does NOT prescribe MCP — see design doc
+\u00a711.2 for the rationale: future backends like an embedding-based
+``ChromaMemory`` are tool-only, while the MVP ``FilesystemMemory`` is
+MCP-based.
+"""
+
+from typing import Any, Protocol, runtime_checkable
+
+from agents import RunHooks, Tool
+from agents.mcp import MCPServer
+
+
+@runtime_checkable
+class Memory(Protocol):
+    """Cross-step working memory exposed to a flow's Agent."""
+
+    def tools(self) -> list[Tool]:
+        """In-process ``@function_tool`` list for the Agent."""
+        ...
+
+    def mcp_servers(self) -> list[MCPServer]:
+        """MCP servers exposed to the Agent."""
+        ...
+
+    def run_hooks(self) -> RunHooks[Any] | None:
+        """Lifecycle hooks for accounting / archiving / item indexing."""
+        ...
+
+    async def reset(self) -> None:
+        """Wipe the memory area. Implementations may be no-ops."""
+        ...
diff --git a/tests/mind/__init__.py b/tests/mind/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/mind/memory/__init__.py b/tests/mind/memory/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/mind/memory/test_protocol.py b/tests/mind/memory/test_protocol.py
new file mode 100644
index 0000000..5034ffb
--- /dev/null
+++ b/tests/mind/memory/test_protocol.py
@@ -0,0 +1,36 @@
+"""Tests for ``quantmind.mind.memory._protocol``."""
+
+import unittest
+from typing import Any
+
+from agents import RunHooks, Tool
+from agents.mcp import MCPServer
+
+from quantmind.mind.memory import Memory
+
+
+class _CompleteStub:
+    def tools(self) -> list[Tool]:
+        return []
+
+    def mcp_servers(self) -> list[MCPServer]:
+        return []
+
+    def run_hooks(self) -> RunHooks[Any] | None:
+        return None
+
+    async def reset(self) -> None:
+        return None
+
+
+class _MissingMethodStub:
+    def tools(self) -> list[Tool]:
+        return []
+
+
+class MemoryProtocolTests(unittest.TestCase):
+    def test_runtime_checkable_accepts_complete_stub(self) -> None:
+        self.assertIsInstance(_CompleteStub(), Memory)
+
+    def test_runtime_checkable_rejects_incomplete_stub(self) -> None:
+        self.assertNotIsInstance(_MissingMethodStub(), Memory)

From 1e88e89454bfbb8a6ba25d1711050a36c6fb6649 Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:48:56 +0800
Subject: [PATCH 02/11] feat(mind): RunRecord + atomic write_run_record (PR6)

generate_run_id produces a sortable timestamp plus a 3-char base36
suffix. write_run_record uses tmp+replace for the per-run JSON file
and appends to runs.jsonl under an asyncio.Lock; cross-process
concurrency is undefined.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/mind/memory/_trajectory.py |  99 +++++++++++++++++++++++
 tests/mind/memory/test_trajectory.py | 114 +++++++++++++++++++++++++++
 2 files changed, 213 insertions(+)
 create mode 100644 quantmind/mind/memory/_trajectory.py
 create mode 100644 tests/mind/memory/test_trajectory.py

diff --git a/quantmind/mind/memory/_trajectory.py b/quantmind/mind/memory/_trajectory.py
new file mode 100644
index 0000000..c51f2c9
--- /dev/null
+++ b/quantmind/mind/memory/_trajectory.py
@@ -0,0 +1,99 @@
+"""Trajectory archive: ``RunRecord`` schema + atomic writer.
+
+Each ``Runner.run`` invocation produces one ``RunRecord`` that is
+serialised to ``<memory_dir>/runs/<run_id>.json`` (atomic via
+``os.replace``) and appended to ``<memory_dir>/runs.jsonl`` (one
+JSON line per run, append-only).
+
+Cross-process concurrency is undefined; an ``asyncio.Lock`` (held by
+the calling ``FilesystemMemory``) serialises writes within a single
+Python process.
+"""
+
+import asyncio
+import json
+import os
+import secrets
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+_BASE36 = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+
+@dataclass(frozen=True, slots=True)
+class RunRecord:
+    """A single QuantMind flow run trajectory record."""
+
+    schema_version: int
+    run_id: str
+    workflow_name: str
+    trace_id: str | None
+    started_at: datetime
+    ended_at: datetime
+    duration_seconds: float
+    agent: dict[str, str]
+    llm_calls: list[dict[str, Any]]
+    tool_calls: list[dict[str, Any]]
+    memory_ops: dict[str, list[str]]
+    tokens_total: dict[str, int]
+    cost_estimate_usd: float
+    input_summary: str
+    output_summary: str
+    error: str | None
+
+
+def generate_run_id(now: datetime) -> str:
+    """Build ``YYYYMMDDTHHMMSSmmm`` (UTC) plus 3 base36 random chars."""
+    stamp = now.strftime("%Y%m%dT%H%M%S") + f"{now.microsecond // 1000:03d}"
+    suffix = "".join(secrets.choice(_BASE36) for _ in range(3))
+    return f"{stamp}{suffix}"
+
+
+def _to_jsonable(value: Any) -> Any:
+    if isinstance(value, datetime):
+        return value.isoformat().replace("+00:00", "Z")
+    raise TypeError(
+        f"Object of type {type(value).__name__} is not JSON serialisable"
+    )
+
+
+def _serialise(record: RunRecord) -> str:
+    return json.dumps(
+        asdict(record),
+        default=_to_jsonable,
+        ensure_ascii=False,
+    )
+
+
+async def write_run_record(
+    memory_dir: Path,
+    record: RunRecord,
+    *,
+    archive_lock: asyncio.Lock,
+) -> None:
+    """Persist ``record`` atomically and append to ``runs.jsonl``.
+
+    Atomicity:
+
+    - The per-run JSON file is written to ``<run_id>.json.tmp`` and
+      then ``os.replace``-d into place; same-FS rename is atomic on
+      POSIX, preventing partial reads.
+    - The ``runs.jsonl`` append is serialised across one Python
+      process via ``archive_lock``; cross-process concurrency is
+      unsupported (documented in ``FilesystemMemory``).
+    """
+    payload = _serialise(record)
+    runs_dir = memory_dir / "runs"
+    runs_dir.mkdir(parents=True, exist_ok=True)
+    final = runs_dir / f"{record.run_id}.json"
+    tmp = runs_dir / f"{record.run_id}.json.tmp"
+    tmp.write_text(payload, encoding="utf-8")
+    os.replace(tmp, final)
+
+    index = memory_dir / "runs.jsonl"
+    async with archive_lock:
+        with index.open("a", encoding="utf-8") as fh:
+            fh.write(payload)
+            fh.write("\n")
diff --git a/tests/mind/memory/test_trajectory.py b/tests/mind/memory/test_trajectory.py
new file mode 100644
index 0000000..a00212b
--- /dev/null
+++ b/tests/mind/memory/test_trajectory.py
@@ -0,0 +1,114 @@
+"""Tests for ``quantmind.mind.memory._trajectory``."""
+
+import asyncio
+import json
+import tempfile
+import unittest
+from datetime import datetime, timezone
+from pathlib import Path
+
+from quantmind.mind.memory._trajectory import (
+    RunRecord,
+    generate_run_id,
+    write_run_record,
+)
+
+
+def _fixture_record(run_id: str) -> RunRecord:
+    started = datetime(2026, 5, 8, 12, 30, 45, 123000, tzinfo=timezone.utc)
+    ended = datetime(2026, 5, 8, 12, 30, 50, 456000, tzinfo=timezone.utc)
+    return RunRecord(
+        schema_version=1,
+        run_id=run_id,
+        workflow_name="quantmind.paper_flow",
+        trace_id="trace_abc",
+        started_at=started,
+        ended_at=ended,
+        duration_seconds=5.333,
+        agent={
+            "name": "paper_extractor",
+            "model": "gpt-4o",
+            "instructions_hash": "abc",
+        },
+        llm_calls=[
+            {
+                "tokens_in": 10,
+                "tokens_out": 5,
+                "duration_s": 0.5,
+                "model": "gpt-4o",
+            }
+        ],
+        tool_calls=[],
+        memory_ops={"files_read": [], "files_written": []},
+        tokens_total={"input": 10, "output": 5},
+        cost_estimate_usd=0.0,
+        input_summary="hello",
+        output_summary="world",
+        error=None,
+    )
+
+
+class GenerateRunIdTests(unittest.TestCase):
+    def test_format_matches_documented_shape(self) -> None:
+        now = datetime(2026, 5, 8, 12, 30, 45, 123456, tzinfo=timezone.utc)
+        run_id = generate_run_id(now)
+        self.assertRegex(run_id, r"^20260508T123045123[0-9a-z]{3}$")
+
+    def test_different_calls_produce_different_ids(self) -> None:
+        now = datetime(2026, 5, 8, 12, 30, 45, 123000, tzinfo=timezone.utc)
+        ids = {generate_run_id(now) for _ in range(50)}
+        self.assertGreater(len(ids), 40)
+
+
+class WriteRunRecordTests(unittest.IsolatedAsyncioTestCase):
+    async def test_writes_atomic_per_run_file(self) -> None:
+        with tempfile.TemporaryDirectory() as raw_dir:
+            mem_dir = Path(raw_dir)
+            (mem_dir / "runs").mkdir()
+            record = _fixture_record("rid001abc")
+            await write_run_record(mem_dir, record, archive_lock=asyncio.Lock())
+            target = mem_dir / "runs" / "rid001abc.json"
+            self.assertTrue(target.exists())
+            tmp = mem_dir / "runs" / "rid001abc.json.tmp"
+            self.assertFalse(tmp.exists())
+            payload = json.loads(target.read_text())
+            self.assertEqual(payload["run_id"], "rid001abc")
+            self.assertEqual(
+                payload["started_at"], "2026-05-08T12:30:45.123000Z"
+            )
+
+    async def test_appends_one_line_to_runs_jsonl(self) -> None:
+        with tempfile.TemporaryDirectory() as raw_dir:
+            mem_dir = Path(raw_dir)
+            (mem_dir / "runs").mkdir()
+            lock = asyncio.Lock()
+            await write_run_record(
+                mem_dir, _fixture_record("a01abc"), archive_lock=lock
+            )
+            await write_run_record(
+                mem_dir, _fixture_record("b02def"), archive_lock=lock
+            )
+            lines = (mem_dir / "runs.jsonl").read_text().splitlines()
+            self.assertEqual(len(lines), 2)
+            ids = [json.loads(line)["run_id"] for line in lines]
+            self.assertEqual(ids, ["a01abc", "b02def"])
+
+    async def test_concurrent_writes_serialise_under_lock(self) -> None:
+        with tempfile.TemporaryDirectory() as raw_dir:
+            mem_dir = Path(raw_dir)
+            (mem_dir / "runs").mkdir()
+            lock = asyncio.Lock()
+            await asyncio.gather(
+                write_run_record(
+                    mem_dir,
+                    _fixture_record("c01abc"),
+                    archive_lock=lock,
+                ),
+                write_run_record(
+                    mem_dir,
+                    _fixture_record("c02def"),
+                    archive_lock=lock,
+                ),
+            )
+            lines = (mem_dir / "runs.jsonl").read_text().splitlines()
+            self.assertEqual(len(lines), 2)

From acd359aaab6a5cd1e4b5e4145c71daa8bf5aa4d8 Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:50:24 +0800
Subject: [PATCH 03/11] feat(mind): MemoryRunHooks accumulator + persist (PR6)

Lifecycle methods accumulate llm_calls and tool_calls plus agent
metadata. persist() is invoked by the runner in finally so failed
runs still archive (with error set to str(exc)).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/mind/memory/_run_hooks.py | 172 ++++++++++++++++++++++++++++
 tests/mind/memory/test_run_hooks.py | 137 ++++++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 quantmind/mind/memory/_run_hooks.py
 create mode 100644 tests/mind/memory/test_run_hooks.py

diff --git a/quantmind/mind/memory/_run_hooks.py b/quantmind/mind/memory/_run_hooks.py
new file mode 100644
index 0000000..c89f624
--- /dev/null
+++ b/quantmind/mind/memory/_run_hooks.py
@@ -0,0 +1,172 @@
+"""``MemoryRunHooks`` — per-run lifecycle accumulator + ``persist()``.
+
+Constructed fresh per ``paper_flow`` invocation by
+``FilesystemMemory.run_hooks()``. Each lifecycle method accumulates
+metrics; the runner calls ``persist()`` in a ``finally`` block so
+runs that fail still produce a trajectory record (with ``error``
+set to ``str(exc)``).
+"""
+
+import asyncio
+import hashlib
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from agents import RunHooks
+
+from quantmind.mind.memory._trajectory import (
+    RunRecord,
+    generate_run_id,
+    write_run_record,
+)
+
+_SUMMARY_LIMIT = 500
+_TRUNC_SUFFIX = "... [truncated]"
+
+
+def _truncate(s: str, limit: int = _SUMMARY_LIMIT) -> str:
+    if len(s) <= limit:
+        return s
+    head = limit - len(_TRUNC_SUFFIX)
+    return s[:head] + _TRUNC_SUFFIX
+
+
+def _instructions_hash(instructions: str | None) -> str:
+    if not instructions:
+        return ""
+    return hashlib.sha256(instructions.encode("utf-8")).hexdigest()[:16]
+
+
+def _safe_repr(obj: Any) -> str:
+    if obj is None:
+        return ""
+    dump = getattr(obj, "model_dump_json", None)
+    if callable(dump):
+        try:
+            return dump()
+        except Exception:  # noqa: BLE001
+            pass
+    return str(obj)
+
+
+class MemoryRunHooks(RunHooks[Any]):
+    """Per-run lifecycle accumulator that persists a ``RunRecord``.
+
+    State lives on the instance and is written exactly once via
+    ``persist`` — typically called by the runner in a ``finally``
+    block so failed runs still archive.
+    """
+
+    def __init__(self, *, memory_dir: Path, archive_lock: asyncio.Lock) -> None:
+        self._memory_dir = memory_dir
+        self._archive_lock = archive_lock
+
+        self._started_at: datetime | None = None
+        self._ended_at: datetime | None = None
+        self._agent_name: str = ""
+        self._agent_model: str = ""
+        self._instructions_hash: str = ""
+        self._output_summary: str = ""
+        self._llm_calls: list[dict[str, Any]] = []
+        self._tool_calls: list[dict[str, Any]] = []
+
+        self._llm_timer_start: float | None = None
+        self._tool_timer_starts: dict[int, float] = {}
+
+    async def on_agent_start(self, ctx: Any, agent: Any) -> None:
+        self._started_at = datetime.now(timezone.utc)
+        self._agent_name = str(getattr(agent, "name", "") or "")
+        self._agent_model = str(getattr(agent, "model", "") or "")
+        self._instructions_hash = _instructions_hash(
+            getattr(agent, "instructions", None)
+        )
+
+    async def on_llm_start(self, *_: Any, **__: Any) -> None:
+        self._llm_timer_start = time.monotonic()
+
+    async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None:
+        duration = (
+            (time.monotonic() - self._llm_timer_start)
+            if self._llm_timer_start is not None
+            else 0.0
+        )
+        self._llm_timer_start = None
+        usage = getattr(response, "usage", None)
+        tokens_in = int(getattr(usage, "input_tokens", 0) or 0)
+        tokens_out = int(getattr(usage, "output_tokens", 0) or 0)
+        model = str(getattr(agent, "model", "") or "")
+        self._llm_calls.append(
+            {
+                "tokens_in": tokens_in,
+                "tokens_out": tokens_out,
+                "duration_s": round(duration, 4),
+                "model": model,
+            }
+        )
+
+    async def on_tool_start(self, ctx: Any, agent: Any, tool: Any) -> None:
+        self._tool_timer_starts[id(tool)] = time.monotonic()
+
+    async def on_tool_end(
+        self, ctx: Any, agent: Any, tool: Any, result: Any
+    ) -> None:
+        start = self._tool_timer_starts.pop(id(tool), None)
+        duration = (time.monotonic() - start) if start is not None else 0.0
+        name = str(getattr(tool, "name", "") or "")
+        self._tool_calls.append(
+            {
+                "name": name,
+                "args": (_truncate(str(result)) if result is not None else ""),
+                "duration_s": round(duration, 4),
+            }
+        )
+
+    async def on_agent_end(self, ctx: Any, agent: Any, output: Any) -> None:
+        self._ended_at = datetime.now(timezone.utc)
+        self._output_summary = _truncate(_safe_repr(output))
+
+    async def persist(
+        self,
+        *,
+        workflow_name: str,
+        result: Any,
+        error: BaseException | None,
+        input_payload: Any,
+    ) -> None:
+        """Build a ``RunRecord`` from accumulated state and write it.
+
+        Called by the runner in ``finally`` so failed runs archive too.
+        """
+        ended = self._ended_at or datetime.now(timezone.utc)
+        started = self._started_at or ended
+        tokens_total = {
+            "input": sum(c["tokens_in"] for c in self._llm_calls),
+            "output": sum(c["tokens_out"] for c in self._llm_calls),
+        }
+        record = RunRecord(
+            schema_version=1,
+            run_id=generate_run_id(started),
+            workflow_name=workflow_name,
+            trace_id=(getattr(result, "trace_id", None) if result else None),
+            started_at=started,
+            ended_at=ended,
+            duration_seconds=round((ended - started).total_seconds(), 4),
+            agent={
+                "name": self._agent_name,
+                "model": self._agent_model,
+                "instructions_hash": self._instructions_hash,
+            },
+            llm_calls=list(self._llm_calls),
+            tool_calls=list(self._tool_calls),
+            memory_ops={"files_read": [], "files_written": []},
+            tokens_total=tokens_total,
+            cost_estimate_usd=0.0,
+            input_summary=_truncate(_safe_repr(input_payload)),
+            output_summary=self._output_summary,
+            error=str(error) if error is not None else None,
+        )
+        await write_run_record(
+            self._memory_dir, record, archive_lock=self._archive_lock
+        )
diff --git a/tests/mind/memory/test_run_hooks.py b/tests/mind/memory/test_run_hooks.py
new file mode 100644
index 0000000..fcab3f6
--- /dev/null
+++ b/tests/mind/memory/test_run_hooks.py
@@ -0,0 +1,137 @@
+"""Tests for ``quantmind.mind.memory._run_hooks``."""
+
+import asyncio
+import tempfile
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+from quantmind.mind.memory._run_hooks import MemoryRunHooks
+
+
+def _make_hooks(tmpdir: Path) -> MemoryRunHooks:
+    (tmpdir / "runs").mkdir(parents=True, exist_ok=True)
+    return MemoryRunHooks(memory_dir=tmpdir, archive_lock=asyncio.Lock())
+
+
+class MemoryRunHooksLifecycleTests(unittest.IsolatedAsyncioTestCase):
+    async def test_on_agent_start_records_metadata(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            hooks = _make_hooks(Path(raw))
+            agent = SimpleNamespace(
+                name="paper_extractor",
+                model="gpt-4o",
+                instructions="extract papers",
+            )
+            await hooks.on_agent_start(SimpleNamespace(), agent)
+            self.assertEqual(hooks._agent_name, "paper_extractor")
+            self.assertEqual(hooks._agent_model, "gpt-4o")
+            self.assertEqual(len(hooks._instructions_hash), 16)
+
+    async def test_on_llm_end_accumulates_call(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            hooks = _make_hooks(Path(raw))
+            await hooks.on_agent_start(
+                SimpleNamespace(),
+                SimpleNamespace(name="a", model="gpt-4o", instructions=""),
+            )
+            await hooks.on_llm_start()
+            response = SimpleNamespace(
+                usage=SimpleNamespace(input_tokens=42, output_tokens=11)
+            )
+            await hooks.on_llm_end(
+                SimpleNamespace(),
+                SimpleNamespace(model="gpt-4o"),
+                response,
+            )
+            self.assertEqual(len(hooks._llm_calls), 1)
+            self.assertEqual(hooks._llm_calls[0]["tokens_in"], 42)
+            self.assertEqual(hooks._llm_calls[0]["tokens_out"], 11)
+
+    async def test_on_llm_end_handles_missing_usage(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            hooks = _make_hooks(Path(raw))
+            await hooks.on_llm_start()
+            await hooks.on_llm_end(
+                SimpleNamespace(),
+                SimpleNamespace(model="gpt-4o"),
+                SimpleNamespace(usage=None),
+            )
+            self.assertEqual(hooks._llm_calls[0]["tokens_in"], 0)
+            self.assertEqual(hooks._llm_calls[0]["tokens_out"], 0)
+
+    async def test_on_tool_pair_records_call(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            hooks = _make_hooks(Path(raw))
+            tool = SimpleNamespace(name="read_file")
+            await hooks.on_tool_start(
+                SimpleNamespace(), SimpleNamespace(), tool
+            )
+            await hooks.on_tool_end(
+                SimpleNamespace(),
+                SimpleNamespace(),
+                tool,
+                "file contents",
+            )
+            self.assertEqual(len(hooks._tool_calls), 1)
+            self.assertEqual(hooks._tool_calls[0]["name"], "read_file")
+            self.assertGreaterEqual(hooks._tool_calls[0]["duration_s"], 0.0)
+
+    async def test_on_agent_end_truncates_long_output(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            hooks = _make_hooks(Path(raw))
+            big = "x" * 1000
+            await hooks.on_agent_end(SimpleNamespace(), SimpleNamespace(), big)
+            self.assertTrue(hooks._output_summary.endswith("[truncated]"))
+            self.assertLessEqual(len(hooks._output_summary), 500)
+
+
+class MemoryRunHooksPersistTests(unittest.IsolatedAsyncioTestCase):
+    async def test_persist_success_writes_record_with_no_error(
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem_dir = Path(raw)
+            hooks = _make_hooks(mem_dir)
+            await hooks.on_agent_start(
+                SimpleNamespace(),
+                SimpleNamespace(name="a", model="gpt-4o", instructions="x"),
+            )
+            await hooks.on_agent_end(
+                SimpleNamespace(), SimpleNamespace(), "out"
+            )
+            with patch(
+                "quantmind.mind.memory._run_hooks.write_run_record",
+                new=AsyncMock(),
+            ) as mock_write:
+                await hooks.persist(
+                    workflow_name="quantmind.paper_flow",
+                    result=SimpleNamespace(trace_id="trace_xx"),
+                    error=None,
+                    input_payload="hello",
+                )
+            args, _kwargs = mock_write.call_args
+            record = args[1]
+            self.assertEqual(record.workflow_name, "quantmind.paper_flow")
+            self.assertEqual(record.trace_id, "trace_xx")
+            self.assertIsNone(record.error)
+
+    async def test_persist_error_path_records_exception_string(
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem_dir = Path(raw)
+            hooks = _make_hooks(mem_dir)
+            with patch(
+                "quantmind.mind.memory._run_hooks.write_run_record",
+                new=AsyncMock(),
+            ) as mock_write:
+                await hooks.persist(
+                    workflow_name="quantmind.paper_flow",
+                    result=None,
+                    error=ValueError("boom"),
+                    input_payload="hi",
+                )
+            record = mock_write.call_args.args[1]
+            self.assertEqual(record.error, "boom")

From 6f9977170671b58dd7d3e368c423afc78ccff607 Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:51:57 +0800
Subject: [PATCH 04/11] feat(mind): FilesystemMemory MVP (PR6)

Routes Agent file access through the SDK's MCPServerStdio
(npx + @modelcontextprotocol/server-filesystem). run_hooks() returns
a fresh MemoryRunHooks per call sharing the per-instance asyncio.Lock.
reset() is destructive and refuses '/' and the user home directory.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/mind/__init__.py           |   4 +-
 quantmind/mind/memory/__init__.py    |   4 +-
 quantmind/mind/memory/filesystem.py  | 116 +++++++++++++++++++++++++++
 tests/mind/memory/test_filesystem.py |  82 +++++++++++++++++++
 4 files changed, 203 insertions(+), 3 deletions(-)
 create mode 100644 quantmind/mind/memory/filesystem.py
 create mode 100644 tests/mind/memory/test_filesystem.py

diff --git a/quantmind/mind/__init__.py b/quantmind/mind/__init__.py
index febb682..0a39d60 100644
--- a/quantmind/mind/__init__.py
+++ b/quantmind/mind/__init__.py
@@ -5,6 +5,6 @@
 ``mind/summarize_run`` (trajectory summariser).
 """
 
-from quantmind.mind.memory import Memory
+from quantmind.mind.memory import FilesystemMemory, Memory, MemoryRunHooks
 
-__all__ = ["Memory"]
+__all__ = ["FilesystemMemory", "Memory", "MemoryRunHooks"]
diff --git a/quantmind/mind/memory/__init__.py b/quantmind/mind/memory/__init__.py
index d7d070a..579b5b1 100644
--- a/quantmind/mind/memory/__init__.py
+++ b/quantmind/mind/memory/__init__.py
@@ -1,5 +1,7 @@
 """quantmind.mind.memory — Memory Protocol + filesystem MVP backend."""
 
 from quantmind.mind.memory._protocol import Memory
+from quantmind.mind.memory._run_hooks import MemoryRunHooks
+from quantmind.mind.memory.filesystem import FilesystemMemory
 
-__all__ = ["Memory"]
+__all__ = ["FilesystemMemory", "Memory", "MemoryRunHooks"]
diff --git a/quantmind/mind/memory/filesystem.py b/quantmind/mind/memory/filesystem.py
new file mode 100644
index 0000000..7bb8c77
--- /dev/null
+++ b/quantmind/mind/memory/filesystem.py
@@ -0,0 +1,116 @@
+"""``FilesystemMemory`` — MVP cross-step memory backed by an MCP filesystem server.
+
+Layout::
+
+    <memory_dir>/
+        notes/                # agent's free-form working notes (markdown)
+        items/                # typed KnowledgeItem JSON (PR7+)
+        runs/                 # trajectory archive — runs/<run_id>.json
+        runs.jsonl            # append-only run index
+        README.md             # agent-facing usage guide
+
+Requires Node.js + ``npx`` on PATH (the SDK's ``MCPServerStdio`` spawns
+``npx -y @modelcontextprotocol/server-filesystem``). ``__init__`` does
+**not** pre-flight-check ``npx``; the SDK surfaces a clear error at run
+time when it is missing.
+"""
+
+import asyncio
+import shutil
+from pathlib import Path
+from typing import Any
+
+from agents import RunHooks, Tool
+from agents.mcp import MCPServer, MCPServerStdio
+
+from quantmind.mind.memory._run_hooks import MemoryRunHooks
+
+_AGENT_README_TEXT = """\
+# Memory directory for QuantMind flow run
+
+Available subdirectories:
+- `notes/` — your free-form working notes (markdown). Read existing notes
+  before writing new ones.
+- `items/` — structured KnowledgeItem JSON files emitted by previous runs.
+- `runs/` — system-managed run trajectory logs (do not edit).
+- `runs.jsonl` — append-only run index (system-managed, do not edit).
+
+Guidelines:
+1. BEFORE doing your task, list `notes/` and `items/` to see relevant
+   prior context.
+2. When you find a fact worth remembering, write a short markdown note
+   under `notes/`, named `<topic>_<short_slug>.md`.
+3. Don't repeat work. If the same item is already in `items/`, prefer
+   to reference rather than re-extract.
+"""
+
+
+def _is_forbidden_path(path: Path) -> bool:
+    return path == Path("/") or path == Path.home()
+
+
+class FilesystemMemory:
+    """Filesystem-backed cross-step memory using the MCP filesystem server.
+
+    Constructed once per serial loop / batch; passed to ``paper_flow``
+    via the ``memory=`` kwarg. Each ``run_hooks()`` invocation returns
+    a fresh ``MemoryRunHooks`` so per-run accumulator state is isolated;
+    they share the per-instance ``asyncio.Lock`` that serialises
+    ``runs.jsonl`` appends.
+    """
+
+    def __init__(self, memory_dir: str | Path) -> None:
+        self.memory_dir = Path(memory_dir).resolve()
+        if _is_forbidden_path(self.memory_dir):
+            raise ValueError(
+                "memory_dir must not be '/' or the user home directory; "
+                "choose a dedicated subdirectory."
+            )
+        for sub in ("notes", "items", "runs"):
+            (self.memory_dir / sub).mkdir(parents=True, exist_ok=True)
+        readme = self.memory_dir / "README.md"
+        if not readme.exists():
+            readme.write_text(_AGENT_README_TEXT, encoding="utf-8")
+        self._archive_lock = asyncio.Lock()
+
+    def tools(self) -> list[Tool]:
+        return []
+
+    def mcp_servers(self) -> list[MCPServer]:
+        return [
+            MCPServerStdio(
+                name="quantmind_memory_fs",
+                params={
+                    "command": "npx",
+                    "args": [
+                        "-y",
+                        "@modelcontextprotocol/server-filesystem",
+                        str(self.memory_dir),
+                    ],
+                },
+            )
+        ]
+
+    def run_hooks(self) -> RunHooks[Any] | None:
+        return MemoryRunHooks(
+            memory_dir=self.memory_dir,
+            archive_lock=self._archive_lock,
+        )
+
+    async def reset(self) -> None:
+        """Wipe ``notes/``, ``items/``, ``runs/``, and ``runs.jsonl``.
+
+        Destructive — irreversibly removes every file under those paths.
+        Re-creates the empty subdirectories and seeds the agent README
+        afterwards.
+        """
+        for sub in ("notes", "items", "runs"):
+            shutil.rmtree(self.memory_dir / sub, ignore_errors=True)
+        runs_jsonl = self.memory_dir / "runs.jsonl"
+        if runs_jsonl.exists():
+            runs_jsonl.unlink()
+        for sub in ("notes", "items", "runs"):
+            (self.memory_dir / sub).mkdir(parents=True, exist_ok=True)
+        readme = self.memory_dir / "README.md"
+        if not readme.exists():
+            readme.write_text(_AGENT_README_TEXT, encoding="utf-8")
diff --git a/tests/mind/memory/test_filesystem.py b/tests/mind/memory/test_filesystem.py
new file mode 100644
index 0000000..921b08d
--- /dev/null
+++ b/tests/mind/memory/test_filesystem.py
@@ -0,0 +1,82 @@
+"""Tests for ``quantmind.mind.memory.filesystem``."""
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from agents.mcp import MCPServerStdio
+
+from quantmind.mind.memory._run_hooks import MemoryRunHooks
+from quantmind.mind.memory.filesystem import FilesystemMemory
+
+
+class FilesystemMemoryInitTests(unittest.TestCase):
+    def test_creates_subdirs_and_readme(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            for sub in ("notes", "items", "runs"):
+                self.assertTrue((mem.memory_dir / sub).is_dir())
+            self.assertTrue((mem.memory_dir / "README.md").exists())
+
+    def test_readme_only_seeded_once(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            (mem.memory_dir / "README.md").write_text("custom")
+            FilesystemMemory(raw)
+            self.assertEqual(
+                (mem.memory_dir / "README.md").read_text(), "custom"
+            )
+
+    def test_rejects_root_path(self) -> None:
+        with self.assertRaises(ValueError):
+            FilesystemMemory("/")
+
+    def test_rejects_home_path(self) -> None:
+        with self.assertRaises(ValueError):
+            FilesystemMemory(str(Path.home()))
+
+
+class FilesystemMemoryMethodsTests(unittest.TestCase):
+    def test_tools_returns_empty(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            self.assertEqual(mem.tools(), [])
+
+    def test_mcp_servers_returns_stdio_with_resolved_path(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            servers = mem.mcp_servers()
+            self.assertEqual(len(servers), 1)
+            self.assertIsInstance(servers[0], MCPServerStdio)
+
+    def test_mcp_servers_returns_fresh_instance_each_call(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            self.assertIsNot(mem.mcp_servers()[0], mem.mcp_servers()[0])
+
+    def test_run_hooks_returns_memory_run_hooks_with_lock(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            h1 = mem.run_hooks()
+            h2 = mem.run_hooks()
+            self.assertIsInstance(h1, MemoryRunHooks)
+            self.assertIsNot(h1, h2)
+            assert isinstance(h1, MemoryRunHooks)
+            assert isinstance(h2, MemoryRunHooks)
+            self.assertIs(h1._archive_lock, h2._archive_lock)
+
+
+class FilesystemMemoryResetTests(unittest.IsolatedAsyncioTestCase):
+    async def test_reset_wipes_subdirs_and_jsonl(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            (mem.memory_dir / "notes" / "n.md").write_text("x")
+            (mem.memory_dir / "items" / "i.json").write_text("{}")
+            (mem.memory_dir / "runs" / "r.json").write_text("{}")
+            (mem.memory_dir / "runs.jsonl").write_text("{}\n")
+            await mem.reset()
+            for sub in ("notes", "items", "runs"):
+                self.assertTrue((mem.memory_dir / sub).is_dir())
+                self.assertEqual(list((mem.memory_dir / sub).iterdir()), [])
+            self.assertFalse((mem.memory_dir / "runs.jsonl").exists())
+            self.assertTrue((mem.memory_dir / "README.md").exists())

From 66dbb0d2b4daf7bc4e35214b48ca0fe1010ed04a Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:53:34 +0800
Subject: [PATCH 05/11] refactor(flows): wire MemoryRunHooks via try/finally
 (PR6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

run_with_observability now consumes Memory.run_hooks() and calls
MemoryRunHooks.persist in finally so failed runs still archive.
_collect_hooks and _archive_run_artifacts are gone — the runner
holds the inline orchestration; persistence lives in
mind/memory/_trajectory.write_run_record.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/flows/_runner.py |  93 ++++++++++++------------
 tests/flows/test_runner.py | 145 ++++++++++++++++++++++++++++++-------
 2 files changed, 163 insertions(+), 75 deletions(-)

diff --git a/quantmind/flows/_runner.py b/quantmind/flows/_runner.py
index 1f3b507..9b020a2 100644
--- a/quantmind/flows/_runner.py
+++ b/quantmind/flows/_runner.py
@@ -2,9 +2,9 @@
 
 `run_with_observability` wraps `Runner.run` with `RunConfig` derived from
 `BaseFlowCfg`, composes user-supplied `RunHooks` (the SDK accepts only a
-single hooks instance per run), and leaves a no-op call site for the
-PR6 trajectory archive. Flow modules call this instead of touching the
-SDK directly so observability behaviour stays in one place.
+single hooks instance per run), and orchestrates the
+`MemoryRunHooks.persist()` call in `finally` so failed runs still
+produce a trajectory record.
 """
 
 from typing import Any
@@ -12,6 +12,7 @@
 from agents import Agent, RunConfig, RunHooks, Runner
 
 from quantmind.configs import BaseFlowCfg
+from quantmind.mind.memory import Memory, MemoryRunHooks
 
 
 async def run_with_observability(
@@ -19,7 +20,7 @@ async def run_with_observability(
     input: str | list[Any],
     *,
     cfg: BaseFlowCfg,
-    memory: object | None = None,
+    memory: Memory | None = None,
     extra_run_hooks: list[RunHooks[Any]],
 ) -> Any:
     """Build `RunConfig` + composed hooks, run the agent, return final output.
@@ -30,12 +31,12 @@ async def run_with_observability(
         cfg: Flow configuration. Tracing fields and ``max_turns`` are
             forwarded to the SDK; ``workflow_name`` falls back to
             ``"quantmind.<agent.name>"`` when unset.
-        memory: PR6 ``Memory`` placeholder. Currently unused at runtime;
-            the value is forwarded to the trajectory-archive stub so PR6
-            can wire it in without changing call sites.
-        extra_run_hooks: User-supplied hooks. Composed with any
-            memory-derived hooks (none in PR5) into a single
-            ``RunHooks`` instance.
+        memory: Optional ``Memory`` implementation. When set and
+            ``cfg.archive_trajectory`` is True, ``memory.run_hooks()``
+            participates in the run and ``MemoryRunHooks.persist()``
+            is invoked in ``finally`` (so failures archive too).
+        extra_run_hooks: User-supplied hooks composed after the memory
+            hook.
 
     Returns:
         ``RunResult.final_output`` typed by the agent's ``output_type``.
@@ -47,29 +48,40 @@ async def run_with_observability(
         trace_include_sensitive_data=cfg.trace_include_sensitive_data,
         tracing_disabled=cfg.tracing_disabled,
     )
-    hooks = _compose_hooks(_collect_hooks(memory, extra_run_hooks))
-    result = await Runner.run(
-        agent,
-        input,
-        run_config=run_cfg,
-        hooks=hooks,
-        max_turns=cfg.max_turns,
-    )
-    _archive_run_artifacts(cfg, memory, result)
-    return result.final_output
-
 
-def _collect_hooks(
-    memory: object | None,
-    extras: list[RunHooks[Any]],
-) -> list[RunHooks[Any]]:
-    """Return hooks in run order: memory hooks first (PR6), then extras."""
-    hooks: list[RunHooks[Any]] = []
-    # PR6 will append `memory.run_hooks()` here when `memory` exposes the
-    # `Memory` Protocol. PR5 keeps `memory` opaque and contributes no hooks.
-    del memory
-    hooks.extend(extras)
-    return hooks
+    memory_hooks: MemoryRunHooks | None = None
+    hooks_list: list[RunHooks[Any]] = []
+    if memory is not None and cfg.archive_trajectory:
+        h = memory.run_hooks()
+        if h is not None:
+            hooks_list.append(h)
+            if isinstance(h, MemoryRunHooks):
+                memory_hooks = h
+    hooks_list.extend(extra_run_hooks)
+    composed = _compose_hooks(hooks_list)
+
+    result: Any = None
+    error: BaseException | None = None
+    try:
+        result = await Runner.run(
+            agent,
+            input,
+            run_config=run_cfg,
+            hooks=composed,
+            max_turns=cfg.max_turns,
+        )
+        return result.final_output
+    except BaseException as exc:
+        error = exc
+        raise
+    finally:
+        if memory_hooks is not None:
+            await memory_hooks.persist(
+                workflow_name=workflow_name,
+                result=result,
+                error=error,
+                input_payload=input,
+            )
 
 
 def _compose_hooks(
@@ -87,8 +99,7 @@ class _CompositeRunHooks(RunHooks[Any]):
     """Fan out every lifecycle method to each wrapped hook in order.
 
     Exceptions from earlier hooks short-circuit the rest by design — hooks
-    are integral to the run, not best-effort. PR6's archive hook should
-    catch its own exceptions internally if it wants resilience.
+    are integral to the run, not best-effort.
     """
 
     def __init__(self, inner: list[RunHooks[Any]]) -> None:
@@ -121,17 +132,3 @@ async def on_tool_start(self, *args: Any, **kwargs: Any) -> None:
     async def on_tool_end(self, *args: Any, **kwargs: Any) -> None:
         for h in self._inner:
             await h.on_tool_end(*args, **kwargs)
-
-
-def _archive_run_artifacts(
-    cfg: BaseFlowCfg,
-    memory: object | None,
-    result: Any,
-) -> None:
-    """No-op stub. PR6 writes a trajectory record under ``<memory_dir>/runs/``.
-
-    Kept as a real call site (rather than commented-out) so PR6 changes
-    one function body, not the runner's public path.
-    """
-    del cfg, memory, result
-    return None
diff --git a/tests/flows/test_runner.py b/tests/flows/test_runner.py
index 4ece594..65dab48 100644
--- a/tests/flows/test_runner.py
+++ b/tests/flows/test_runner.py
@@ -1,5 +1,6 @@
 """Tests for ``quantmind.flows._runner``."""
 
+import tempfile
 import unittest
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
@@ -8,12 +9,11 @@
 
 from quantmind.configs import PaperFlowCfg
 from quantmind.flows._runner import (
-    _archive_run_artifacts,
-    _collect_hooks,
     _compose_hooks,
     _CompositeRunHooks,
     run_with_observability,
 )
+from quantmind.mind.memory import FilesystemMemory
 
 
 class _RecordingHooks(RunHooks[Any]):
@@ -45,6 +45,12 @@ async def on_tool_end(self, *_: Any, **__: Any) -> None:
         self.log.append((self.label, "on_tool_end"))
 
 
+class _FakeRunResult:
+    def __init__(self, output: str = "ok") -> None:
+        self.final_output = output
+        self.trace_id = "trace_xyz"
+
+
 class ComposeHooksTests(unittest.TestCase):
     def test_empty_returns_none(self) -> None:
         self.assertIsNone(_compose_hooks([]))
@@ -73,7 +79,6 @@ async def test_fan_out_in_registration_order(self) -> None:
         await composite.on_handoff()
         await composite.on_tool_start()
         await composite.on_tool_end()
-        # Each method fires for both hooks in registration order.
         for method in (
             "on_llm_start",
             "on_llm_end",
@@ -100,26 +105,6 @@ async def on_llm_start(self, *_: Any, **__: Any) -> None:
         self.assertEqual(log, [])
 
 
-class CollectHooksTests(unittest.TestCase):
-    def test_memory_contributes_nothing_in_pr5(self) -> None:
-        extra = _RecordingHooks("a", [])
-        # PR5: memory is forwarded but unused.
-        self.assertEqual(_collect_hooks(None, [extra]), [extra])
-        self.assertEqual(_collect_hooks(object(), [extra]), [extra])
-
-    def test_no_extras_returns_empty(self) -> None:
-        self.assertEqual(_collect_hooks(None, []), [])
-
-
-class ArchiveStubTests(unittest.TestCase):
-    def test_archive_is_no_op(self) -> None:
-        cfg = PaperFlowCfg()
-        result = MagicMock()
-        # Must not raise, must return None, must not touch result.
-        self.assertIsNone(_archive_run_artifacts(cfg, None, result))
-        result.assert_not_called()
-
-
 class RunWithObservabilityTests(unittest.IsolatedAsyncioTestCase):
     async def test_run_config_built_from_cfg(self) -> None:
         cfg = PaperFlowCfg(
@@ -156,11 +141,10 @@ async def test_run_config_built_from_cfg(self) -> None:
         self.assertEqual(run_cfg.trace_metadata, {"k": "v"})
         self.assertFalse(run_cfg.trace_include_sensitive_data)
         self.assertTrue(run_cfg.tracing_disabled)
-        # No hooks supplied -> Runner.run sees None.
         self.assertIsNone(call.kwargs["hooks"])
 
     async def test_workflow_name_falls_back_to_agent_name(self) -> None:
-        cfg = PaperFlowCfg()  # workflow_name = None
+        cfg = PaperFlowCfg()
         agent = MagicMock()
         agent.name = "paper_extractor"
         fake_result = MagicMock()
@@ -192,8 +176,115 @@ async def test_extra_hooks_forwarded(self) -> None:
                 agent,
                 "x",
                 cfg=cfg,
-                memory=object(),  # PR6 placeholder
+                memory=None,
                 extra_run_hooks=[hook],
             )
-        # Single hook -> passed through as-is, not wrapped in composite.
         self.assertIs(run_mock.await_args.kwargs["hooks"], hook)
+
+
+class RunnerMemoryWiringTests(unittest.IsolatedAsyncioTestCase):
+    async def test_persist_invoked_on_success(self) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            cfg = PaperFlowCfg()
+            agent = MagicMock()
+            agent.name = "paper_extractor"
+            with (
+                patch(
+                    "quantmind.flows._runner.Runner.run",
+                    new=AsyncMock(return_value=_FakeRunResult("done")),
+                ),
+                patch(
+                    "quantmind.mind.memory._run_hooks.write_run_record",
+                    new=AsyncMock(),
+                ) as mock_write,
+            ):
+                out = await run_with_observability(
+                    agent,
+                    "input",
+                    cfg=cfg,
+                    memory=mem,
+                    extra_run_hooks=[],
+                )
+            self.assertEqual(out, "done")
+            self.assertEqual(mock_write.await_count, 1)
+            record = mock_write.await_args.args[1]
+            self.assertIsNone(record.error)
+            self.assertEqual(record.trace_id, "trace_xyz")
+
+    async def test_persist_invoked_on_failure_with_error_string(
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            cfg = PaperFlowCfg()
+            agent = MagicMock()
+            agent.name = "paper_extractor"
+            boom = RuntimeError("boom")
+            with (
+                patch(
+                    "quantmind.flows._runner.Runner.run",
+                    new=AsyncMock(side_effect=boom),
+                ),
+                patch(
+                    "quantmind.mind.memory._run_hooks.write_run_record",
+                    new=AsyncMock(),
+                ) as mock_write,
+            ):
+                with self.assertRaises(RuntimeError):
+                    await run_with_observability(
+                        agent,
+                        "input",
+                        cfg=cfg,
+                        memory=mem,
+                        extra_run_hooks=[],
+                    )
+            self.assertEqual(mock_write.await_count, 1)
+            record = mock_write.await_args.args[1]
+            self.assertEqual(record.error, "boom")
+
+    async def test_archive_trajectory_false_skips_memory_hooks(
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            cfg = PaperFlowCfg(archive_trajectory=False)
+            agent = MagicMock()
+            agent.name = "paper_extractor"
+            with (
+                patch(
+                    "quantmind.flows._runner.Runner.run",
+                    new=AsyncMock(return_value=_FakeRunResult()),
+                ),
+                patch(
+                    "quantmind.mind.memory._run_hooks.write_run_record",
+                    new=AsyncMock(),
+                ) as mock_write,
+            ):
+                await run_with_observability(
+                    agent,
+                    "input",
+                    cfg=cfg,
+                    memory=mem,
+                    extra_run_hooks=[],
+                )
+            self.assertEqual(mock_write.await_count, 0)
+
+    async def test_no_memory_skips_persist(self) -> None:
+        cfg = PaperFlowCfg()
+        agent = MagicMock()
+        agent.name = "paper_extractor"
+        with (
+            patch(
+                "quantmind.flows._runner.Runner.run",
+                new=AsyncMock(return_value=_FakeRunResult()),
+            ),
+            patch(
+                "quantmind.mind.memory._run_hooks.write_run_record",
+                new=AsyncMock(),
+            ) as mock_write,
+        ):
+            await run_with_observability(
+                agent, "input", cfg=cfg, memory=None, extra_run_hooks=[]
+            )
+        self.assertEqual(mock_write.await_count, 0)

From 343a53bb24ef9caa35dacf009213921c2eacc9ac Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:55:52 +0800
Subject: [PATCH 06/11] refactor(paper_flow): tighten memory: Memory | None and
 wire mcp/tools (PR6)

paper_flow now imports the Memory Protocol from quantmind.mind.memory.
memory.mcp_servers() and memory.tools() flow through to the Agent
unconditionally; the cfg.archive_trajectory knob is about persistence
only, not memory access.

The PR5 placeholder test test_memory_accepted_as_no_op is removed
(replaced by PaperFlowMemoryWiringTests covering the real wiring).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/flows/paper.py  | 15 ++++--
 tests/flows/test_paper.py | 98 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/quantmind/flows/paper.py b/quantmind/flows/paper.py
index 5b8373b..bf5e31f 100644
--- a/quantmind/flows/paper.py
+++ b/quantmind/flows/paper.py
@@ -26,6 +26,7 @@
 )
 from quantmind.flows._runner import run_with_observability
 from quantmind.knowledge import Paper
+from quantmind.mind.memory import Memory
 from quantmind.preprocess.fetch import (
     Fetched,
     fetch_arxiv,
@@ -68,15 +69,17 @@ async def paper_flow(
     extra_tools: list[Tool] | None = None,
     extra_instructions: str | None = None,
     output_type: type[P] | None = None,
-    memory: object | None = None,
+    memory: Memory | None = None,
     extra_run_hooks: list[RunHooks[Any]] | None = None,
     extra_input_guardrails: list[Any] | None = None,
     extra_output_guardrails: list[Any] | None = None,
 ) -> P | Paper:
     """Extract a ``Paper`` from a typed ``PaperInput``.
 
-    See design doc §4.1 for the rationale on each kwarg. ``memory`` is a
-    PR6 placeholder — non-None values are accepted but unused in PR5.
+    See design doc §4.1 for the rationale on each kwarg. When ``memory``
+    is supplied, ``memory.mcp_servers()`` and ``memory.tools()`` flow
+    through to the Agent unconditionally; trajectory archiving is gated
+    separately by ``cfg.archive_trajectory`` inside the runner.
 
     Raises:
         UnsupportedContentTypeError: When fetched bytes are not PDF /
@@ -89,6 +92,9 @@ async def paper_flow(
 
     raw_md, source_meta = await _fetch_and_format(input)
 
+    mcp_servers = memory.mcp_servers() if memory is not None else []
+    memory_tools = memory.tools() if memory is not None else []
+
     # Agent's `model_settings` parameter is non-optional (defaults to a
     # fresh ``ModelSettings()``); only forward when cfg has one set.
     agent_kwargs: dict[str, Any] = {
@@ -97,7 +103,8 @@ async def paper_flow(
             _DEFAULT_INSTRUCTIONS, extra_instructions, cfg
         ),
         "model": cfg.model,
-        "tools": list(extra_tools or []),
+        "tools": [*(extra_tools or []), *memory_tools],
+        "mcp_servers": mcp_servers,
         "output_type": out_type,
         "input_guardrails": list(extra_input_guardrails or []),
         "output_guardrails": list(extra_output_guardrails or []),
diff --git a/tests/flows/test_paper.py b/tests/flows/test_paper.py
index 2a143c7..ef3178b 100644
--- a/tests/flows/test_paper.py
+++ b/tests/flows/test_paper.py
@@ -299,18 +299,6 @@ def _capture_agent(*_a: Any, **kwargs: Any) -> Any:
         self.assertEqual(seen["input_guardrails"], [in_g])
         self.assertEqual(seen["output_guardrails"], [out_g])
 
-    async def test_memory_accepted_as_no_op(self) -> None:
-        with (
-            patch(
-                "quantmind.flows.paper.Agent",
-                return_value=MagicMock(),
-            ),
-            _patch_runner(_stub_paper()) as runner,
-        ):
-            await paper_flow(RawText(text="x"), memory=object())
-        # The runner sees the memory placeholder forwarded.
-        self.assertIsNotNone(runner.await_args.kwargs["memory"])
-
     async def test_extra_run_hooks_forwarded(self) -> None:
         class _H(RunHooks[Any]):
             pass
@@ -357,3 +345,89 @@ def _capture_agent(*_a: Any, **kwargs: Any) -> Any:
         ):
             await paper_flow(RawText(text="x"))
         self.assertNotIn("model_settings", seen)
+
+
+class PaperFlowMemoryWiringTests(unittest.IsolatedAsyncioTestCase):
+    async def test_memory_mcp_servers_passed_to_agent(self) -> None:
+        import tempfile
+
+        from quantmind.mind.memory import FilesystemMemory
+
+        seen: dict[str, Any] = {}
+
+        def _capture_agent(*_a: Any, **kwargs: Any) -> Any:
+            seen.update(kwargs)
+            return MagicMock()
+
+        with tempfile.TemporaryDirectory() as raw:
+            mem = FilesystemMemory(raw)
+            with (
+                patch(
+                    "quantmind.flows.paper.Agent",
+                    side_effect=_capture_agent,
+                ),
+                _patch_runner(_stub_paper()),
+            ):
+                await paper_flow(RawText(text="hello"), memory=mem)
+        self.assertEqual(len(seen["mcp_servers"]), 1)
+
+    async def test_no_memory_yields_empty_mcp_servers_and_tools(
+        self,
+    ) -> None:
+        seen: dict[str, Any] = {}
+
+        def _capture_agent(*_a: Any, **kwargs: Any) -> Any:
+            seen.update(kwargs)
+            return MagicMock()
+
+        with (
+            patch("quantmind.flows.paper.Agent", side_effect=_capture_agent),
+            _patch_runner(_stub_paper()),
+        ):
+            await paper_flow(RawText(text="hello"))
+        self.assertEqual(seen["mcp_servers"], [])
+        self.assertEqual(seen["tools"], [])
+
+    async def test_memory_tools_appended_after_extra_tools(self) -> None:
+        from agents import function_tool
+
+        @function_tool
+        def _extra_tool() -> str:
+            return ""
+
+        @function_tool
+        def _memory_tool() -> str:
+            return ""
+
+        class _ToolMemory:
+            def tools(self) -> list:
+                return [_memory_tool]
+
+            def mcp_servers(self) -> list:
+                return []
+
+            def run_hooks(self) -> Any:
+                return None
+
+            async def reset(self) -> None:
+                return None
+
+        seen: dict[str, Any] = {}
+
+        def _capture_agent(*_a: Any, **kwargs: Any) -> Any:
+            seen.update(kwargs)
+            return MagicMock()
+
+        with (
+            patch("quantmind.flows.paper.Agent", side_effect=_capture_agent),
+            _patch_runner(_stub_paper()),
+        ):
+            await paper_flow(
+                RawText(text="x"),
+                extra_tools=[_extra_tool],
+                memory=_ToolMemory(),
+            )
+        # Order: extra_tools first, then memory_tools.
+        self.assertEqual(len(seen["tools"]), 2)
+        self.assertIs(seen["tools"][0], _extra_tool)
+        self.assertIs(seen["tools"][1], _memory_tool)

From 2e2f16f0f67deb41081e1ef6a037dbde9c515d1e Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:56:30 +0800
Subject: [PATCH 07/11] chore(import-linter): pin mind/ as bounded subsystem
 (PR6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sixth contract forbids mind from importing flows, magic, or any of
the deleted transitional packages (tripwires). flows depends on mind,
so the reverse would create a cycle — the contract makes that fact
unmissable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 pyproject.toml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 50d6fe9..270ded0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -188,6 +188,22 @@ forbidden_modules = [
     "quantmind.models",
 ]
 
+[[tool.importlinter.contracts]]
+name = "mind only depends on knowledge/configs/preprocess/utils + agents SDK"
+type = "forbidden"
+source_modules = ["quantmind.mind"]
+# `mind` MUST NOT import `flows` or `magic` (those depend on `mind`,
+# so the reverse would create a cycle). The deleted transitional
+# packages remain as tripwires against accidental re-introduction.
+forbidden_modules = [
+    "quantmind.flows",
+    "quantmind.magic",
+    "quantmind.config",
+    "quantmind.flow",
+    "quantmind.llm",
+    "quantmind.models",
+]
+
 # ----------------------------------------------------------------------------
 # pytest: configuration + coverage gate
 # ----------------------------------------------------------------------------

From 5bf7eccd9fb2ada7874a5b4ef49565e644409231 Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 01:58:16 +0800
Subject: [PATCH 08/11] docs(mind): runbook + state table for FilesystemMemory
 (PR6)

README has a serial-loop runbook example showing the npx requirement
and trajectory archive output, plus a clarification that batch_run
rejects memory= by design. CLAUDE.md state table records the landed
mind/memory/ module + sixth import-linter contract; roadmap promotes
PR6 to "this PR" and keeps PR7+ as the next step.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md | 24 +++++++++++----------
 README.md | 62 +++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 2c790ec..f7829ca 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -29,31 +29,33 @@ quantmind/
 Key principle: QuantMind does NOT rebuild Agent runtime, lifecycle hooks, tracing,
 multi-agent handoff, or tool framework. Those come from `openai-agents`.
 
-## Current Repository State (after PR #70 / #73 / #74 / #75 / PR5)
+## Current Repository State (after PR #70 / #73 / #74 / #75 / #76 / PR6)
 
 | Module | Status | Notes |
 |--------|--------|-------|
 | `quantmind/knowledge/` | landed (PR3) | data standard with three shapes: `FlattenKnowledge` (`News` / `Earnings` / `PaperKnowledgeCard`), `TreeKnowledge` (`Paper`), `GraphKnowledge` (placeholder); shared base = `BaseKnowledge` with typed `SourceRef` / `ExtractionRef` provenance + `embedding_text()` contract |
 | `quantmind/configs/` | landed (PR3) | `BaseFlowCfg` / `BaseInput` + per-flow cfg + discriminated-union input types |
 | `quantmind/preprocess/` | landed (PR4) | `fetch/` (`fetch_arxiv` / `fetch_url` / `resolve_doi` / `read_local_file` returning `Fetched` / `RawPaper` / `CrossrefMetadata` frozen dataclasses) + `format/` (`pdf_to_markdown` via PyMuPDF, `html_to_markdown` via trafilatura) + `clean.py` + `time.py`; leaf module — only depends on `quantmind.utils` |
-| `quantmind/flows/` | landed (PR5) | apex layer: `paper_flow` (`PaperInput` → `Paper` via SDK Agent), `batch_run` + `BatchResult` (bounded-concurrency fan-out, `memory=` rejected by design), `_runner.run_with_observability` + `_compose_hooks` + `_archive_run_artifacts` (PR6 stub); only depends on configs/knowledge/preprocess/utils + `agents` SDK |
+| `quantmind/flows/` | landed (PR5, refined PR6) | apex layer: `paper_flow` (`PaperInput` → `Paper` via SDK Agent, now wired with `memory.mcp_servers()` + `memory.tools()`), `batch_run` + `BatchResult` (bounded-concurrency fan-out, `memory=` rejected by design), `_runner.run_with_observability` (PR6: `try/finally` invokes `MemoryRunHooks.persist`); depends on configs/knowledge/preprocess/utils + `mind` + `agents` SDK |
 | `quantmind/magic.py` | landed (PR5) | `resolve_magic_input(natural_language, *, target_flow, ...) -> (input, cfg)` plus `preview_resolve` debug helper; introspects flow signatures and runs a lightweight resolver Agent with `output_type=ResolvedFlowConfig[InputT, CfgT]` |
+| `quantmind/mind/memory/` | landed (PR6) | `Memory` Protocol (granular: `tools()` / `mcp_servers()` / `run_hooks()` / `reset()`) + `FilesystemMemory` MVP (MCP filesystem server via `npx`) + `MemoryRunHooks` accumulator + `RunRecord` trajectory archive under `<memory_dir>/runs/` (atomic write + `runs.jsonl` index); sixth import-linter contract pins `mind` as bounded |
 | `quantmind/utils/logger.py` | permanent | only general-purpose utility |
 
 PR5 removed the transitional packages (`quantmind/{flow,llm,config,models}/`
 and their tests under `tests/{config,models}/`); PR4 had already removed
 `quantmind/parsers/`, `quantmind/sources/`, and `quantmind/utils/tmp.py`.
-The codebase has now converged to the five permanent module roots
-(`flows/`, `configs/`, `knowledge/`, `preprocess/`, `mind/`) plus
-`magic.py` and `utils/`.
+PR6 added `quantmind/mind/memory/` and tightened the `paper_flow` /
+`_runner` signatures to consume the `Memory` Protocol directly.
+
+The codebase now has six permanent module roots (`flows/`, `configs/`,
+`knowledge/`, `preprocess/`, `mind/`) plus `magic.py` and `utils/`.
 
 `basedpyright` runs in standard mode across the whole `quantmind/`
-package — there are no per-module exclusions left. Five `import-linter`
+package — there are no per-module exclusions left. Six `import-linter`
 contracts pin the dependency graph: `utils` and `knowledge` are leaves,
 `configs` only depends on `knowledge`, `preprocess` only depends on
-`utils`, and `flows + magic` is the apex (cannot import the deleted
-transitional packages, which are listed in the contract as a tripwire
-against accidental re-introduction).
+`utils`, `flows + magic` is the apex, and `mind` is a bounded subsystem
+(cannot import `flows` / `magic` / the deleted transitional packages).
 
 ## Development Commands
 
@@ -173,7 +175,7 @@ issue instead.
 | #73 (merged) | Golden Harness — `scripts/verify.sh` with ruff + basedpyright + import-linter + pytest --cov, plus matching CI |
 | #74 (merged) | `knowledge/` data standard (Flatten / Tree / Graph shapes) + `configs/` skeleton; `openai-agents>=0.14` introduced for `BaseFlowCfg.model_settings` |
 | #75 (merged) | `preprocess/` (fetch + format two layers); deletes `parsers/` + `sources/` + `utils/tmp.py`; coverage floor 60→65; 4th import-linter contract |
-| PR5 (this PR) | `flows/` (`paper_flow` + `batch_run` + `BatchResult` + `_runner`) + `magic.py`; deletes `quantmind/{flow,llm,config,models}/`; coverage floor 65→75; 5th import-linter contract pins `flows + magic` as apex |
-| PR6 | `mind/memory/filesystem` MVP + trajectory archive (fills `_archive_run_artifacts` stub) |
+| #76 (merged) | `flows/` (`paper_flow` + `batch_run` + `BatchResult` + `_runner`) + `magic.py`; deletes `quantmind/{flow,llm,config,models}/`; coverage floor 65→75; 5th import-linter contract pins `flows + magic` as apex |
+| PR6 (this PR) | `mind/memory/filesystem` MVP + trajectory archive: `Memory` Protocol + `FilesystemMemory` (MCP filesystem server) + `MemoryRunHooks` + `RunRecord`; tightens `paper_flow.memory` + `_runner` to consume `Memory \| None`; replaces the PR5 `_archive_run_artifacts` stub with `try/finally` invoking `MemoryRunHooks.persist`; 6th import-linter contract pins `mind` boundaries |
 | PR7 | `mind/store/` + SQLite + `sqlite-vec` MVP; introduces `preprocess/chunk.py` with `tiktoken` |
 | PR8+ | Second flow (news/earnings) / observability cookbook / longer-term modules |
diff --git a/README.md b/README.md
index fb6390f..9d16605 100644
--- a/README.md
+++ b/README.md
@@ -224,10 +224,43 @@ async def main() -> None:
 asyncio.run(main())
 ```
 
+#### Persistent memory across a serial loop
+
+```python
+import asyncio
+
+from quantmind.configs.paper import ArxivIdentifier
+from quantmind.flows import paper_flow
+from quantmind.mind.memory import FilesystemMemory
+
+
+async def main() -> None:
+    mem = FilesystemMemory("./.qm-memory")
+    arxiv_ids = ["2401.12345", "2402.67890", "2403.11111"]
+    for paper_id in arxiv_ids:
+        paper = await paper_flow(
+            ArxivIdentifier(id=paper_id),
+            memory=mem,
+        )
+        print(paper.title)
+    # Trajectory records are now under ./.qm-memory/runs/.
+
+
+asyncio.run(main())
+```
+
+`FilesystemMemory` requires Node.js + `npx` on PATH (the SDK launches
+`@modelcontextprotocol/server-filesystem` over stdio). Each run also
+writes `<memory_dir>/runs/<run_id>.json` and appends a one-line
+summary to `<memory_dir>/runs.jsonl`. `FilesystemMemory` is for serial
+loops only — `batch_run` rejects `memory=` at the signature layer
+(see design doc §4.3.5).
+
 > **Note**: QuantMind is mid-migration to OpenAI Agents SDK
-> (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR5 lands the
-> apex layer (`flows/` + `magic.py`); the remaining work is the `mind/`
-> memory + store layer scheduled for PR6 and PR7.
+> (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR6 lands
+> `mind/memory/` (Memory Protocol + `FilesystemMemory` MVP + trajectory
+> archive); the remaining work is the `mind/store/` knowledge layer
+> scheduled for PR7+.
 
 ---
 
@@ -235,10 +268,11 @@ asyncio.run(main())
 
 - [x] Better `flow` design for user-friendly usage
 - [x] First production level example (Quant Paper Agent)
-- [ ] Migrate Agent layer to OpenAI Agents SDK
-- [ ] Standardize knowledge format with `knowledge/` (Pydantic-based)
+- [x] Migrate Agent layer to OpenAI Agents SDK
+- [x] Standardize knowledge format with `knowledge/` (Pydantic-based)
+- [x] Cross-step working memory (`mind/memory`) for serial document processing
 - [ ] Additional content sources (financial news, blogs, reports)
-- [ ] Cross-step working memory (`mind/memory`) for batch document processing
+- [ ] `mind/store/` — durable knowledge store with hybrid retrieval (PR7+)
 
 ---
 
@@ -252,18 +286,10 @@ QuantMind is designed with a larger vision: to become a comprehensive intelligen
 The foundation we're building today—starting with papers—will expand to encompass the entire financial information ecosystem.
 
 > [!NOTE]
-> **Future Conceptual Example (PR6 brings `FilesystemMemory`):**
->
-> ```python
-> from quantmind.configs.paper import ArxivIdentifier
-> from quantmind.flows import paper_flow
-> from quantmind.knowledge import Paper
-> from quantmind.mind.memory import FilesystemMemory  # PR6
->
-> memory = FilesystemMemory("./mem/factor-research/")
-> for arxiv_id in arxiv_ids:
->     paper: Paper = await paper_flow(ArxivIdentifier(id=arxiv_id), memory=memory)
-> ```
+> **`FilesystemMemory` landed in PR6.** See the runbook example above
+> (*Persistent memory across a serial loop*) for the canonical usage.
+> Future cognitive layers (`mind/store`, `mind/summarize_run`) build on
+> this foundation — they share the same `<memory_dir>/` layout.
 
 This future state represents our commitment to moving beyond simple data aggregation and toward genuine machine intelligence in the financial domain.
 

From d5f0c89caf78c6e05f4ee4a662ed336181148776 Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 02:00:03 +0800
Subject: [PATCH 09/11] fix(mind): align RunHooks override signatures with SDK
 base (PR6)

basedpyright caught two issues:
- _safe_repr's dump() call returned object (not str) because
  getattr loses the typed signature; wrap with str() to satisfy
  the return type.
- Lifecycle override parameter names must match RunHooksBase
  (context, not ctx) for reportIncompatibleMethodOverride.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quantmind/mind/memory/_run_hooks.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/quantmind/mind/memory/_run_hooks.py b/quantmind/mind/memory/_run_hooks.py
index c89f624..a61dca1 100644
--- a/quantmind/mind/memory/_run_hooks.py
+++ b/quantmind/mind/memory/_run_hooks.py
@@ -45,7 +45,7 @@ def _safe_repr(obj: Any) -> str:
     dump = getattr(obj, "model_dump_json", None)
     if callable(dump):
         try:
-            return dump()
+            return str(dump())
         except Exception:  # noqa: BLE001
             pass
     return str(obj)
@@ -75,7 +75,7 @@ def __init__(self, *, memory_dir: Path, archive_lock: asyncio.Lock) -> None:
         self._llm_timer_start: float | None = None
         self._tool_timer_starts: dict[int, float] = {}
 
-    async def on_agent_start(self, ctx: Any, agent: Any) -> None:
+    async def on_agent_start(self, context: Any, agent: Any) -> None:
         self._started_at = datetime.now(timezone.utc)
         self._agent_name = str(getattr(agent, "name", "") or "")
         self._agent_model = str(getattr(agent, "model", "") or "")
@@ -86,7 +86,7 @@ async def on_agent_start(self, ctx: Any, agent: Any) -> None:
     async def on_llm_start(self, *_: Any, **__: Any) -> None:
         self._llm_timer_start = time.monotonic()
 
-    async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None:
+    async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None:
         duration = (
             (time.monotonic() - self._llm_timer_start)
             if self._llm_timer_start is not None
@@ -106,11 +106,11 @@ async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None:
             }
         )
 
-    async def on_tool_start(self, ctx: Any, agent: Any, tool: Any) -> None:
+    async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None:
         self._tool_timer_starts[id(tool)] = time.monotonic()
 
     async def on_tool_end(
-        self, ctx: Any, agent: Any, tool: Any, result: Any
+        self, context: Any, agent: Any, tool: Any, result: Any
     ) -> None:
         start = self._tool_timer_starts.pop(id(tool), None)
         duration = (time.monotonic() - start) if start is not None else 0.0
@@ -123,7 +123,7 @@ async def on_tool_end(
             }
         )
 
-    async def on_agent_end(self, ctx: Any, agent: Any, output: Any) -> None:
+    async def on_agent_end(self, context: Any, agent: Any, output: Any) -> None:
         self._ended_at = datetime.now(timezone.utc)
         self._output_summary = _truncate(_safe_repr(output))
 

From 102c5be20eb006a12ff9e3b405e389ad70822f0f Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 02:06:12 +0800
Subject: [PATCH 10/11] docs: strip internal design-doc references from shipped
 surfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The next-step-architecture.md design doc is local-only (gitignored)
and shouldn't be referenced from any shipped docstring, comment,
error message, or end-user docs. This commit removes every such
reference (5 in quantmind/, 1 in README.md, 1 in CLAUDE.md) without
losing any user-facing meaning — the surrounding text still says
what the constraint is, just without the dangling doc pointer.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                          |  2 +-
 README.md                          |  3 +--
 quantmind/flows/batch.py           |  7 +++----
 quantmind/flows/paper.py           | 10 +++++-----
 quantmind/mind/memory/_protocol.py |  7 +++----
 5 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index f7829ca..535383c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -152,7 +152,7 @@ issue instead.
 - ❌ Introduce class-based `BaseFlow` / plugin registry / hook discovery
 - ❌ Wrap `from agents import ...` in a QuantMind-side facade — use the SDK directly
 - ❌ Mix `batch_run` and `memory` (mutually exclusive in MVP; `batch_run` rejects
-  `memory=` at the signature layer — design doc §4.3.5)
+  `memory=` at the signature layer)
 - ❌ Use `Dict[str, Any]` in init functions; use Pydantic models
 - ❌ Add hard deps on observability platforms (Langfuse / Logfire / etc.); document
   integration via `add_trace_processor()` in user-facing cookbook only
diff --git a/README.md b/README.md
index 9d16605..cab9e1c 100644
--- a/README.md
+++ b/README.md
@@ -253,8 +253,7 @@ asyncio.run(main())
 `@modelcontextprotocol/server-filesystem` over stdio). Each run also
 writes `<memory_dir>/runs/<run_id>.json` and appends a one-line
 summary to `<memory_dir>/runs.jsonl`. `FilesystemMemory` is for serial
-loops only — `batch_run` rejects `memory=` at the signature layer
-(see design doc §4.3.5).
+loops only — `batch_run` rejects `memory=` at the signature layer.
 
 > **Note**: QuantMind is mid-migration to OpenAI Agents SDK
 > (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR6 lands
diff --git a/quantmind/flows/batch.py b/quantmind/flows/batch.py
index 9e6c659..48c1309 100644
--- a/quantmind/flows/batch.py
+++ b/quantmind/flows/batch.py
@@ -2,8 +2,8 @@
 
 `batch_run` is the single concurrency primitive QuantMind ships in MVP.
 It does NOT support `memory=`; for memory-accumulating workflows users
-write a serial `for` loop themselves (design doc §4.3.5). This keeps the
-batch path stateless and free of cross-run race hazards.
+write a serial `for` loop themselves. This keeps the batch path
+stateless and free of cross-run race hazards.
 """
 
 import asyncio
@@ -93,8 +93,7 @@ async def batch_run(
         raise ValueError(
             "batch_run does not support `memory=` in MVP. For "
             "memory-accumulating workflows write a serial loop instead: "
-            "`for inp in inputs: await flow_fn(inp, cfg=cfg, memory=memory)`. "
-            "See design doc §4.3.5."
+            "`for inp in inputs: await flow_fn(inp, cfg=cfg, memory=memory)`."
         )
     if concurrency < 1:
         raise ValueError(f"concurrency must be >= 1, got {concurrency}")
diff --git a/quantmind/flows/paper.py b/quantmind/flows/paper.py
index bf5e31f..b58a786 100644
--- a/quantmind/flows/paper.py
+++ b/quantmind/flows/paper.py
@@ -8,7 +8,7 @@
 
 Customization happens through the configured ``PaperFlowCfg`` (Layer 1)
 or the keyword arguments on this function (Layer 2). To swap the whole
-flow, fork this file (Layer 3 — design doc §9).
+flow, fork this file (Layer 3).
 """
 
 from typing import Any, TypeVar
@@ -76,10 +76,10 @@ async def paper_flow(
 ) -> P | Paper:
     """Extract a ``Paper`` from a typed ``PaperInput``.
 
-    See design doc §4.1 for the rationale on each kwarg. When ``memory``
-    is supplied, ``memory.mcp_servers()`` and ``memory.tools()`` flow
-    through to the Agent unconditionally; trajectory archiving is gated
-    separately by ``cfg.archive_trajectory`` inside the runner.
+    When ``memory`` is supplied, ``memory.mcp_servers()`` and
+    ``memory.tools()`` flow through to the Agent unconditionally;
+    trajectory archiving is gated separately by
+    ``cfg.archive_trajectory`` inside the runner.
 
     Raises:
         UnsupportedContentTypeError: When fetched bytes are not PDF /
diff --git a/quantmind/mind/memory/_protocol.py b/quantmind/mind/memory/_protocol.py
index 6f0038f..ab85219 100644
--- a/quantmind/mind/memory/_protocol.py
+++ b/quantmind/mind/memory/_protocol.py
@@ -2,10 +2,9 @@
 
 Each method has its own narrow surface so concrete implementations can
 opt in to whichever channel(s) they need (in-process tools, MCP servers,
-lifecycle hooks). The Protocol does NOT prescribe MCP — see design doc
-\u00a711.2 for the rationale: future backends like an embedding-based
-``ChromaMemory`` are tool-only, while the MVP ``FilesystemMemory`` is
-MCP-based.
+lifecycle hooks). The Protocol does NOT prescribe MCP: a future
+embedding-based ``ChromaMemory`` could be tool-only, while the MVP
+``FilesystemMemory`` is MCP-based — both satisfy this same Protocol.
 """
 
 from typing import Any, Protocol, runtime_checkable

From 08e068ba5cedcc853e8d3e7ede2f8cc73ffd87cb Mon Sep 17 00:00:00 2001
From: pkuwkl <pkuwkl@gmail.com>
Date: Fri, 8 May 2026 14:31:01 +0800
Subject: [PATCH 11/11] docs(examples): runnable mind/memory walkthroughs (PR6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four self-contained scripts under examples/memory/ that exercise
FilesystemMemory + MemoryRunHooks end to end:

  01_basic.py            — shortest possible memory run; show disk layout
  02_serial_loop.py      — N-input serial loop sharing one memory_dir
  03_inspect_trajectory  — disk-only post-run analysis (no API needed)
  04_custom_run_hooks    — compose your own RunHooks via extra_run_hooks=

README.md is the index. ruff per-file-ignores skip docstring rules
(D-series) for examples/ — module-level docstring is enough for short
demos.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 examples/memory/01_basic.py              | 62 ++++++++++++++++
 examples/memory/02_serial_loop.py        | 75 +++++++++++++++++++
 examples/memory/03_inspect_trajectory.py | 59 +++++++++++++++
 examples/memory/04_custom_run_hooks.py   | 95 ++++++++++++++++++++++++
 examples/memory/README.md                | 45 +++++++++++
 pyproject.toml                           |  3 +
 6 files changed, 339 insertions(+)
 create mode 100644 examples/memory/01_basic.py
 create mode 100644 examples/memory/02_serial_loop.py
 create mode 100644 examples/memory/03_inspect_trajectory.py
 create mode 100644 examples/memory/04_custom_run_hooks.py
 create mode 100644 examples/memory/README.md

diff --git a/examples/memory/01_basic.py b/examples/memory/01_basic.py
new file mode 100644
index 0000000..47881c8
--- /dev/null
+++ b/examples/memory/01_basic.py
@@ -0,0 +1,62 @@
+"""01 — The shortest memory run.
+
+Builds a ``FilesystemMemory``, runs ``paper_flow`` once with it, then
+prints what the trajectory archive looks like on disk afterwards.
+
+What to look for:
+
+- ``./.qm-memory/`` is created with ``notes/`` ``items/`` ``runs/`` and
+  a seeded ``README.md`` (the agent's own usage guide for the dir).
+- One ``runs/<run_id>.json`` file lands per ``paper_flow`` call.
+- ``runs.jsonl`` gets one new line per call (a denormalised index of
+  the per-run files — handy for `jq` / `pandas`).
+
+Prerequisites: OPENAI_API_KEY in env, Node.js + npx on PATH.
+
+Run:
+    python examples/memory/01_basic.py
+"""
+
+import asyncio
+from pathlib import Path
+
+from quantmind.configs.paper import RawText
+from quantmind.flows import paper_flow
+from quantmind.mind.memory import FilesystemMemory
+
+
+async def main() -> None:
+    memory_dir = Path("./.qm-memory")
+    mem = FilesystemMemory(memory_dir)
+
+    # A tiny paper-shaped input keeps cost low; swap in
+    # ``ArxivIdentifier(id="...")`` to see a real extraction end to end.
+    paper = await paper_flow(
+        RawText(
+            text=(
+                "# Toy paper\n\n"
+                "Title: Cross-sectional momentum, simplified\n"
+                "Author: Demo\n\n"
+                "Body: We illustrate momentum on a 3-stock universe."
+            )
+        ),
+        memory=mem,
+    )
+    print(f"Extracted paper: {paper.title!r}")
+
+    # Show what landed under .qm-memory/.
+    print(f"\nMemory layout under {memory_dir}:")
+    for child in sorted(memory_dir.rglob("*")):
+        if child.is_file():
+            print(
+                f"  {child.relative_to(memory_dir)}  ({child.stat().st_size}B)"
+            )
+
+    runs_jsonl = memory_dir / "runs.jsonl"
+    if runs_jsonl.exists():
+        print(f"\nLast line of {runs_jsonl}:")
+        print(runs_jsonl.read_text().splitlines()[-1][:200] + " ...")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/memory/02_serial_loop.py b/examples/memory/02_serial_loop.py
new file mode 100644
index 0000000..c8968b6
--- /dev/null
+++ b/examples/memory/02_serial_loop.py
@@ -0,0 +1,75 @@
+"""02 — Serial loop sharing one FilesystemMemory.
+
+The point of cross-step memory is letting run N see what runs 1..N-1
+left behind. ``FilesystemMemory`` exposes ``notes/`` and ``items/`` to
+the Agent through an MCP filesystem server, so the Agent can list /
+read / write files there during its own turn.
+
+What to look for:
+
+- ``./.qm-memory/runs/`` accumulates one trajectory record per loop
+  iteration (3 here).
+- ``./.qm-memory/notes/`` may grow if the Agent decides to write notes
+  while extracting (it sees the seeded ``README.md`` and is encouraged
+  to do so).
+- ``./.qm-memory/runs.jsonl`` has 3 appended lines after this script.
+
+This is the "memory-accumulating workflow" pattern. ``batch_run``
+rejects ``memory=`` at the signature level — for memory accumulation
+you write the loop yourself, exactly like below.
+
+Prerequisites: OPENAI_API_KEY, Node.js + npx, network.
+
+Run:
+    python examples/memory/02_serial_loop.py
+"""
+
+import asyncio
+from pathlib import Path
+
+from quantmind.configs.paper import RawText
+from quantmind.flows import paper_flow
+from quantmind.mind.memory import FilesystemMemory
+
+_FAKE_PAPERS = [
+    (
+        "# Momentum returns 1\n"
+        "Title: Cross-sectional momentum on US equities\n"
+        "Body: Long winners short losers monthly."
+    ),
+    (
+        "# Momentum returns 2\n"
+        "Title: Time-series momentum on commodities\n"
+        "Body: 12-month look-back, monthly rebalance."
+    ),
+    (
+        "# Momentum returns 3\n"
+        "Title: Combining XS and TS momentum\n"
+        "Body: 50/50 equal weight composite."
+    ),
+]
+
+
+async def main() -> None:
+    mem = FilesystemMemory(Path("./.qm-memory"))
+
+    for idx, body in enumerate(_FAKE_PAPERS, start=1):
+        print(f"\n=== run {idx}/3 ===")
+        paper = await paper_flow(RawText(text=body), memory=mem)
+        print(f"  -> {paper.title!r}")
+
+    # Quick post-run summary.
+    runs_dir = mem.memory_dir / "runs"
+    print(
+        f"\nTrajectory files: {len(list(runs_dir.glob('*.json')))} in {runs_dir}"
+    )
+
+    notes_dir = mem.memory_dir / "notes"
+    notes = list(notes_dir.glob("*"))
+    print(f"Notes the agent left: {len(notes)} in {notes_dir}")
+    for n in notes[:5]:
+        print(f"  - {n.name}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/memory/03_inspect_trajectory.py b/examples/memory/03_inspect_trajectory.py
new file mode 100644
index 0000000..c705426
--- /dev/null
+++ b/examples/memory/03_inspect_trajectory.py
@@ -0,0 +1,59 @@
+"""03 — Read trajectory archive offline.
+
+After 01 / 02 have run, this script needs no network and no npx — it
+opens ``runs.jsonl`` and aggregates the ``RunRecord`` fields so you
+can see exactly what ``MemoryRunHooks`` captured: timing, token usage,
+LLM call breakdown, tool calls.
+
+This is also the right shape for building dashboards / cost reports
+later: each line is one self-contained run record.
+
+Run:
+    python examples/memory/03_inspect_trajectory.py ./.qm-memory
+"""
+
+import json
+import sys
+from pathlib import Path
+
+
+def main() -> None:
+    memory_dir = Path(sys.argv[1] if len(sys.argv) > 1 else "./.qm-memory")
+    runs_jsonl = memory_dir / "runs.jsonl"
+    if not runs_jsonl.exists():
+        print(f"No {runs_jsonl}. Run 01_basic.py or 02_serial_loop.py first.")
+        sys.exit(1)
+
+    records = [
+        json.loads(line) for line in runs_jsonl.read_text().splitlines() if line
+    ]
+    print(f"Loaded {len(records)} run record(s) from {runs_jsonl}\n")
+
+    # Per-run summary.
+    for r in records:
+        toks = r["tokens_total"]
+        print(
+            f"  {r['run_id']}  "
+            f"workflow={r['workflow_name']}  "
+            f"duration={r['duration_seconds']:.2f}s  "
+            f"in={toks['input']}/out={toks['output']}  "
+            f"llm_calls={len(r['llm_calls'])}  "
+            f"tool_calls={len(r['tool_calls'])}  "
+            f"error={r['error']!r}"
+        )
+
+    # Aggregate.
+    total_in = sum(r["tokens_total"]["input"] for r in records)
+    total_out = sum(r["tokens_total"]["output"] for r in records)
+    total_dur = sum(r["duration_seconds"] for r in records)
+    n_failed = sum(1 for r in records if r["error"])
+    print()
+    print(
+        f"Aggregate: tokens_in={total_in} tokens_out={total_out} "
+        f"total_duration={total_dur:.2f}s "
+        f"failed={n_failed}/{len(records)}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/memory/04_custom_run_hooks.py b/examples/memory/04_custom_run_hooks.py
new file mode 100644
index 0000000..27da29c
--- /dev/null
+++ b/examples/memory/04_custom_run_hooks.py
@@ -0,0 +1,95 @@
+"""04 — Custom RunHooks composing with MemoryRunHooks.
+
+``paper_flow`` accepts ``extra_run_hooks=[...]`` so you can attach
+your own ``RunHooks`` (logger, metrics emitter, slack ping, ...)
+alongside the built-in ``MemoryRunHooks`` that ``FilesystemMemory``
+contributes. The runner composes them all into one fan-out hook that
+fires every lifecycle event for every registered hook in registration
+order.
+
+What to look for:
+
+- ``[memory]`` lines printed by the built-in MemoryRunHooks aren't
+  visible (it accumulates silently, then writes ``runs/<id>.json``
+  in ``finally``).
+- ``[my-logger]`` lines below come from ``ConsoleLoggerHooks``, the
+  custom hook we plug in here.
+
+Prerequisites: OPENAI_API_KEY, Node.js + npx.
+
+Run:
+    python examples/memory/04_custom_run_hooks.py
+"""
+
+import asyncio
+from pathlib import Path
+from typing import Any
+
+from agents import RunHooks
+
+from quantmind.configs.paper import RawText
+from quantmind.flows import paper_flow
+from quantmind.mind.memory import FilesystemMemory
+
+
+class ConsoleLoggerHooks(RunHooks[Any]):
+    """Tiny example hook that prints lifecycle events to stdout."""
+
+    async def on_agent_start(self, context: Any, agent: Any) -> None:
+        print(
+            f"[my-logger] agent start name={agent.name!r} model={agent.model!r}"
+        )
+
+    async def on_llm_start(self, *_: Any, **__: Any) -> None:
+        print("[my-logger] llm start")
+
+    async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None:
+        usage = getattr(response, "usage", None)
+        if usage is not None:
+            print(
+                f"[my-logger] llm end tokens_in={getattr(usage, 'input_tokens', 0)} "
+                f"tokens_out={getattr(usage, 'output_tokens', 0)}"
+            )
+        else:
+            print("[my-logger] llm end (no usage info)")
+
+    async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None:
+        print(f"[my-logger] tool start name={getattr(tool, 'name', '?')!r}")
+
+    async def on_tool_end(
+        self, context: Any, agent: Any, tool: Any, result: Any
+    ) -> None:
+        snippet = (
+            (str(result)[:60] + "...") if len(str(result)) > 60 else result
+        )
+        print(
+            f"[my-logger] tool end name={getattr(tool, 'name', '?')!r} "
+            f"result={snippet!r}"
+        )
+
+    async def on_agent_end(self, context: Any, agent: Any, output: Any) -> None:
+        print(f"[my-logger] agent end output_type={type(output).__name__}")
+
+
+async def main() -> None:
+    mem = FilesystemMemory(Path("./.qm-memory"))
+
+    paper = await paper_flow(
+        RawText(
+            text=(
+                "# Toy paper\n\nTitle: Custom-hook demo\n\n"
+                "Body: Show that user RunHooks fire alongside MemoryRunHooks."
+            )
+        ),
+        memory=mem,
+        extra_run_hooks=[ConsoleLoggerHooks()],
+    )
+    print(f"\nExtracted: {paper.title!r}")
+    print(
+        f"\nTrajectory file: "
+        f"{sorted((mem.memory_dir / 'runs').glob('*.json'))[-1]}"
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/memory/README.md b/examples/memory/README.md
new file mode 100644
index 0000000..4d66213
--- /dev/null
+++ b/examples/memory/README.md
@@ -0,0 +1,45 @@
+# `mind/memory` examples
+
+Small runnable scripts that walk you through `FilesystemMemory` +
+`MemoryRunHooks` end to end. Each one is self-contained — read the
+top-of-file comment block for what it shows.
+
+## Prerequisites
+
+- `pip install -e ".[dev]"` (so `quantmind` is importable).
+- `OPENAI_API_KEY` set in your shell (the real Agent run uses an LLM).
+- Node.js + `npx` on PATH — `FilesystemMemory` launches
+  `@modelcontextprotocol/server-filesystem` over stdio. Examples 01,
+  02, and 04 spawn a real MCP subprocess; example 03 only reads disk.
+
+## What each script demonstrates
+
+| # | Script | What you learn |
+|---|--------|----------------|
+| 01 | `01_basic.py` | The shortest possible memory run: build a `FilesystemMemory`, pass it to `paper_flow`, then look at the disk layout that gets created (`notes/`, `items/`, `runs/`, `runs.jsonl`). |
+| 02 | `02_serial_loop.py` | Process several inputs in a serial `for` loop sharing one `FilesystemMemory`. Each run can read what previous runs left in `notes/` via the MCP filesystem server. (`batch_run` rejects `memory=` by design — for memory-accumulating workflows, you write the loop yourself.) |
+| 03 | `03_inspect_trajectory.py` | Disk-only analysis: open `runs.jsonl`, parse the `RunRecord` JSON, and aggregate tokens / durations across runs. No network or `npx` needed. |
+| 04 | `04_custom_run_hooks.py` | Compose your own `RunHooks` (e.g., a custom logger) alongside the built-in `MemoryRunHooks` via the `extra_run_hooks=` kwarg. Both hooks fire for every lifecycle event in the order you registered them. |
+
+## Run them
+
+```bash
+# from the repo root, after installing
+python examples/memory/01_basic.py
+python examples/memory/02_serial_loop.py
+python examples/memory/03_inspect_trajectory.py ./.qm-memory
+python examples/memory/04_custom_run_hooks.py
+```
+
+All four examples write under `./.qm-memory/` (a directory created
+under your current working directory). Delete it freely between runs;
+nothing is shared with your other projects.
+
+## A note on `cfg.archive_trajectory`
+
+`FilesystemMemory.run_hooks()` returns the `MemoryRunHooks` instance
+that produces `runs/<run_id>.json` and the `runs.jsonl` index. The
+runner only attaches it when `cfg.archive_trajectory` is `True`
+(the default). If you set it to `False`, the agent still gets the
+`mcp_servers` + `tools` from your `Memory` (so it can read previous
+notes) — only the trajectory archive is suppressed.
diff --git a/pyproject.toml b/pyproject.toml
index 270ded0..654ff32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -81,6 +81,9 @@ ignore = [
 [tool.ruff.lint.per-file-ignores]
 # Tests don't need module/function docstrings or strict bugbear rules.
 "tests/**/*.py" = ["D", "B"]
+# Example scripts: top-of-file module docstring is enough; per-function
+# docstrings just clutter short demos.
+"examples/**/*.py" = ["D"]
 "**/__init__.py" = ["D104", "F401"]
 
 [tool.ruff.lint.pycodestyle]