From 987ea67451ca6278d3adb770629cc5e1390b87ff Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:47:40 +0800 Subject: [PATCH 01/11] feat(mind): Memory Protocol skeleton (PR6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The granular Protocol lets each backend opt in to whichever surface it needs — in-process tools, MCP servers, or lifecycle hooks. reset() is the only required side-effect method. Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/mind/__init__.py | 10 +++++++++ quantmind/mind/memory/__init__.py | 5 +++++ quantmind/mind/memory/_protocol.py | 35 +++++++++++++++++++++++++++++ tests/mind/__init__.py | 0 tests/mind/memory/__init__.py | 0 tests/mind/memory/test_protocol.py | 36 ++++++++++++++++++++++++++++++ 6 files changed, 86 insertions(+) create mode 100644 quantmind/mind/__init__.py create mode 100644 quantmind/mind/memory/__init__.py create mode 100644 quantmind/mind/memory/_protocol.py create mode 100644 tests/mind/__init__.py create mode 100644 tests/mind/memory/__init__.py create mode 100644 tests/mind/memory/test_protocol.py diff --git a/quantmind/mind/__init__.py b/quantmind/mind/__init__.py new file mode 100644 index 0000000..febb682 --- /dev/null +++ b/quantmind/mind/__init__.py @@ -0,0 +1,10 @@ +"""quantmind.mind — cognitive layer. + +PR6 introduces ``mind/memory/`` (Memory Protocol + filesystem backend). +PR7+ will add ``mind/store/`` (knowledge store) and +``mind/summarize_run`` (trajectory summariser). +""" + +from quantmind.mind.memory import Memory + +__all__ = ["Memory"] diff --git a/quantmind/mind/memory/__init__.py b/quantmind/mind/memory/__init__.py new file mode 100644 index 0000000..d7d070a --- /dev/null +++ b/quantmind/mind/memory/__init__.py @@ -0,0 +1,5 @@ +"""quantmind.mind.memory — Memory Protocol + filesystem MVP backend.""" + +from quantmind.mind.memory._protocol import Memory + +__all__ = ["Memory"] diff --git a/quantmind/mind/memory/_protocol.py b/quantmind/mind/memory/_protocol.py new file mode 100644 index 0000000..6f0038f --- /dev/null +++ b/quantmind/mind/memory/_protocol.py @@ -0,0 +1,35 @@ +"""Memory Protocol — granular cross-step working memory contract. + +Each method has its own narrow surface so concrete implementations can +opt in to whichever channel(s) they need (in-process tools, MCP servers, +lifecycle hooks). The Protocol does NOT prescribe MCP — see design doc +\u00a711.2 for the rationale: future backends like an embedding-based +``ChromaMemory`` are tool-only, while the MVP ``FilesystemMemory`` is +MCP-based. +""" + +from typing import Any, Protocol, runtime_checkable + +from agents import RunHooks, Tool +from agents.mcp import MCPServer + + +@runtime_checkable +class Memory(Protocol): + """Cross-step working memory exposed to a flow's Agent.""" + + def tools(self) -> list[Tool]: + """In-process ``@function_tool`` list for the Agent.""" + ... + + def mcp_servers(self) -> list[MCPServer]: + """MCP servers exposed to the Agent.""" + ... + + def run_hooks(self) -> RunHooks[Any] | None: + """Lifecycle hooks for accounting / archiving / item indexing.""" + ... + + async def reset(self) -> None: + """Wipe the memory area. Implementations may be no-ops.""" + ... diff --git a/tests/mind/__init__.py b/tests/mind/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mind/memory/__init__.py b/tests/mind/memory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mind/memory/test_protocol.py b/tests/mind/memory/test_protocol.py new file mode 100644 index 0000000..5034ffb --- /dev/null +++ b/tests/mind/memory/test_protocol.py @@ -0,0 +1,36 @@ +"""Tests for ``quantmind.mind.memory._protocol``.""" + +import unittest +from typing import Any + +from agents import RunHooks, Tool +from agents.mcp import MCPServer + +from quantmind.mind.memory import Memory + + +class _CompleteStub: + def tools(self) -> list[Tool]: + return [] + + def mcp_servers(self) -> list[MCPServer]: + return [] + + def run_hooks(self) -> RunHooks[Any] | None: + return None + + async def reset(self) -> None: + return None + + +class _MissingMethodStub: + def tools(self) -> list[Tool]: + return [] + + +class MemoryProtocolTests(unittest.TestCase): + def test_runtime_checkable_accepts_complete_stub(self) -> None: + self.assertIsInstance(_CompleteStub(), Memory) + + def test_runtime_checkable_rejects_incomplete_stub(self) -> None: + self.assertNotIsInstance(_MissingMethodStub(), Memory) From 1e88e89454bfbb8a6ba25d1711050a36c6fb6649 Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:48:56 +0800 Subject: [PATCH 02/11] feat(mind): RunRecord + atomic write_run_record (PR6) generate_run_id produces a sortable timestamp plus a 3-char base36 suffix. write_run_record uses tmp+replace for the per-run JSON file and appends to runs.jsonl under an asyncio.Lock; cross-process concurrency is undefined. Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/mind/memory/_trajectory.py | 99 +++++++++++++++++++++++ tests/mind/memory/test_trajectory.py | 114 +++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 quantmind/mind/memory/_trajectory.py create mode 100644 tests/mind/memory/test_trajectory.py diff --git a/quantmind/mind/memory/_trajectory.py b/quantmind/mind/memory/_trajectory.py new file mode 100644 index 0000000..c51f2c9 --- /dev/null +++ b/quantmind/mind/memory/_trajectory.py @@ -0,0 +1,99 @@ +"""Trajectory archive: ``RunRecord`` schema + atomic writer. + +Each ``Runner.run`` invocation produces one ``RunRecord`` that is +serialised to ``/runs/.json`` (atomic via +``os.replace``) and appended to ``/runs.jsonl`` (one +JSON line per run, append-only). + +Cross-process concurrency is undefined; an ``asyncio.Lock`` (held by +the calling ``FilesystemMemory``) serialises writes within a single +Python process. +""" + +import asyncio +import json +import os +import secrets +from dataclasses import asdict, dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + +_BASE36 = "0123456789abcdefghijklmnopqrstuvwxyz" + + +@dataclass(frozen=True, slots=True) +class RunRecord: + """A single QuantMind flow run trajectory record.""" + + schema_version: int + run_id: str + workflow_name: str + trace_id: str | None + started_at: datetime + ended_at: datetime + duration_seconds: float + agent: dict[str, str] + llm_calls: list[dict[str, Any]] + tool_calls: list[dict[str, Any]] + memory_ops: dict[str, list[str]] + tokens_total: dict[str, int] + cost_estimate_usd: float + input_summary: str + output_summary: str + error: str | None + + +def generate_run_id(now: datetime) -> str: + """Build ``YYYYMMDDTHHMMSSmmm`` (UTC) plus 3 base36 random chars.""" + stamp = now.strftime("%Y%m%dT%H%M%S") + f"{now.microsecond // 1000:03d}" + suffix = "".join(secrets.choice(_BASE36) for _ in range(3)) + return f"{stamp}{suffix}" + + +def _to_jsonable(value: Any) -> Any: + if isinstance(value, datetime): + return value.isoformat().replace("+00:00", "Z") + raise TypeError( + f"Object of type {type(value).__name__} is not JSON serialisable" + ) + + +def _serialise(record: RunRecord) -> str: + return json.dumps( + asdict(record), + default=_to_jsonable, + ensure_ascii=False, + ) + + +async def write_run_record( + memory_dir: Path, + record: RunRecord, + *, + archive_lock: asyncio.Lock, +) -> None: + """Persist ``record`` atomically and append to ``runs.jsonl``. + + Atomicity: + + - The per-run JSON file is written to ``.json.tmp`` and + then ``os.replace``-d into place; same-FS rename is atomic on + POSIX, preventing partial reads. + - The ``runs.jsonl`` append is serialised across one Python + process via ``archive_lock``; cross-process concurrency is + unsupported (documented in ``FilesystemMemory``). + """ + payload = _serialise(record) + runs_dir = memory_dir / "runs" + runs_dir.mkdir(parents=True, exist_ok=True) + final = runs_dir / f"{record.run_id}.json" + tmp = runs_dir / f"{record.run_id}.json.tmp" + tmp.write_text(payload, encoding="utf-8") + os.replace(tmp, final) + + index = memory_dir / "runs.jsonl" + async with archive_lock: + with index.open("a", encoding="utf-8") as fh: + fh.write(payload) + fh.write("\n") diff --git a/tests/mind/memory/test_trajectory.py b/tests/mind/memory/test_trajectory.py new file mode 100644 index 0000000..a00212b --- /dev/null +++ b/tests/mind/memory/test_trajectory.py @@ -0,0 +1,114 @@ +"""Tests for ``quantmind.mind.memory._trajectory``.""" + +import asyncio +import json +import tempfile +import unittest +from datetime import datetime, timezone +from pathlib import Path + +from quantmind.mind.memory._trajectory import ( + RunRecord, + generate_run_id, + write_run_record, +) + + +def _fixture_record(run_id: str) -> RunRecord: + started = datetime(2026, 5, 8, 12, 30, 45, 123000, tzinfo=timezone.utc) + ended = datetime(2026, 5, 8, 12, 30, 50, 456000, tzinfo=timezone.utc) + return RunRecord( + schema_version=1, + run_id=run_id, + workflow_name="quantmind.paper_flow", + trace_id="trace_abc", + started_at=started, + ended_at=ended, + duration_seconds=5.333, + agent={ + "name": "paper_extractor", + "model": "gpt-4o", + "instructions_hash": "abc", + }, + llm_calls=[ + { + "tokens_in": 10, + "tokens_out": 5, + "duration_s": 0.5, + "model": "gpt-4o", + } + ], + tool_calls=[], + memory_ops={"files_read": [], "files_written": []}, + tokens_total={"input": 10, "output": 5}, + cost_estimate_usd=0.0, + input_summary="hello", + output_summary="world", + error=None, + ) + + +class GenerateRunIdTests(unittest.TestCase): + def test_format_matches_documented_shape(self) -> None: + now = datetime(2026, 5, 8, 12, 30, 45, 123456, tzinfo=timezone.utc) + run_id = generate_run_id(now) + self.assertRegex(run_id, r"^20260508T123045123[0-9a-z]{3}$") + + def test_different_calls_produce_different_ids(self) -> None: + now = datetime(2026, 5, 8, 12, 30, 45, 123000, tzinfo=timezone.utc) + ids = {generate_run_id(now) for _ in range(50)} + self.assertGreater(len(ids), 40) + + +class WriteRunRecordTests(unittest.IsolatedAsyncioTestCase): + async def test_writes_atomic_per_run_file(self) -> None: + with tempfile.TemporaryDirectory() as raw_dir: + mem_dir = Path(raw_dir) + (mem_dir / "runs").mkdir() + record = _fixture_record("rid001abc") + await write_run_record(mem_dir, record, archive_lock=asyncio.Lock()) + target = mem_dir / "runs" / "rid001abc.json" + self.assertTrue(target.exists()) + tmp = mem_dir / "runs" / "rid001abc.json.tmp" + self.assertFalse(tmp.exists()) + payload = json.loads(target.read_text()) + self.assertEqual(payload["run_id"], "rid001abc") + self.assertEqual( + payload["started_at"], "2026-05-08T12:30:45.123000Z" + ) + + async def test_appends_one_line_to_runs_jsonl(self) -> None: + with tempfile.TemporaryDirectory() as raw_dir: + mem_dir = Path(raw_dir) + (mem_dir / "runs").mkdir() + lock = asyncio.Lock() + await write_run_record( + mem_dir, _fixture_record("a01abc"), archive_lock=lock + ) + await write_run_record( + mem_dir, _fixture_record("b02def"), archive_lock=lock + ) + lines = (mem_dir / "runs.jsonl").read_text().splitlines() + self.assertEqual(len(lines), 2) + ids = [json.loads(line)["run_id"] for line in lines] + self.assertEqual(ids, ["a01abc", "b02def"]) + + async def test_concurrent_writes_serialise_under_lock(self) -> None: + with tempfile.TemporaryDirectory() as raw_dir: + mem_dir = Path(raw_dir) + (mem_dir / "runs").mkdir() + lock = asyncio.Lock() + await asyncio.gather( + write_run_record( + mem_dir, + _fixture_record("c01abc"), + archive_lock=lock, + ), + write_run_record( + mem_dir, + _fixture_record("c02def"), + archive_lock=lock, + ), + ) + lines = (mem_dir / "runs.jsonl").read_text().splitlines() + self.assertEqual(len(lines), 2) From acd359aaab6a5cd1e4b5e4145c71daa8bf5aa4d8 Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:50:24 +0800 Subject: [PATCH 03/11] feat(mind): MemoryRunHooks accumulator + persist (PR6) Lifecycle methods accumulate llm_calls and tool_calls plus agent metadata. persist() is invoked by the runner in finally so failed runs still archive (with error set to str(exc)). Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/mind/memory/_run_hooks.py | 172 ++++++++++++++++++++++++++++ tests/mind/memory/test_run_hooks.py | 137 ++++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 quantmind/mind/memory/_run_hooks.py create mode 100644 tests/mind/memory/test_run_hooks.py diff --git a/quantmind/mind/memory/_run_hooks.py b/quantmind/mind/memory/_run_hooks.py new file mode 100644 index 0000000..c89f624 --- /dev/null +++ b/quantmind/mind/memory/_run_hooks.py @@ -0,0 +1,172 @@ +"""``MemoryRunHooks`` — per-run lifecycle accumulator + ``persist()``. + +Constructed fresh per ``paper_flow`` invocation by +``FilesystemMemory.run_hooks()``. Each lifecycle method accumulates +metrics; the runner calls ``persist()`` in a ``finally`` block so +runs that fail still produce a trajectory record (with ``error`` +set to ``str(exc)``). +""" + +import asyncio +import hashlib +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from agents import RunHooks + +from quantmind.mind.memory._trajectory import ( + RunRecord, + generate_run_id, + write_run_record, +) + +_SUMMARY_LIMIT = 500 +_TRUNC_SUFFIX = "... [truncated]" + + +def _truncate(s: str, limit: int = _SUMMARY_LIMIT) -> str: + if len(s) <= limit: + return s + head = limit - len(_TRUNC_SUFFIX) + return s[:head] + _TRUNC_SUFFIX + + +def _instructions_hash(instructions: str | None) -> str: + if not instructions: + return "" + return hashlib.sha256(instructions.encode("utf-8")).hexdigest()[:16] + + +def _safe_repr(obj: Any) -> str: + if obj is None: + return "" + dump = getattr(obj, "model_dump_json", None) + if callable(dump): + try: + return dump() + except Exception: # noqa: BLE001 + pass + return str(obj) + + +class MemoryRunHooks(RunHooks[Any]): + """Per-run lifecycle accumulator that persists a ``RunRecord``. + + State lives on the instance and is written exactly once via + ``persist`` — typically called by the runner in a ``finally`` + block so failed runs still archive. + """ + + def __init__(self, *, memory_dir: Path, archive_lock: asyncio.Lock) -> None: + self._memory_dir = memory_dir + self._archive_lock = archive_lock + + self._started_at: datetime | None = None + self._ended_at: datetime | None = None + self._agent_name: str = "" + self._agent_model: str = "" + self._instructions_hash: str = "" + self._output_summary: str = "" + self._llm_calls: list[dict[str, Any]] = [] + self._tool_calls: list[dict[str, Any]] = [] + + self._llm_timer_start: float | None = None + self._tool_timer_starts: dict[int, float] = {} + + async def on_agent_start(self, ctx: Any, agent: Any) -> None: + self._started_at = datetime.now(timezone.utc) + self._agent_name = str(getattr(agent, "name", "") or "") + self._agent_model = str(getattr(agent, "model", "") or "") + self._instructions_hash = _instructions_hash( + getattr(agent, "instructions", None) + ) + + async def on_llm_start(self, *_: Any, **__: Any) -> None: + self._llm_timer_start = time.monotonic() + + async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None: + duration = ( + (time.monotonic() - self._llm_timer_start) + if self._llm_timer_start is not None + else 0.0 + ) + self._llm_timer_start = None + usage = getattr(response, "usage", None) + tokens_in = int(getattr(usage, "input_tokens", 0) or 0) + tokens_out = int(getattr(usage, "output_tokens", 0) or 0) + model = str(getattr(agent, "model", "") or "") + self._llm_calls.append( + { + "tokens_in": tokens_in, + "tokens_out": tokens_out, + "duration_s": round(duration, 4), + "model": model, + } + ) + + async def on_tool_start(self, ctx: Any, agent: Any, tool: Any) -> None: + self._tool_timer_starts[id(tool)] = time.monotonic() + + async def on_tool_end( + self, ctx: Any, agent: Any, tool: Any, result: Any + ) -> None: + start = self._tool_timer_starts.pop(id(tool), None) + duration = (time.monotonic() - start) if start is not None else 0.0 + name = str(getattr(tool, "name", "") or "") + self._tool_calls.append( + { + "name": name, + "args": (_truncate(str(result)) if result is not None else ""), + "duration_s": round(duration, 4), + } + ) + + async def on_agent_end(self, ctx: Any, agent: Any, output: Any) -> None: + self._ended_at = datetime.now(timezone.utc) + self._output_summary = _truncate(_safe_repr(output)) + + async def persist( + self, + *, + workflow_name: str, + result: Any, + error: BaseException | None, + input_payload: Any, + ) -> None: + """Build a ``RunRecord`` from accumulated state and write it. + + Called by the runner in ``finally`` so failed runs archive too. + """ + ended = self._ended_at or datetime.now(timezone.utc) + started = self._started_at or ended + tokens_total = { + "input": sum(c["tokens_in"] for c in self._llm_calls), + "output": sum(c["tokens_out"] for c in self._llm_calls), + } + record = RunRecord( + schema_version=1, + run_id=generate_run_id(started), + workflow_name=workflow_name, + trace_id=(getattr(result, "trace_id", None) if result else None), + started_at=started, + ended_at=ended, + duration_seconds=round((ended - started).total_seconds(), 4), + agent={ + "name": self._agent_name, + "model": self._agent_model, + "instructions_hash": self._instructions_hash, + }, + llm_calls=list(self._llm_calls), + tool_calls=list(self._tool_calls), + memory_ops={"files_read": [], "files_written": []}, + tokens_total=tokens_total, + cost_estimate_usd=0.0, + input_summary=_truncate(_safe_repr(input_payload)), + output_summary=self._output_summary, + error=str(error) if error is not None else None, + ) + await write_run_record( + self._memory_dir, record, archive_lock=self._archive_lock + ) diff --git a/tests/mind/memory/test_run_hooks.py b/tests/mind/memory/test_run_hooks.py new file mode 100644 index 0000000..fcab3f6 --- /dev/null +++ b/tests/mind/memory/test_run_hooks.py @@ -0,0 +1,137 @@ +"""Tests for ``quantmind.mind.memory._run_hooks``.""" + +import asyncio +import tempfile +import unittest +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch + +from quantmind.mind.memory._run_hooks import MemoryRunHooks + + +def _make_hooks(tmpdir: Path) -> MemoryRunHooks: + (tmpdir / "runs").mkdir(parents=True, exist_ok=True) + return MemoryRunHooks(memory_dir=tmpdir, archive_lock=asyncio.Lock()) + + +class MemoryRunHooksLifecycleTests(unittest.IsolatedAsyncioTestCase): + async def test_on_agent_start_records_metadata(self) -> None: + with tempfile.TemporaryDirectory() as raw: + hooks = _make_hooks(Path(raw)) + agent = SimpleNamespace( + name="paper_extractor", + model="gpt-4o", + instructions="extract papers", + ) + await hooks.on_agent_start(SimpleNamespace(), agent) + self.assertEqual(hooks._agent_name, "paper_extractor") + self.assertEqual(hooks._agent_model, "gpt-4o") + self.assertEqual(len(hooks._instructions_hash), 16) + + async def test_on_llm_end_accumulates_call(self) -> None: + with tempfile.TemporaryDirectory() as raw: + hooks = _make_hooks(Path(raw)) + await hooks.on_agent_start( + SimpleNamespace(), + SimpleNamespace(name="a", model="gpt-4o", instructions=""), + ) + await hooks.on_llm_start() + response = SimpleNamespace( + usage=SimpleNamespace(input_tokens=42, output_tokens=11) + ) + await hooks.on_llm_end( + SimpleNamespace(), + SimpleNamespace(model="gpt-4o"), + response, + ) + self.assertEqual(len(hooks._llm_calls), 1) + self.assertEqual(hooks._llm_calls[0]["tokens_in"], 42) + self.assertEqual(hooks._llm_calls[0]["tokens_out"], 11) + + async def test_on_llm_end_handles_missing_usage(self) -> None: + with tempfile.TemporaryDirectory() as raw: + hooks = _make_hooks(Path(raw)) + await hooks.on_llm_start() + await hooks.on_llm_end( + SimpleNamespace(), + SimpleNamespace(model="gpt-4o"), + SimpleNamespace(usage=None), + ) + self.assertEqual(hooks._llm_calls[0]["tokens_in"], 0) + self.assertEqual(hooks._llm_calls[0]["tokens_out"], 0) + + async def test_on_tool_pair_records_call(self) -> None: + with tempfile.TemporaryDirectory() as raw: + hooks = _make_hooks(Path(raw)) + tool = SimpleNamespace(name="read_file") + await hooks.on_tool_start( + SimpleNamespace(), SimpleNamespace(), tool + ) + await hooks.on_tool_end( + SimpleNamespace(), + SimpleNamespace(), + tool, + "file contents", + ) + self.assertEqual(len(hooks._tool_calls), 1) + self.assertEqual(hooks._tool_calls[0]["name"], "read_file") + self.assertGreaterEqual(hooks._tool_calls[0]["duration_s"], 0.0) + + async def test_on_agent_end_truncates_long_output(self) -> None: + with tempfile.TemporaryDirectory() as raw: + hooks = _make_hooks(Path(raw)) + big = "x" * 1000 + await hooks.on_agent_end(SimpleNamespace(), SimpleNamespace(), big) + self.assertTrue(hooks._output_summary.endswith("[truncated]")) + self.assertLessEqual(len(hooks._output_summary), 500) + + +class MemoryRunHooksPersistTests(unittest.IsolatedAsyncioTestCase): + async def test_persist_success_writes_record_with_no_error( + self, + ) -> None: + with tempfile.TemporaryDirectory() as raw: + mem_dir = Path(raw) + hooks = _make_hooks(mem_dir) + await hooks.on_agent_start( + SimpleNamespace(), + SimpleNamespace(name="a", model="gpt-4o", instructions="x"), + ) + await hooks.on_agent_end( + SimpleNamespace(), SimpleNamespace(), "out" + ) + with patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write: + await hooks.persist( + workflow_name="quantmind.paper_flow", + result=SimpleNamespace(trace_id="trace_xx"), + error=None, + input_payload="hello", + ) + args, _kwargs = mock_write.call_args + record = args[1] + self.assertEqual(record.workflow_name, "quantmind.paper_flow") + self.assertEqual(record.trace_id, "trace_xx") + self.assertIsNone(record.error) + + async def test_persist_error_path_records_exception_string( + self, + ) -> None: + with tempfile.TemporaryDirectory() as raw: + mem_dir = Path(raw) + hooks = _make_hooks(mem_dir) + with patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write: + await hooks.persist( + workflow_name="quantmind.paper_flow", + result=None, + error=ValueError("boom"), + input_payload="hi", + ) + record = mock_write.call_args.args[1] + self.assertEqual(record.error, "boom") From 6f9977170671b58dd7d3e368c423afc78ccff607 Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:51:57 +0800 Subject: [PATCH 04/11] feat(mind): FilesystemMemory MVP (PR6) Routes Agent file access through the SDK's MCPServerStdio (npx + @modelcontextprotocol/server-filesystem). run_hooks() returns a fresh MemoryRunHooks per call sharing the per-instance asyncio.Lock. reset() is destructive and refuses '/' and the user home directory. Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/mind/__init__.py | 4 +- quantmind/mind/memory/__init__.py | 4 +- quantmind/mind/memory/filesystem.py | 116 +++++++++++++++++++++++++++ tests/mind/memory/test_filesystem.py | 82 +++++++++++++++++++ 4 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 quantmind/mind/memory/filesystem.py create mode 100644 tests/mind/memory/test_filesystem.py diff --git a/quantmind/mind/__init__.py b/quantmind/mind/__init__.py index febb682..0a39d60 100644 --- a/quantmind/mind/__init__.py +++ b/quantmind/mind/__init__.py @@ -5,6 +5,6 @@ ``mind/summarize_run`` (trajectory summariser). """ -from quantmind.mind.memory import Memory +from quantmind.mind.memory import FilesystemMemory, Memory, MemoryRunHooks -__all__ = ["Memory"] +__all__ = ["FilesystemMemory", "Memory", "MemoryRunHooks"] diff --git a/quantmind/mind/memory/__init__.py b/quantmind/mind/memory/__init__.py index d7d070a..579b5b1 100644 --- a/quantmind/mind/memory/__init__.py +++ b/quantmind/mind/memory/__init__.py @@ -1,5 +1,7 @@ """quantmind.mind.memory — Memory Protocol + filesystem MVP backend.""" from quantmind.mind.memory._protocol import Memory +from quantmind.mind.memory._run_hooks import MemoryRunHooks +from quantmind.mind.memory.filesystem import FilesystemMemory -__all__ = ["Memory"] +__all__ = ["FilesystemMemory", "Memory", "MemoryRunHooks"] diff --git a/quantmind/mind/memory/filesystem.py b/quantmind/mind/memory/filesystem.py new file mode 100644 index 0000000..7bb8c77 --- /dev/null +++ b/quantmind/mind/memory/filesystem.py @@ -0,0 +1,116 @@ +"""``FilesystemMemory`` — MVP cross-step memory backed by an MCP filesystem server. + +Layout:: + + / + notes/ # agent's free-form working notes (markdown) + items/ # typed KnowledgeItem JSON (PR7+) + runs/ # trajectory archive — runs/.json + runs.jsonl # append-only run index + README.md # agent-facing usage guide + +Requires Node.js + ``npx`` on PATH (the SDK's ``MCPServerStdio`` spawns +``npx -y @modelcontextprotocol/server-filesystem``). ``__init__`` does +**not** pre-flight-check ``npx``; the SDK surfaces a clear error at run +time when it is missing. +""" + +import asyncio +import shutil +from pathlib import Path +from typing import Any + +from agents import RunHooks, Tool +from agents.mcp import MCPServer, MCPServerStdio + +from quantmind.mind.memory._run_hooks import MemoryRunHooks + +_AGENT_README_TEXT = """\ +# Memory directory for QuantMind flow run + +Available subdirectories: +- `notes/` — your free-form working notes (markdown). Read existing notes + before writing new ones. +- `items/` — structured KnowledgeItem JSON files emitted by previous runs. +- `runs/` — system-managed run trajectory logs (do not edit). +- `runs.jsonl` — append-only run index (system-managed, do not edit). + +Guidelines: +1. BEFORE doing your task, list `notes/` and `items/` to see relevant + prior context. +2. When you find a fact worth remembering, write a short markdown note + under `notes/`, named `_.md`. +3. Don't repeat work. If the same item is already in `items/`, prefer + to reference rather than re-extract. +""" + + +def _is_forbidden_path(path: Path) -> bool: + return path == Path("/") or path == Path.home() + + +class FilesystemMemory: + """Filesystem-backed cross-step memory using the MCP filesystem server. + + Constructed once per serial loop / batch; passed to ``paper_flow`` + via the ``memory=`` kwarg. Each ``run_hooks()`` invocation returns + a fresh ``MemoryRunHooks`` so per-run accumulator state is isolated; + they share the per-instance ``asyncio.Lock`` that serialises + ``runs.jsonl`` appends. + """ + + def __init__(self, memory_dir: str | Path) -> None: + self.memory_dir = Path(memory_dir).resolve() + if _is_forbidden_path(self.memory_dir): + raise ValueError( + "memory_dir must not be '/' or the user home directory; " + "choose a dedicated subdirectory." + ) + for sub in ("notes", "items", "runs"): + (self.memory_dir / sub).mkdir(parents=True, exist_ok=True) + readme = self.memory_dir / "README.md" + if not readme.exists(): + readme.write_text(_AGENT_README_TEXT, encoding="utf-8") + self._archive_lock = asyncio.Lock() + + def tools(self) -> list[Tool]: + return [] + + def mcp_servers(self) -> list[MCPServer]: + return [ + MCPServerStdio( + name="quantmind_memory_fs", + params={ + "command": "npx", + "args": [ + "-y", + "@modelcontextprotocol/server-filesystem", + str(self.memory_dir), + ], + }, + ) + ] + + def run_hooks(self) -> RunHooks[Any] | None: + return MemoryRunHooks( + memory_dir=self.memory_dir, + archive_lock=self._archive_lock, + ) + + async def reset(self) -> None: + """Wipe ``notes/``, ``items/``, ``runs/``, and ``runs.jsonl``. + + Destructive — irreversibly removes every file under those paths. + Re-creates the empty subdirectories and seeds the agent README + afterwards. + """ + for sub in ("notes", "items", "runs"): + shutil.rmtree(self.memory_dir / sub, ignore_errors=True) + runs_jsonl = self.memory_dir / "runs.jsonl" + if runs_jsonl.exists(): + runs_jsonl.unlink() + for sub in ("notes", "items", "runs"): + (self.memory_dir / sub).mkdir(parents=True, exist_ok=True) + readme = self.memory_dir / "README.md" + if not readme.exists(): + readme.write_text(_AGENT_README_TEXT, encoding="utf-8") diff --git a/tests/mind/memory/test_filesystem.py b/tests/mind/memory/test_filesystem.py new file mode 100644 index 0000000..921b08d --- /dev/null +++ b/tests/mind/memory/test_filesystem.py @@ -0,0 +1,82 @@ +"""Tests for ``quantmind.mind.memory.filesystem``.""" + +import tempfile +import unittest +from pathlib import Path + +from agents.mcp import MCPServerStdio + +from quantmind.mind.memory._run_hooks import MemoryRunHooks +from quantmind.mind.memory.filesystem import FilesystemMemory + + +class FilesystemMemoryInitTests(unittest.TestCase): + def test_creates_subdirs_and_readme(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + for sub in ("notes", "items", "runs"): + self.assertTrue((mem.memory_dir / sub).is_dir()) + self.assertTrue((mem.memory_dir / "README.md").exists()) + + def test_readme_only_seeded_once(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + (mem.memory_dir / "README.md").write_text("custom") + FilesystemMemory(raw) + self.assertEqual( + (mem.memory_dir / "README.md").read_text(), "custom" + ) + + def test_rejects_root_path(self) -> None: + with self.assertRaises(ValueError): + FilesystemMemory("/") + + def test_rejects_home_path(self) -> None: + with self.assertRaises(ValueError): + FilesystemMemory(str(Path.home())) + + +class FilesystemMemoryMethodsTests(unittest.TestCase): + def test_tools_returns_empty(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + self.assertEqual(mem.tools(), []) + + def test_mcp_servers_returns_stdio_with_resolved_path(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + servers = mem.mcp_servers() + self.assertEqual(len(servers), 1) + self.assertIsInstance(servers[0], MCPServerStdio) + + def test_mcp_servers_returns_fresh_instance_each_call(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + self.assertIsNot(mem.mcp_servers()[0], mem.mcp_servers()[0]) + + def test_run_hooks_returns_memory_run_hooks_with_lock(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + h1 = mem.run_hooks() + h2 = mem.run_hooks() + self.assertIsInstance(h1, MemoryRunHooks) + self.assertIsNot(h1, h2) + assert isinstance(h1, MemoryRunHooks) + assert isinstance(h2, MemoryRunHooks) + self.assertIs(h1._archive_lock, h2._archive_lock) + + +class FilesystemMemoryResetTests(unittest.IsolatedAsyncioTestCase): + async def test_reset_wipes_subdirs_and_jsonl(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + (mem.memory_dir / "notes" / "n.md").write_text("x") + (mem.memory_dir / "items" / "i.json").write_text("{}") + (mem.memory_dir / "runs" / "r.json").write_text("{}") + (mem.memory_dir / "runs.jsonl").write_text("{}\n") + await mem.reset() + for sub in ("notes", "items", "runs"): + self.assertTrue((mem.memory_dir / sub).is_dir()) + self.assertEqual(list((mem.memory_dir / sub).iterdir()), []) + self.assertFalse((mem.memory_dir / "runs.jsonl").exists()) + self.assertTrue((mem.memory_dir / "README.md").exists()) From 66dbb0d2b4daf7bc4e35214b48ca0fe1010ed04a Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:53:34 +0800 Subject: [PATCH 05/11] refactor(flows): wire MemoryRunHooks via try/finally (PR6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit run_with_observability now consumes Memory.run_hooks() and calls MemoryRunHooks.persist in finally so failed runs still archive. _collect_hooks and _archive_run_artifacts are gone — the runner holds the inline orchestration; persistence lives in mind/memory/_trajectory.write_run_record. Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/flows/_runner.py | 93 ++++++++++++------------ tests/flows/test_runner.py | 145 ++++++++++++++++++++++++++++++------- 2 files changed, 163 insertions(+), 75 deletions(-) diff --git a/quantmind/flows/_runner.py b/quantmind/flows/_runner.py index 1f3b507..9b020a2 100644 --- a/quantmind/flows/_runner.py +++ b/quantmind/flows/_runner.py @@ -2,9 +2,9 @@ `run_with_observability` wraps `Runner.run` with `RunConfig` derived from `BaseFlowCfg`, composes user-supplied `RunHooks` (the SDK accepts only a -single hooks instance per run), and leaves a no-op call site for the -PR6 trajectory archive. Flow modules call this instead of touching the -SDK directly so observability behaviour stays in one place. +single hooks instance per run), and orchestrates the +`MemoryRunHooks.persist()` call in `finally` so failed runs still +produce a trajectory record. """ from typing import Any @@ -12,6 +12,7 @@ from agents import Agent, RunConfig, RunHooks, Runner from quantmind.configs import BaseFlowCfg +from quantmind.mind.memory import Memory, MemoryRunHooks async def run_with_observability( @@ -19,7 +20,7 @@ async def run_with_observability( input: str | list[Any], *, cfg: BaseFlowCfg, - memory: object | None = None, + memory: Memory | None = None, extra_run_hooks: list[RunHooks[Any]], ) -> Any: """Build `RunConfig` + composed hooks, run the agent, return final output. @@ -30,12 +31,12 @@ async def run_with_observability( cfg: Flow configuration. Tracing fields and ``max_turns`` are forwarded to the SDK; ``workflow_name`` falls back to ``"quantmind."`` when unset. - memory: PR6 ``Memory`` placeholder. Currently unused at runtime; - the value is forwarded to the trajectory-archive stub so PR6 - can wire it in without changing call sites. - extra_run_hooks: User-supplied hooks. Composed with any - memory-derived hooks (none in PR5) into a single - ``RunHooks`` instance. + memory: Optional ``Memory`` implementation. When set and + ``cfg.archive_trajectory`` is True, ``memory.run_hooks()`` + participates in the run and ``MemoryRunHooks.persist()`` + is invoked in ``finally`` (so failures archive too). + extra_run_hooks: User-supplied hooks composed after the memory + hook. Returns: ``RunResult.final_output`` typed by the agent's ``output_type``. @@ -47,29 +48,40 @@ async def run_with_observability( trace_include_sensitive_data=cfg.trace_include_sensitive_data, tracing_disabled=cfg.tracing_disabled, ) - hooks = _compose_hooks(_collect_hooks(memory, extra_run_hooks)) - result = await Runner.run( - agent, - input, - run_config=run_cfg, - hooks=hooks, - max_turns=cfg.max_turns, - ) - _archive_run_artifacts(cfg, memory, result) - return result.final_output - -def _collect_hooks( - memory: object | None, - extras: list[RunHooks[Any]], -) -> list[RunHooks[Any]]: - """Return hooks in run order: memory hooks first (PR6), then extras.""" - hooks: list[RunHooks[Any]] = [] - # PR6 will append `memory.run_hooks()` here when `memory` exposes the - # `Memory` Protocol. PR5 keeps `memory` opaque and contributes no hooks. - del memory - hooks.extend(extras) - return hooks + memory_hooks: MemoryRunHooks | None = None + hooks_list: list[RunHooks[Any]] = [] + if memory is not None and cfg.archive_trajectory: + h = memory.run_hooks() + if h is not None: + hooks_list.append(h) + if isinstance(h, MemoryRunHooks): + memory_hooks = h + hooks_list.extend(extra_run_hooks) + composed = _compose_hooks(hooks_list) + + result: Any = None + error: BaseException | None = None + try: + result = await Runner.run( + agent, + input, + run_config=run_cfg, + hooks=composed, + max_turns=cfg.max_turns, + ) + return result.final_output + except BaseException as exc: + error = exc + raise + finally: + if memory_hooks is not None: + await memory_hooks.persist( + workflow_name=workflow_name, + result=result, + error=error, + input_payload=input, + ) def _compose_hooks( @@ -87,8 +99,7 @@ class _CompositeRunHooks(RunHooks[Any]): """Fan out every lifecycle method to each wrapped hook in order. Exceptions from earlier hooks short-circuit the rest by design — hooks - are integral to the run, not best-effort. PR6's archive hook should - catch its own exceptions internally if it wants resilience. + are integral to the run, not best-effort. """ def __init__(self, inner: list[RunHooks[Any]]) -> None: @@ -121,17 +132,3 @@ async def on_tool_start(self, *args: Any, **kwargs: Any) -> None: async def on_tool_end(self, *args: Any, **kwargs: Any) -> None: for h in self._inner: await h.on_tool_end(*args, **kwargs) - - -def _archive_run_artifacts( - cfg: BaseFlowCfg, - memory: object | None, - result: Any, -) -> None: - """No-op stub. PR6 writes a trajectory record under ``/runs/``. - - Kept as a real call site (rather than commented-out) so PR6 changes - one function body, not the runner's public path. - """ - del cfg, memory, result - return None diff --git a/tests/flows/test_runner.py b/tests/flows/test_runner.py index 4ece594..65dab48 100644 --- a/tests/flows/test_runner.py +++ b/tests/flows/test_runner.py @@ -1,5 +1,6 @@ """Tests for ``quantmind.flows._runner``.""" +import tempfile import unittest from typing import Any from unittest.mock import AsyncMock, MagicMock, patch @@ -8,12 +9,11 @@ from quantmind.configs import PaperFlowCfg from quantmind.flows._runner import ( - _archive_run_artifacts, - _collect_hooks, _compose_hooks, _CompositeRunHooks, run_with_observability, ) +from quantmind.mind.memory import FilesystemMemory class _RecordingHooks(RunHooks[Any]): @@ -45,6 +45,12 @@ async def on_tool_end(self, *_: Any, **__: Any) -> None: self.log.append((self.label, "on_tool_end")) +class _FakeRunResult: + def __init__(self, output: str = "ok") -> None: + self.final_output = output + self.trace_id = "trace_xyz" + + class ComposeHooksTests(unittest.TestCase): def test_empty_returns_none(self) -> None: self.assertIsNone(_compose_hooks([])) @@ -73,7 +79,6 @@ async def test_fan_out_in_registration_order(self) -> None: await composite.on_handoff() await composite.on_tool_start() await composite.on_tool_end() - # Each method fires for both hooks in registration order. for method in ( "on_llm_start", "on_llm_end", @@ -100,26 +105,6 @@ async def on_llm_start(self, *_: Any, **__: Any) -> None: self.assertEqual(log, []) -class CollectHooksTests(unittest.TestCase): - def test_memory_contributes_nothing_in_pr5(self) -> None: - extra = _RecordingHooks("a", []) - # PR5: memory is forwarded but unused. - self.assertEqual(_collect_hooks(None, [extra]), [extra]) - self.assertEqual(_collect_hooks(object(), [extra]), [extra]) - - def test_no_extras_returns_empty(self) -> None: - self.assertEqual(_collect_hooks(None, []), []) - - -class ArchiveStubTests(unittest.TestCase): - def test_archive_is_no_op(self) -> None: - cfg = PaperFlowCfg() - result = MagicMock() - # Must not raise, must return None, must not touch result. - self.assertIsNone(_archive_run_artifacts(cfg, None, result)) - result.assert_not_called() - - class RunWithObservabilityTests(unittest.IsolatedAsyncioTestCase): async def test_run_config_built_from_cfg(self) -> None: cfg = PaperFlowCfg( @@ -156,11 +141,10 @@ async def test_run_config_built_from_cfg(self) -> None: self.assertEqual(run_cfg.trace_metadata, {"k": "v"}) self.assertFalse(run_cfg.trace_include_sensitive_data) self.assertTrue(run_cfg.tracing_disabled) - # No hooks supplied -> Runner.run sees None. self.assertIsNone(call.kwargs["hooks"]) async def test_workflow_name_falls_back_to_agent_name(self) -> None: - cfg = PaperFlowCfg() # workflow_name = None + cfg = PaperFlowCfg() agent = MagicMock() agent.name = "paper_extractor" fake_result = MagicMock() @@ -192,8 +176,115 @@ async def test_extra_hooks_forwarded(self) -> None: agent, "x", cfg=cfg, - memory=object(), # PR6 placeholder + memory=None, extra_run_hooks=[hook], ) - # Single hook -> passed through as-is, not wrapped in composite. self.assertIs(run_mock.await_args.kwargs["hooks"], hook) + + +class RunnerMemoryWiringTests(unittest.IsolatedAsyncioTestCase): + async def test_persist_invoked_on_success(self) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + cfg = PaperFlowCfg() + agent = MagicMock() + agent.name = "paper_extractor" + with ( + patch( + "quantmind.flows._runner.Runner.run", + new=AsyncMock(return_value=_FakeRunResult("done")), + ), + patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write, + ): + out = await run_with_observability( + agent, + "input", + cfg=cfg, + memory=mem, + extra_run_hooks=[], + ) + self.assertEqual(out, "done") + self.assertEqual(mock_write.await_count, 1) + record = mock_write.await_args.args[1] + self.assertIsNone(record.error) + self.assertEqual(record.trace_id, "trace_xyz") + + async def test_persist_invoked_on_failure_with_error_string( + self, + ) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + cfg = PaperFlowCfg() + agent = MagicMock() + agent.name = "paper_extractor" + boom = RuntimeError("boom") + with ( + patch( + "quantmind.flows._runner.Runner.run", + new=AsyncMock(side_effect=boom), + ), + patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write, + ): + with self.assertRaises(RuntimeError): + await run_with_observability( + agent, + "input", + cfg=cfg, + memory=mem, + extra_run_hooks=[], + ) + self.assertEqual(mock_write.await_count, 1) + record = mock_write.await_args.args[1] + self.assertEqual(record.error, "boom") + + async def test_archive_trajectory_false_skips_memory_hooks( + self, + ) -> None: + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + cfg = PaperFlowCfg(archive_trajectory=False) + agent = MagicMock() + agent.name = "paper_extractor" + with ( + patch( + "quantmind.flows._runner.Runner.run", + new=AsyncMock(return_value=_FakeRunResult()), + ), + patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write, + ): + await run_with_observability( + agent, + "input", + cfg=cfg, + memory=mem, + extra_run_hooks=[], + ) + self.assertEqual(mock_write.await_count, 0) + + async def test_no_memory_skips_persist(self) -> None: + cfg = PaperFlowCfg() + agent = MagicMock() + agent.name = "paper_extractor" + with ( + patch( + "quantmind.flows._runner.Runner.run", + new=AsyncMock(return_value=_FakeRunResult()), + ), + patch( + "quantmind.mind.memory._run_hooks.write_run_record", + new=AsyncMock(), + ) as mock_write, + ): + await run_with_observability( + agent, "input", cfg=cfg, memory=None, extra_run_hooks=[] + ) + self.assertEqual(mock_write.await_count, 0) From 343a53bb24ef9caa35dacf009213921c2eacc9ac Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:55:52 +0800 Subject: [PATCH 06/11] refactor(paper_flow): tighten memory: Memory | None and wire mcp/tools (PR6) paper_flow now imports the Memory Protocol from quantmind.mind.memory. memory.mcp_servers() and memory.tools() flow through to the Agent unconditionally; the cfg.archive_trajectory knob is about persistence only, not memory access. The PR5 placeholder test test_memory_accepted_as_no_op is removed (replaced by PaperFlowMemoryWiringTests covering the real wiring). Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/flows/paper.py | 15 ++++-- tests/flows/test_paper.py | 98 ++++++++++++++++++++++++++++++++++----- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/quantmind/flows/paper.py b/quantmind/flows/paper.py index 5b8373b..bf5e31f 100644 --- a/quantmind/flows/paper.py +++ b/quantmind/flows/paper.py @@ -26,6 +26,7 @@ ) from quantmind.flows._runner import run_with_observability from quantmind.knowledge import Paper +from quantmind.mind.memory import Memory from quantmind.preprocess.fetch import ( Fetched, fetch_arxiv, @@ -68,15 +69,17 @@ async def paper_flow( extra_tools: list[Tool] | None = None, extra_instructions: str | None = None, output_type: type[P] | None = None, - memory: object | None = None, + memory: Memory | None = None, extra_run_hooks: list[RunHooks[Any]] | None = None, extra_input_guardrails: list[Any] | None = None, extra_output_guardrails: list[Any] | None = None, ) -> P | Paper: """Extract a ``Paper`` from a typed ``PaperInput``. - See design doc §4.1 for the rationale on each kwarg. ``memory`` is a - PR6 placeholder — non-None values are accepted but unused in PR5. + See design doc §4.1 for the rationale on each kwarg. When ``memory`` + is supplied, ``memory.mcp_servers()`` and ``memory.tools()`` flow + through to the Agent unconditionally; trajectory archiving is gated + separately by ``cfg.archive_trajectory`` inside the runner. Raises: UnsupportedContentTypeError: When fetched bytes are not PDF / @@ -89,6 +92,9 @@ async def paper_flow( raw_md, source_meta = await _fetch_and_format(input) + mcp_servers = memory.mcp_servers() if memory is not None else [] + memory_tools = memory.tools() if memory is not None else [] + # Agent's `model_settings` parameter is non-optional (defaults to a # fresh ``ModelSettings()``); only forward when cfg has one set. agent_kwargs: dict[str, Any] = { @@ -97,7 +103,8 @@ async def paper_flow( _DEFAULT_INSTRUCTIONS, extra_instructions, cfg ), "model": cfg.model, - "tools": list(extra_tools or []), + "tools": [*(extra_tools or []), *memory_tools], + "mcp_servers": mcp_servers, "output_type": out_type, "input_guardrails": list(extra_input_guardrails or []), "output_guardrails": list(extra_output_guardrails or []), diff --git a/tests/flows/test_paper.py b/tests/flows/test_paper.py index 2a143c7..ef3178b 100644 --- a/tests/flows/test_paper.py +++ b/tests/flows/test_paper.py @@ -299,18 +299,6 @@ def _capture_agent(*_a: Any, **kwargs: Any) -> Any: self.assertEqual(seen["input_guardrails"], [in_g]) self.assertEqual(seen["output_guardrails"], [out_g]) - async def test_memory_accepted_as_no_op(self) -> None: - with ( - patch( - "quantmind.flows.paper.Agent", - return_value=MagicMock(), - ), - _patch_runner(_stub_paper()) as runner, - ): - await paper_flow(RawText(text="x"), memory=object()) - # The runner sees the memory placeholder forwarded. - self.assertIsNotNone(runner.await_args.kwargs["memory"]) - async def test_extra_run_hooks_forwarded(self) -> None: class _H(RunHooks[Any]): pass @@ -357,3 +345,89 @@ def _capture_agent(*_a: Any, **kwargs: Any) -> Any: ): await paper_flow(RawText(text="x")) self.assertNotIn("model_settings", seen) + + +class PaperFlowMemoryWiringTests(unittest.IsolatedAsyncioTestCase): + async def test_memory_mcp_servers_passed_to_agent(self) -> None: + import tempfile + + from quantmind.mind.memory import FilesystemMemory + + seen: dict[str, Any] = {} + + def _capture_agent(*_a: Any, **kwargs: Any) -> Any: + seen.update(kwargs) + return MagicMock() + + with tempfile.TemporaryDirectory() as raw: + mem = FilesystemMemory(raw) + with ( + patch( + "quantmind.flows.paper.Agent", + side_effect=_capture_agent, + ), + _patch_runner(_stub_paper()), + ): + await paper_flow(RawText(text="hello"), memory=mem) + self.assertEqual(len(seen["mcp_servers"]), 1) + + async def test_no_memory_yields_empty_mcp_servers_and_tools( + self, + ) -> None: + seen: dict[str, Any] = {} + + def _capture_agent(*_a: Any, **kwargs: Any) -> Any: + seen.update(kwargs) + return MagicMock() + + with ( + patch("quantmind.flows.paper.Agent", side_effect=_capture_agent), + _patch_runner(_stub_paper()), + ): + await paper_flow(RawText(text="hello")) + self.assertEqual(seen["mcp_servers"], []) + self.assertEqual(seen["tools"], []) + + async def test_memory_tools_appended_after_extra_tools(self) -> None: + from agents import function_tool + + @function_tool + def _extra_tool() -> str: + return "" + + @function_tool + def _memory_tool() -> str: + return "" + + class _ToolMemory: + def tools(self) -> list: + return [_memory_tool] + + def mcp_servers(self) -> list: + return [] + + def run_hooks(self) -> Any: + return None + + async def reset(self) -> None: + return None + + seen: dict[str, Any] = {} + + def _capture_agent(*_a: Any, **kwargs: Any) -> Any: + seen.update(kwargs) + return MagicMock() + + with ( + patch("quantmind.flows.paper.Agent", side_effect=_capture_agent), + _patch_runner(_stub_paper()), + ): + await paper_flow( + RawText(text="x"), + extra_tools=[_extra_tool], + memory=_ToolMemory(), + ) + # Order: extra_tools first, then memory_tools. + self.assertEqual(len(seen["tools"]), 2) + self.assertIs(seen["tools"][0], _extra_tool) + self.assertIs(seen["tools"][1], _memory_tool) From 2e2f16f0f67deb41081e1ef6a037dbde9c515d1e Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:56:30 +0800 Subject: [PATCH 07/11] chore(import-linter): pin mind/ as bounded subsystem (PR6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sixth contract forbids mind from importing flows, magic, or any of the deleted transitional packages (tripwires). flows depends on mind, so the reverse would create a cycle — the contract makes that fact unmissable. Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 50d6fe9..270ded0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,6 +188,22 @@ forbidden_modules = [ "quantmind.models", ] +[[tool.importlinter.contracts]] +name = "mind only depends on knowledge/configs/preprocess/utils + agents SDK" +type = "forbidden" +source_modules = ["quantmind.mind"] +# `mind` MUST NOT import `flows` or `magic` (those depend on `mind`, +# so the reverse would create a cycle). The deleted transitional +# packages remain as tripwires against accidental re-introduction. +forbidden_modules = [ + "quantmind.flows", + "quantmind.magic", + "quantmind.config", + "quantmind.flow", + "quantmind.llm", + "quantmind.models", +] + # ---------------------------------------------------------------------------- # pytest: configuration + coverage gate # ---------------------------------------------------------------------------- From 5bf7eccd9fb2ada7874a5b4ef49565e644409231 Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 01:58:16 +0800 Subject: [PATCH 08/11] docs(mind): runbook + state table for FilesystemMemory (PR6) README has a serial-loop runbook example showing the npx requirement and trajectory archive output, plus a clarification that batch_run rejects memory= by design. CLAUDE.md state table records the landed mind/memory/ module + sixth import-linter contract; roadmap promotes PR6 to "this PR" and keeps PR7+ as the next step. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 24 +++++++++++---------- README.md | 62 +++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 2c790ec..f7829ca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,31 +29,33 @@ quantmind/ Key principle: QuantMind does NOT rebuild Agent runtime, lifecycle hooks, tracing, multi-agent handoff, or tool framework. Those come from `openai-agents`. -## Current Repository State (after PR #70 / #73 / #74 / #75 / PR5) +## Current Repository State (after PR #70 / #73 / #74 / #75 / #76 / PR6) | Module | Status | Notes | |--------|--------|-------| | `quantmind/knowledge/` | landed (PR3) | data standard with three shapes: `FlattenKnowledge` (`News` / `Earnings` / `PaperKnowledgeCard`), `TreeKnowledge` (`Paper`), `GraphKnowledge` (placeholder); shared base = `BaseKnowledge` with typed `SourceRef` / `ExtractionRef` provenance + `embedding_text()` contract | | `quantmind/configs/` | landed (PR3) | `BaseFlowCfg` / `BaseInput` + per-flow cfg + discriminated-union input types | | `quantmind/preprocess/` | landed (PR4) | `fetch/` (`fetch_arxiv` / `fetch_url` / `resolve_doi` / `read_local_file` returning `Fetched` / `RawPaper` / `CrossrefMetadata` frozen dataclasses) + `format/` (`pdf_to_markdown` via PyMuPDF, `html_to_markdown` via trafilatura) + `clean.py` + `time.py`; leaf module — only depends on `quantmind.utils` | -| `quantmind/flows/` | landed (PR5) | apex layer: `paper_flow` (`PaperInput` → `Paper` via SDK Agent), `batch_run` + `BatchResult` (bounded-concurrency fan-out, `memory=` rejected by design), `_runner.run_with_observability` + `_compose_hooks` + `_archive_run_artifacts` (PR6 stub); only depends on configs/knowledge/preprocess/utils + `agents` SDK | +| `quantmind/flows/` | landed (PR5, refined PR6) | apex layer: `paper_flow` (`PaperInput` → `Paper` via SDK Agent, now wired with `memory.mcp_servers()` + `memory.tools()`), `batch_run` + `BatchResult` (bounded-concurrency fan-out, `memory=` rejected by design), `_runner.run_with_observability` (PR6: `try/finally` invokes `MemoryRunHooks.persist`); depends on configs/knowledge/preprocess/utils + `mind` + `agents` SDK | | `quantmind/magic.py` | landed (PR5) | `resolve_magic_input(natural_language, *, target_flow, ...) -> (input, cfg)` plus `preview_resolve` debug helper; introspects flow signatures and runs a lightweight resolver Agent with `output_type=ResolvedFlowConfig[InputT, CfgT]` | +| `quantmind/mind/memory/` | landed (PR6) | `Memory` Protocol (granular: `tools()` / `mcp_servers()` / `run_hooks()` / `reset()`) + `FilesystemMemory` MVP (MCP filesystem server via `npx`) + `MemoryRunHooks` accumulator + `RunRecord` trajectory archive under `/runs/` (atomic write + `runs.jsonl` index); sixth import-linter contract pins `mind` as bounded | | `quantmind/utils/logger.py` | permanent | only general-purpose utility | PR5 removed the transitional packages (`quantmind/{flow,llm,config,models}/` and their tests under `tests/{config,models}/`); PR4 had already removed `quantmind/parsers/`, `quantmind/sources/`, and `quantmind/utils/tmp.py`. -The codebase has now converged to the five permanent module roots -(`flows/`, `configs/`, `knowledge/`, `preprocess/`, `mind/`) plus -`magic.py` and `utils/`. +PR6 added `quantmind/mind/memory/` and tightened the `paper_flow` / +`_runner` signatures to consume the `Memory` Protocol directly. + +The codebase now has six permanent module roots (`flows/`, `configs/`, +`knowledge/`, `preprocess/`, `mind/`) plus `magic.py` and `utils/`. `basedpyright` runs in standard mode across the whole `quantmind/` -package — there are no per-module exclusions left. Five `import-linter` +package — there are no per-module exclusions left. Six `import-linter` contracts pin the dependency graph: `utils` and `knowledge` are leaves, `configs` only depends on `knowledge`, `preprocess` only depends on -`utils`, and `flows + magic` is the apex (cannot import the deleted -transitional packages, which are listed in the contract as a tripwire -against accidental re-introduction). +`utils`, `flows + magic` is the apex, and `mind` is a bounded subsystem +(cannot import `flows` / `magic` / the deleted transitional packages). ## Development Commands @@ -173,7 +175,7 @@ issue instead. | #73 (merged) | Golden Harness — `scripts/verify.sh` with ruff + basedpyright + import-linter + pytest --cov, plus matching CI | | #74 (merged) | `knowledge/` data standard (Flatten / Tree / Graph shapes) + `configs/` skeleton; `openai-agents>=0.14` introduced for `BaseFlowCfg.model_settings` | | #75 (merged) | `preprocess/` (fetch + format two layers); deletes `parsers/` + `sources/` + `utils/tmp.py`; coverage floor 60→65; 4th import-linter contract | -| PR5 (this PR) | `flows/` (`paper_flow` + `batch_run` + `BatchResult` + `_runner`) + `magic.py`; deletes `quantmind/{flow,llm,config,models}/`; coverage floor 65→75; 5th import-linter contract pins `flows + magic` as apex | -| PR6 | `mind/memory/filesystem` MVP + trajectory archive (fills `_archive_run_artifacts` stub) | +| #76 (merged) | `flows/` (`paper_flow` + `batch_run` + `BatchResult` + `_runner`) + `magic.py`; deletes `quantmind/{flow,llm,config,models}/`; coverage floor 65→75; 5th import-linter contract pins `flows + magic` as apex | +| PR6 (this PR) | `mind/memory/filesystem` MVP + trajectory archive: `Memory` Protocol + `FilesystemMemory` (MCP filesystem server) + `MemoryRunHooks` + `RunRecord`; tightens `paper_flow.memory` + `_runner` to consume `Memory \| None`; replaces the PR5 `_archive_run_artifacts` stub with `try/finally` invoking `MemoryRunHooks.persist`; 6th import-linter contract pins `mind` boundaries | | PR7 | `mind/store/` + SQLite + `sqlite-vec` MVP; introduces `preprocess/chunk.py` with `tiktoken` | | PR8+ | Second flow (news/earnings) / observability cookbook / longer-term modules | diff --git a/README.md b/README.md index fb6390f..9d16605 100644 --- a/README.md +++ b/README.md @@ -224,10 +224,43 @@ async def main() -> None: asyncio.run(main()) ``` +#### Persistent memory across a serial loop + +```python +import asyncio + +from quantmind.configs.paper import ArxivIdentifier +from quantmind.flows import paper_flow +from quantmind.mind.memory import FilesystemMemory + + +async def main() -> None: + mem = FilesystemMemory("./.qm-memory") + arxiv_ids = ["2401.12345", "2402.67890", "2403.11111"] + for paper_id in arxiv_ids: + paper = await paper_flow( + ArxivIdentifier(id=paper_id), + memory=mem, + ) + print(paper.title) + # Trajectory records are now under ./.qm-memory/runs/. + + +asyncio.run(main()) +``` + +`FilesystemMemory` requires Node.js + `npx` on PATH (the SDK launches +`@modelcontextprotocol/server-filesystem` over stdio). Each run also +writes `/runs/.json` and appends a one-line +summary to `/runs.jsonl`. `FilesystemMemory` is for serial +loops only — `batch_run` rejects `memory=` at the signature layer +(see design doc §4.3.5). + > **Note**: QuantMind is mid-migration to OpenAI Agents SDK -> (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR5 lands the -> apex layer (`flows/` + `magic.py`); the remaining work is the `mind/` -> memory + store layer scheduled for PR6 and PR7. +> (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR6 lands +> `mind/memory/` (Memory Protocol + `FilesystemMemory` MVP + trajectory +> archive); the remaining work is the `mind/store/` knowledge layer +> scheduled for PR7+. --- @@ -235,10 +268,11 @@ asyncio.run(main()) - [x] Better `flow` design for user-friendly usage - [x] First production level example (Quant Paper Agent) -- [ ] Migrate Agent layer to OpenAI Agents SDK -- [ ] Standardize knowledge format with `knowledge/` (Pydantic-based) +- [x] Migrate Agent layer to OpenAI Agents SDK +- [x] Standardize knowledge format with `knowledge/` (Pydantic-based) +- [x] Cross-step working memory (`mind/memory`) for serial document processing - [ ] Additional content sources (financial news, blogs, reports) -- [ ] Cross-step working memory (`mind/memory`) for batch document processing +- [ ] `mind/store/` — durable knowledge store with hybrid retrieval (PR7+) --- @@ -252,18 +286,10 @@ QuantMind is designed with a larger vision: to become a comprehensive intelligen The foundation we're building today—starting with papers—will expand to encompass the entire financial information ecosystem. > [!NOTE] -> **Future Conceptual Example (PR6 brings `FilesystemMemory`):** -> -> ```python -> from quantmind.configs.paper import ArxivIdentifier -> from quantmind.flows import paper_flow -> from quantmind.knowledge import Paper -> from quantmind.mind.memory import FilesystemMemory # PR6 -> -> memory = FilesystemMemory("./mem/factor-research/") -> for arxiv_id in arxiv_ids: -> paper: Paper = await paper_flow(ArxivIdentifier(id=arxiv_id), memory=memory) -> ``` +> **`FilesystemMemory` landed in PR6.** See the runbook example above +> (*Persistent memory across a serial loop*) for the canonical usage. +> Future cognitive layers (`mind/store`, `mind/summarize_run`) build on +> this foundation — they share the same `/` layout. This future state represents our commitment to moving beyond simple data aggregation and toward genuine machine intelligence in the financial domain. From d5f0c89caf78c6e05f4ee4a662ed336181148776 Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 02:00:03 +0800 Subject: [PATCH 09/11] fix(mind): align RunHooks override signatures with SDK base (PR6) basedpyright caught two issues: - _safe_repr's dump() call returned object (not str) because getattr loses the typed signature; wrap with str() to satisfy the return type. - Lifecycle override parameter names must match RunHooksBase (context, not ctx) for reportIncompatibleMethodOverride. Co-Authored-By: Claude Opus 4.7 (1M context) --- quantmind/mind/memory/_run_hooks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/quantmind/mind/memory/_run_hooks.py b/quantmind/mind/memory/_run_hooks.py index c89f624..a61dca1 100644 --- a/quantmind/mind/memory/_run_hooks.py +++ b/quantmind/mind/memory/_run_hooks.py @@ -45,7 +45,7 @@ def _safe_repr(obj: Any) -> str: dump = getattr(obj, "model_dump_json", None) if callable(dump): try: - return dump() + return str(dump()) except Exception: # noqa: BLE001 pass return str(obj) @@ -75,7 +75,7 @@ def __init__(self, *, memory_dir: Path, archive_lock: asyncio.Lock) -> None: self._llm_timer_start: float | None = None self._tool_timer_starts: dict[int, float] = {} - async def on_agent_start(self, ctx: Any, agent: Any) -> None: + async def on_agent_start(self, context: Any, agent: Any) -> None: self._started_at = datetime.now(timezone.utc) self._agent_name = str(getattr(agent, "name", "") or "") self._agent_model = str(getattr(agent, "model", "") or "") @@ -86,7 +86,7 @@ async def on_agent_start(self, ctx: Any, agent: Any) -> None: async def on_llm_start(self, *_: Any, **__: Any) -> None: self._llm_timer_start = time.monotonic() - async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None: + async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None: duration = ( (time.monotonic() - self._llm_timer_start) if self._llm_timer_start is not None @@ -106,11 +106,11 @@ async def on_llm_end(self, ctx: Any, agent: Any, response: Any) -> None: } ) - async def on_tool_start(self, ctx: Any, agent: Any, tool: Any) -> None: + async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None: self._tool_timer_starts[id(tool)] = time.monotonic() async def on_tool_end( - self, ctx: Any, agent: Any, tool: Any, result: Any + self, context: Any, agent: Any, tool: Any, result: Any ) -> None: start = self._tool_timer_starts.pop(id(tool), None) duration = (time.monotonic() - start) if start is not None else 0.0 @@ -123,7 +123,7 @@ async def on_tool_end( } ) - async def on_agent_end(self, ctx: Any, agent: Any, output: Any) -> None: + async def on_agent_end(self, context: Any, agent: Any, output: Any) -> None: self._ended_at = datetime.now(timezone.utc) self._output_summary = _truncate(_safe_repr(output)) From 102c5be20eb006a12ff9e3b405e389ad70822f0f Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 02:06:12 +0800 Subject: [PATCH 10/11] docs: strip internal design-doc references from shipped surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The next-step-architecture.md design doc is local-only (gitignored) and shouldn't be referenced from any shipped docstring, comment, error message, or end-user docs. This commit removes every such reference (5 in quantmind/, 1 in README.md, 1 in CLAUDE.md) without losing any user-facing meaning — the surrounding text still says what the constraint is, just without the dangling doc pointer. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 2 +- README.md | 3 +-- quantmind/flows/batch.py | 7 +++---- quantmind/flows/paper.py | 10 +++++----- quantmind/mind/memory/_protocol.py | 7 +++---- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f7829ca..535383c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -152,7 +152,7 @@ issue instead. - ❌ Introduce class-based `BaseFlow` / plugin registry / hook discovery - ❌ Wrap `from agents import ...` in a QuantMind-side facade — use the SDK directly - ❌ Mix `batch_run` and `memory` (mutually exclusive in MVP; `batch_run` rejects - `memory=` at the signature layer — design doc §4.3.5) + `memory=` at the signature layer) - ❌ Use `Dict[str, Any]` in init functions; use Pydantic models - ❌ Add hard deps on observability platforms (Langfuse / Logfire / etc.); document integration via `add_trace_processor()` in user-facing cookbook only diff --git a/README.md b/README.md index 9d16605..cab9e1c 100644 --- a/README.md +++ b/README.md @@ -253,8 +253,7 @@ asyncio.run(main()) `@modelcontextprotocol/server-filesystem` over stdio). Each run also writes `/runs/.json` and appends a one-line summary to `/runs.jsonl`. `FilesystemMemory` is for serial -loops only — `batch_run` rejects `memory=` at the signature layer -(see design doc §4.3.5). +loops only — `batch_run` rejects `memory=` at the signature layer. > **Note**: QuantMind is mid-migration to OpenAI Agents SDK > (see [#71](https://github.com/LLMQuant/quant-mind/issues/71)). PR6 lands diff --git a/quantmind/flows/batch.py b/quantmind/flows/batch.py index 9e6c659..48c1309 100644 --- a/quantmind/flows/batch.py +++ b/quantmind/flows/batch.py @@ -2,8 +2,8 @@ `batch_run` is the single concurrency primitive QuantMind ships in MVP. It does NOT support `memory=`; for memory-accumulating workflows users -write a serial `for` loop themselves (design doc §4.3.5). This keeps the -batch path stateless and free of cross-run race hazards. +write a serial `for` loop themselves. This keeps the batch path +stateless and free of cross-run race hazards. """ import asyncio @@ -93,8 +93,7 @@ async def batch_run( raise ValueError( "batch_run does not support `memory=` in MVP. For " "memory-accumulating workflows write a serial loop instead: " - "`for inp in inputs: await flow_fn(inp, cfg=cfg, memory=memory)`. " - "See design doc §4.3.5." + "`for inp in inputs: await flow_fn(inp, cfg=cfg, memory=memory)`." ) if concurrency < 1: raise ValueError(f"concurrency must be >= 1, got {concurrency}") diff --git a/quantmind/flows/paper.py b/quantmind/flows/paper.py index bf5e31f..b58a786 100644 --- a/quantmind/flows/paper.py +++ b/quantmind/flows/paper.py @@ -8,7 +8,7 @@ Customization happens through the configured ``PaperFlowCfg`` (Layer 1) or the keyword arguments on this function (Layer 2). To swap the whole -flow, fork this file (Layer 3 — design doc §9). +flow, fork this file (Layer 3). """ from typing import Any, TypeVar @@ -76,10 +76,10 @@ async def paper_flow( ) -> P | Paper: """Extract a ``Paper`` from a typed ``PaperInput``. - See design doc §4.1 for the rationale on each kwarg. When ``memory`` - is supplied, ``memory.mcp_servers()`` and ``memory.tools()`` flow - through to the Agent unconditionally; trajectory archiving is gated - separately by ``cfg.archive_trajectory`` inside the runner. + When ``memory`` is supplied, ``memory.mcp_servers()`` and + ``memory.tools()`` flow through to the Agent unconditionally; + trajectory archiving is gated separately by + ``cfg.archive_trajectory`` inside the runner. Raises: UnsupportedContentTypeError: When fetched bytes are not PDF / diff --git a/quantmind/mind/memory/_protocol.py b/quantmind/mind/memory/_protocol.py index 6f0038f..ab85219 100644 --- a/quantmind/mind/memory/_protocol.py +++ b/quantmind/mind/memory/_protocol.py @@ -2,10 +2,9 @@ Each method has its own narrow surface so concrete implementations can opt in to whichever channel(s) they need (in-process tools, MCP servers, -lifecycle hooks). The Protocol does NOT prescribe MCP — see design doc -\u00a711.2 for the rationale: future backends like an embedding-based -``ChromaMemory`` are tool-only, while the MVP ``FilesystemMemory`` is -MCP-based. +lifecycle hooks). The Protocol does NOT prescribe MCP: a future +embedding-based ``ChromaMemory`` could be tool-only, while the MVP +``FilesystemMemory`` is MCP-based — both satisfy this same Protocol. """ from typing import Any, Protocol, runtime_checkable From 08e068ba5cedcc853e8d3e7ede2f8cc73ffd87cb Mon Sep 17 00:00:00 2001 From: pkuwkl Date: Fri, 8 May 2026 14:31:01 +0800 Subject: [PATCH 11/11] docs(examples): runnable mind/memory walkthroughs (PR6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four self-contained scripts under examples/memory/ that exercise FilesystemMemory + MemoryRunHooks end to end: 01_basic.py — shortest possible memory run; show disk layout 02_serial_loop.py — N-input serial loop sharing one memory_dir 03_inspect_trajectory — disk-only post-run analysis (no API needed) 04_custom_run_hooks — compose your own RunHooks via extra_run_hooks= README.md is the index. ruff per-file-ignores skip docstring rules (D-series) for examples/ — module-level docstring is enough for short demos. Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/memory/01_basic.py | 62 ++++++++++++++++ examples/memory/02_serial_loop.py | 75 +++++++++++++++++++ examples/memory/03_inspect_trajectory.py | 59 +++++++++++++++ examples/memory/04_custom_run_hooks.py | 95 ++++++++++++++++++++++++ examples/memory/README.md | 45 +++++++++++ pyproject.toml | 3 + 6 files changed, 339 insertions(+) create mode 100644 examples/memory/01_basic.py create mode 100644 examples/memory/02_serial_loop.py create mode 100644 examples/memory/03_inspect_trajectory.py create mode 100644 examples/memory/04_custom_run_hooks.py create mode 100644 examples/memory/README.md diff --git a/examples/memory/01_basic.py b/examples/memory/01_basic.py new file mode 100644 index 0000000..47881c8 --- /dev/null +++ b/examples/memory/01_basic.py @@ -0,0 +1,62 @@ +"""01 — The shortest memory run. + +Builds a ``FilesystemMemory``, runs ``paper_flow`` once with it, then +prints what the trajectory archive looks like on disk afterwards. + +What to look for: + +- ``./.qm-memory/`` is created with ``notes/`` ``items/`` ``runs/`` and + a seeded ``README.md`` (the agent's own usage guide for the dir). +- One ``runs/.json`` file lands per ``paper_flow`` call. +- ``runs.jsonl`` gets one new line per call (a denormalised index of + the per-run files — handy for `jq` / `pandas`). + +Prerequisites: OPENAI_API_KEY in env, Node.js + npx on PATH. + +Run: + python examples/memory/01_basic.py +""" + +import asyncio +from pathlib import Path + +from quantmind.configs.paper import RawText +from quantmind.flows import paper_flow +from quantmind.mind.memory import FilesystemMemory + + +async def main() -> None: + memory_dir = Path("./.qm-memory") + mem = FilesystemMemory(memory_dir) + + # A tiny paper-shaped input keeps cost low; swap in + # ``ArxivIdentifier(id="...")`` to see a real extraction end to end. + paper = await paper_flow( + RawText( + text=( + "# Toy paper\n\n" + "Title: Cross-sectional momentum, simplified\n" + "Author: Demo\n\n" + "Body: We illustrate momentum on a 3-stock universe." + ) + ), + memory=mem, + ) + print(f"Extracted paper: {paper.title!r}") + + # Show what landed under .qm-memory/. + print(f"\nMemory layout under {memory_dir}:") + for child in sorted(memory_dir.rglob("*")): + if child.is_file(): + print( + f" {child.relative_to(memory_dir)} ({child.stat().st_size}B)" + ) + + runs_jsonl = memory_dir / "runs.jsonl" + if runs_jsonl.exists(): + print(f"\nLast line of {runs_jsonl}:") + print(runs_jsonl.read_text().splitlines()[-1][:200] + " ...") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/memory/02_serial_loop.py b/examples/memory/02_serial_loop.py new file mode 100644 index 0000000..c8968b6 --- /dev/null +++ b/examples/memory/02_serial_loop.py @@ -0,0 +1,75 @@ +"""02 — Serial loop sharing one FilesystemMemory. + +The point of cross-step memory is letting run N see what runs 1..N-1 +left behind. ``FilesystemMemory`` exposes ``notes/`` and ``items/`` to +the Agent through an MCP filesystem server, so the Agent can list / +read / write files there during its own turn. + +What to look for: + +- ``./.qm-memory/runs/`` accumulates one trajectory record per loop + iteration (3 here). +- ``./.qm-memory/notes/`` may grow if the Agent decides to write notes + while extracting (it sees the seeded ``README.md`` and is encouraged + to do so). +- ``./.qm-memory/runs.jsonl`` has 3 appended lines after this script. + +This is the "memory-accumulating workflow" pattern. ``batch_run`` +rejects ``memory=`` at the signature level — for memory accumulation +you write the loop yourself, exactly like below. + +Prerequisites: OPENAI_API_KEY, Node.js + npx, network. + +Run: + python examples/memory/02_serial_loop.py +""" + +import asyncio +from pathlib import Path + +from quantmind.configs.paper import RawText +from quantmind.flows import paper_flow +from quantmind.mind.memory import FilesystemMemory + +_FAKE_PAPERS = [ + ( + "# Momentum returns 1\n" + "Title: Cross-sectional momentum on US equities\n" + "Body: Long winners short losers monthly." + ), + ( + "# Momentum returns 2\n" + "Title: Time-series momentum on commodities\n" + "Body: 12-month look-back, monthly rebalance." + ), + ( + "# Momentum returns 3\n" + "Title: Combining XS and TS momentum\n" + "Body: 50/50 equal weight composite." + ), +] + + +async def main() -> None: + mem = FilesystemMemory(Path("./.qm-memory")) + + for idx, body in enumerate(_FAKE_PAPERS, start=1): + print(f"\n=== run {idx}/3 ===") + paper = await paper_flow(RawText(text=body), memory=mem) + print(f" -> {paper.title!r}") + + # Quick post-run summary. + runs_dir = mem.memory_dir / "runs" + print( + f"\nTrajectory files: {len(list(runs_dir.glob('*.json')))} in {runs_dir}" + ) + + notes_dir = mem.memory_dir / "notes" + notes = list(notes_dir.glob("*")) + print(f"Notes the agent left: {len(notes)} in {notes_dir}") + for n in notes[:5]: + print(f" - {n.name}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/memory/03_inspect_trajectory.py b/examples/memory/03_inspect_trajectory.py new file mode 100644 index 0000000..c705426 --- /dev/null +++ b/examples/memory/03_inspect_trajectory.py @@ -0,0 +1,59 @@ +"""03 — Read trajectory archive offline. + +After 01 / 02 have run, this script needs no network and no npx — it +opens ``runs.jsonl`` and aggregates the ``RunRecord`` fields so you +can see exactly what ``MemoryRunHooks`` captured: timing, token usage, +LLM call breakdown, tool calls. + +This is also the right shape for building dashboards / cost reports +later: each line is one self-contained run record. + +Run: + python examples/memory/03_inspect_trajectory.py ./.qm-memory +""" + +import json +import sys +from pathlib import Path + + +def main() -> None: + memory_dir = Path(sys.argv[1] if len(sys.argv) > 1 else "./.qm-memory") + runs_jsonl = memory_dir / "runs.jsonl" + if not runs_jsonl.exists(): + print(f"No {runs_jsonl}. Run 01_basic.py or 02_serial_loop.py first.") + sys.exit(1) + + records = [ + json.loads(line) for line in runs_jsonl.read_text().splitlines() if line + ] + print(f"Loaded {len(records)} run record(s) from {runs_jsonl}\n") + + # Per-run summary. + for r in records: + toks = r["tokens_total"] + print( + f" {r['run_id']} " + f"workflow={r['workflow_name']} " + f"duration={r['duration_seconds']:.2f}s " + f"in={toks['input']}/out={toks['output']} " + f"llm_calls={len(r['llm_calls'])} " + f"tool_calls={len(r['tool_calls'])} " + f"error={r['error']!r}" + ) + + # Aggregate. + total_in = sum(r["tokens_total"]["input"] for r in records) + total_out = sum(r["tokens_total"]["output"] for r in records) + total_dur = sum(r["duration_seconds"] for r in records) + n_failed = sum(1 for r in records if r["error"]) + print() + print( + f"Aggregate: tokens_in={total_in} tokens_out={total_out} " + f"total_duration={total_dur:.2f}s " + f"failed={n_failed}/{len(records)}" + ) + + +if __name__ == "__main__": + main() diff --git a/examples/memory/04_custom_run_hooks.py b/examples/memory/04_custom_run_hooks.py new file mode 100644 index 0000000..27da29c --- /dev/null +++ b/examples/memory/04_custom_run_hooks.py @@ -0,0 +1,95 @@ +"""04 — Custom RunHooks composing with MemoryRunHooks. + +``paper_flow`` accepts ``extra_run_hooks=[...]`` so you can attach +your own ``RunHooks`` (logger, metrics emitter, slack ping, ...) +alongside the built-in ``MemoryRunHooks`` that ``FilesystemMemory`` +contributes. The runner composes them all into one fan-out hook that +fires every lifecycle event for every registered hook in registration +order. + +What to look for: + +- ``[memory]`` lines printed by the built-in MemoryRunHooks aren't + visible (it accumulates silently, then writes ``runs/.json`` + in ``finally``). +- ``[my-logger]`` lines below come from ``ConsoleLoggerHooks``, the + custom hook we plug in here. + +Prerequisites: OPENAI_API_KEY, Node.js + npx. + +Run: + python examples/memory/04_custom_run_hooks.py +""" + +import asyncio +from pathlib import Path +from typing import Any + +from agents import RunHooks + +from quantmind.configs.paper import RawText +from quantmind.flows import paper_flow +from quantmind.mind.memory import FilesystemMemory + + +class ConsoleLoggerHooks(RunHooks[Any]): + """Tiny example hook that prints lifecycle events to stdout.""" + + async def on_agent_start(self, context: Any, agent: Any) -> None: + print( + f"[my-logger] agent start name={agent.name!r} model={agent.model!r}" + ) + + async def on_llm_start(self, *_: Any, **__: Any) -> None: + print("[my-logger] llm start") + + async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None: + usage = getattr(response, "usage", None) + if usage is not None: + print( + f"[my-logger] llm end tokens_in={getattr(usage, 'input_tokens', 0)} " + f"tokens_out={getattr(usage, 'output_tokens', 0)}" + ) + else: + print("[my-logger] llm end (no usage info)") + + async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None: + print(f"[my-logger] tool start name={getattr(tool, 'name', '?')!r}") + + async def on_tool_end( + self, context: Any, agent: Any, tool: Any, result: Any + ) -> None: + snippet = ( + (str(result)[:60] + "...") if len(str(result)) > 60 else result + ) + print( + f"[my-logger] tool end name={getattr(tool, 'name', '?')!r} " + f"result={snippet!r}" + ) + + async def on_agent_end(self, context: Any, agent: Any, output: Any) -> None: + print(f"[my-logger] agent end output_type={type(output).__name__}") + + +async def main() -> None: + mem = FilesystemMemory(Path("./.qm-memory")) + + paper = await paper_flow( + RawText( + text=( + "# Toy paper\n\nTitle: Custom-hook demo\n\n" + "Body: Show that user RunHooks fire alongside MemoryRunHooks." + ) + ), + memory=mem, + extra_run_hooks=[ConsoleLoggerHooks()], + ) + print(f"\nExtracted: {paper.title!r}") + print( + f"\nTrajectory file: " + f"{sorted((mem.memory_dir / 'runs').glob('*.json'))[-1]}" + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/memory/README.md b/examples/memory/README.md new file mode 100644 index 0000000..4d66213 --- /dev/null +++ b/examples/memory/README.md @@ -0,0 +1,45 @@ +# `mind/memory` examples + +Small runnable scripts that walk you through `FilesystemMemory` + +`MemoryRunHooks` end to end. Each one is self-contained — read the +top-of-file comment block for what it shows. + +## Prerequisites + +- `pip install -e ".[dev]"` (so `quantmind` is importable). +- `OPENAI_API_KEY` set in your shell (the real Agent run uses an LLM). +- Node.js + `npx` on PATH — `FilesystemMemory` launches + `@modelcontextprotocol/server-filesystem` over stdio. Examples 01, + 02, and 04 spawn a real MCP subprocess; example 03 only reads disk. + +## What each script demonstrates + +| # | Script | What you learn | +|---|--------|----------------| +| 01 | `01_basic.py` | The shortest possible memory run: build a `FilesystemMemory`, pass it to `paper_flow`, then look at the disk layout that gets created (`notes/`, `items/`, `runs/`, `runs.jsonl`). | +| 02 | `02_serial_loop.py` | Process several inputs in a serial `for` loop sharing one `FilesystemMemory`. Each run can read what previous runs left in `notes/` via the MCP filesystem server. (`batch_run` rejects `memory=` by design — for memory-accumulating workflows, you write the loop yourself.) | +| 03 | `03_inspect_trajectory.py` | Disk-only analysis: open `runs.jsonl`, parse the `RunRecord` JSON, and aggregate tokens / durations across runs. No network or `npx` needed. | +| 04 | `04_custom_run_hooks.py` | Compose your own `RunHooks` (e.g., a custom logger) alongside the built-in `MemoryRunHooks` via the `extra_run_hooks=` kwarg. Both hooks fire for every lifecycle event in the order you registered them. | + +## Run them + +```bash +# from the repo root, after installing +python examples/memory/01_basic.py +python examples/memory/02_serial_loop.py +python examples/memory/03_inspect_trajectory.py ./.qm-memory +python examples/memory/04_custom_run_hooks.py +``` + +All four examples write under `./.qm-memory/` (a directory created +under your current working directory). Delete it freely between runs; +nothing is shared with your other projects. + +## A note on `cfg.archive_trajectory` + +`FilesystemMemory.run_hooks()` returns the `MemoryRunHooks` instance +that produces `runs/.json` and the `runs.jsonl` index. The +runner only attaches it when `cfg.archive_trajectory` is `True` +(the default). If you set it to `False`, the agent still gets the +`mcp_servers` + `tools` from your `Memory` (so it can read previous +notes) — only the trajectory archive is suppressed. diff --git a/pyproject.toml b/pyproject.toml index 270ded0..654ff32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,9 @@ ignore = [ [tool.ruff.lint.per-file-ignores] # Tests don't need module/function docstrings or strict bugbear rules. "tests/**/*.py" = ["D", "B"] +# Example scripts: top-of-file module docstring is enough; per-function +# docstrings just clutter short demos. +"examples/**/*.py" = ["D"] "**/__init__.py" = ["D104", "F401"] [tool.ruff.lint.pycodestyle]