Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/lang2sql/adapters/db/dsn_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,18 @@ def build_bigquery(*, project: str, dataset: str) -> ConnectionSpec:


def build_duckdb(*, path: str) -> ConnectionSpec:
    """Build the connection spec for a DuckDB database.

    ``path`` may be a bare filesystem path, the literal ``:memory:``, or a
    complete ``duckdb:`` DSN pasted into the field. Surrounding whitespace is
    stripped before the value is inspected.

    Args:
        path: Filesystem path, ``:memory:``, or a full ``duckdb:`` DSN.

    Returns:
        A ``ConnectionSpec`` whose ``dsn`` is a ``duckdb:`` URL and whose
        ``extras`` mapping is empty.
    """
    path = path.strip()
    if path.startswith("duckdb:"):
        # Already a DSN: pass it through instead of double-wrapping it into
        # ``duckdb:///duckdb:...``.
        dsn = path
    elif path == ":memory:":
        dsn = "duckdb:///:memory:"
    else:
        # Absolute paths already start with "/", so this yields the
        # four-slash form (duckdb:////abs); relative paths get three.
        dsn = f"duckdb:///{path}"
    return ConnectionSpec(dsn=dsn, extras={})


def build_d1(*, account_id: str, database_id: str, api_token: str) -> ConnectionSpec:
Expand Down
9 changes: 9 additions & 0 deletions src/lang2sql/harness/system_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@
- When you need data, call the run_sql tool with a single SELECT/WITH query.
- Discover schema with explore_schema before guessing table or column names.
- Prefer definitions from the semantic layer below over your own assumptions.
- For a total count or sum, use ONE aggregate (e.g. COUNT(DISTINCT ...), SUM(...))
with NO GROUP BY, unless the user explicitly asks for a per-group breakdown.
Never GROUP BY the very entity you are counting — that returns one row per
entity (each value = 1), not the total.
- Never put a LIMIT on an aggregate query, and never pass a small `limit` for one.
`limit` is only for listing raw rows (default 1000); a small LIMIT on a
GROUP BY silently truncates the result and yields a wrong total.
- Sanity-check the answer: if a count/total looks implausible (e.g. "1 customer"
for a large table), assume the SQL is wrong, rewrite it, and re-run before answering.
- Answer concisely. Show only the final successful SQL you ran, not intermediate attempts.
"""

Expand Down
82 changes: 71 additions & 11 deletions src/lang2sql/tools/semantic_federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,12 @@ async def run(self, args: dict[str, Any], ctx: "HarnessContext") -> ToolResult:
channel_id = ctx.identity.effective_channel_id

if args.get("list"):
return ToolResult(call_id="", content=_render_effective(ctx.store, scope, channel_id, user_id))
eff = _render_effective(ctx.store, scope, channel_id, user_id)
layers = _render_layers(ctx.store, scope, channel_id, user_id)
return ToolResult(
call_id="",
content=eff + "\n\n---\n## 레이어별 저장 현황 (덮어쓰기 아님 — 계층)\n" + layers,
)

if args.get("scan"):
return ToolResult(call_id="", content=_scan_schema(ctx.store, scope))
Expand Down Expand Up @@ -352,7 +357,13 @@ def build_prompt_section(store: Any, scope: str, channel_id: str, user_id: str)
2. 쿼리 후 사용한 해석을 명시하고, term_custom 등록 여부와 범위(guild/channel/member)를 사용자에게 묻는다.
예: "'신규고객'을 'users.created_at >= NOW()-30일'로 해석했습니다. 이 정의를 어느 범위로 등록할까요?"
3. 사용자가 범위를 지정하면 term_custom 툴로 즉시 등록한다 (inferred=true).
4. inferred=true 엔트리가 이미 있으면 해당 정의를 우선 사용하되, 사용자에게 확정 여부를 확인한다.\
4. inferred=true 엔트리가 이미 있으면 해당 정의를 우선 사용하되, 사용자에게 확정 여부를 확인한다.

사용자가 용어의 정의를 직접 알려주면(예: "활성 고객은 2010·2011년에 한 번이라도 구매한 고객이야"):
5. 되묻지 말고 그 정의대로 term_custom 툴을 **반드시 호출해 즉시 등록**한다
(범위는 현재 채널=channel 기본, inferred=false). 등록한 뒤 그 정의로 SQL을 만든다.
6. 한 번 등록한 용어는 매 턴 새로 해석하지 말고, 저장된 정의를 그대로 사용해 일관되게 답한다.
(등록을 건너뛰고 답만 하면 다음 질문에서 답이 달라지므로 반드시 먼저 등록할 것.)\
"""


Expand All @@ -364,22 +375,30 @@ def _fmt_entry(e: FedEntry, tag: str) -> str:


def _resolve_term(entries: list[FedEntry], channel_id: str, user_id: str) -> str:
    """Resolve the effective definition of one term, narrowest layer first.

    Lookup order is member > channel > guild. When a member or channel entry
    wins and a guild entry also exists, the guild definition is appended to
    the rendered line: an override sits on top of the guild definition, it
    does not delete or hide it.

    Args:
        entries: All stored entries for a single term, across layers.
        channel_id: Channel whose channel-layer entry applies.
        user_id: User whose member-layer entry applies.

    Returns:
        The formatted line for the winning entry, or ``""`` when no entry
        applies to this channel/user.
    """
    # First guild-layer entry, if any; serves as both the fallback and the
    # "base definition" annotation for overrides.
    guild_e = next((e for e in entries if e.layer == "guild"), None)

    def _line(e: FedEntry, tag: str) -> str:
        # Format an overriding entry and append the guild base when one exists.
        line = _fmt_entry(e, tag)
        if guild_e is not None and e is not guild_e:
            line += f" (전사 기본: {guild_e.definition})"
        return line

    # 1. Personal (member) override for this user.
    for e in entries:
        if e.layer == "member" and e.entity == user_id:
            return _line(e, f"개인:{user_id}")
    # 2. Definition scoped to this channel.
    for e in entries:
        if e.layer == "channel" and e.entity == channel_id:
            return _line(e, "채널")
    # 3. Guild-wide definition.
    if guild_e is not None:
        return _fmt_entry(guild_e, "전사")
    return ""


Expand All @@ -398,3 +417,44 @@ def _render_effective(store: Any, scope: str, channel_id: str, user_id: str) ->
if len(lines) == 1:
lines.append("(이 채널에 적용되는 용어 정의가 없습니다)")
return "\n".join(lines)


def _render_layers(store: Any, scope: str, channel_id: str, user_id: str) -> str:
    """Render every stored definition grouped by layer (guild / channel / member).

    Complements ``_render_effective``, which shows only one effective
    definition per term and therefore lets a channel override mask the guild
    definition on screen. Listing each layer separately makes it visible that
    the guild definition is still stored.

    Args:
        store: Backing store handed to ``_load_all``.
        scope: Scope handed to ``_load_all``.
        channel_id: Only channel-layer entries for this channel are listed.
        user_id: Only member-layer entries for this user are listed.

    Returns:
        A newline-joined listing, or a fixed message when nothing is stored
        or nothing applies to this channel/user.
    """
    terms = _load_all(store, scope)
    if not terms:
        return "등록된 용어가 없습니다."

    def _term_key(entry):
        # Case-insensitive term name, used for sorting and override matching.
        return entry.term.lower()

    flat = [entry for term_entries in terms.values() for entry in term_entries]
    guild_rows = [entry for entry in flat if entry.layer == "guild"]
    channel_rows = [
        entry for entry in flat
        if entry.layer == "channel" and entry.entity == channel_id
    ]
    member_rows = [
        entry for entry in flat
        if entry.layer == "member" and entry.entity == user_id
    ]

    # Terms that this channel or this user redefines on top of the guild layer.
    overridden = {_term_key(entry) for entry in (*channel_rows, *member_rows)}

    lines: list[str] = []
    if guild_rows:
        lines.append("### 전사(guild) 공통")
        for entry in sorted(guild_rows, key=_term_key):
            rendered = _fmt_entry(entry, "전사")
            if _term_key(entry) in overridden:
                rendered += " ⤷ 이 채널/개인에서 재정의됨 (전사 정의는 유지)"
            lines.append(rendered)
    if channel_rows:
        lines.append("\n### 이 채널(팀) 정의 — 전사 위에 덮어씀")
        lines.extend(
            _fmt_entry(entry, "채널") for entry in sorted(channel_rows, key=_term_key)
        )
    if member_rows:
        lines.append("\n### 개인 정의")
        lines.extend(
            _fmt_entry(entry, "개인") for entry in sorted(member_rows, key=_term_key)
        )

    if not lines:
        return "등록된 정의가 없습니다."
    return "\n".join(lines)
5 changes: 3 additions & 2 deletions tests/test_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ def test_kv_channel_overrides_guild_persisted(tmp_path) -> None:

reader = SqliteStore(db)
rendered = _render_effective(reader, scope, "c1", "u1")
assert "channel def" in rendered
assert "guild def" not in rendered
assert "channel def" in rendered # channel wins (effective)
# guild base shown for transparency (override does NOT hide the guild def)
assert "전사 기본: guild def" in rendered
reader.close()


Expand Down
35 changes: 30 additions & 5 deletions tests/test_semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ def test_channel_overrides_guild() -> None:
store.kv_set(scope, _kv_key("active_user", "channel", "c1"), FedEntry("active_user", "channel", "c1", "7d core action").to_json())

rendered = _render_effective(store, scope, "c1", "u1")
assert "7d core action" in rendered
assert "30d login" not in rendered
assert "7d core action" in rendered # channel wins (effective)
# guild base shown for transparency (override does NOT hide the guild def)
assert "전사 기본: 30d login" in rendered


def test_guild_fills_gap_when_channel_missing() -> None:
Expand All @@ -55,9 +56,10 @@ def test_member_overrides_channel_and_guild() -> None:
store.kv_set(scope, _kv_key("active_user", "member", "u1"), FedEntry("active_user", "member", "u1", "member def").to_json())

rendered = _render_effective(store, scope, "c1", "u1")
assert "member def" in rendered
assert "channel def" not in rendered
assert "guild def" not in rendered
assert "member def" in rendered # member wins (effective)
assert "channel def" not in rendered # channel is overridden by member
# guild base is shown for transparency (override does NOT hide the guild def)
assert "전사 기본: guild def" in rendered


def test_two_channels_isolated() -> None:
Expand All @@ -84,3 +86,26 @@ def test_build_prompt_section_includes_ambiguous_term_policy() -> None:
store = SqliteStore()
section = build_prompt_section(store, "g1", "c1", "u1")
assert "Ambiguous Term Policy" in section


def test_channel_override_keeps_guild_visible() -> None:
    """A channel override hides the guild definition in neither storage nor display (federation regression)."""
    from lang2sql.tools.semantic_federation import _render_layers, _kv_key as _k

    store = SqliteStore()
    scope = "g1"
    # Same term stored on two layers: guild-wide, and the "mkt" channel override.
    seeded = (
        ("guild", "", "판매 거래 식별자"),
        ("channel", "mkt", "영수증"),
    )
    for layer, entity, definition in seeded:
        store.kv_set(
            scope,
            _k("invoice", layer, entity),
            FedEntry("Invoice", layer, entity, definition).to_json(),
        )

    # Marketing channel: the override wins, and the guild base is shown with it.
    eff_mkt = _render_effective(store, scope, "mkt", "u1")
    assert "영수증" in eff_mkt
    assert "전사 기본: 판매 거래 식별자" in eff_mkt

    # Another channel: no override, so the guild definition appears unchanged.
    eff_other = _render_effective(store, scope, "fin", "u1")
    assert "판매 거래 식별자" in eff_other
    assert "영수증" not in eff_other

    # Per-layer view: both the guild and the channel Invoice entries are listed.
    layers = _render_layers(store, scope, "mkt", "u1")
    assert "판매 거래 식별자" in layers
    assert "영수증" in layers
    assert "재정의됨" in layers
Loading