From 331a7a7b0bac23c8ef414b70d4f6d51cfb95fbfa Mon Sep 17 00:00:00 2001 From: Marius Arvinte Date: Sat, 30 May 2026 16:02:55 -0700 Subject: [PATCH] Release 2026.5 (#858) # :tada: Major Updates - **Improved C code consolidation** - Significantly improves the robustness of C code consolidation (amalgamation) of multiple TUs into a single file, enabling the derivation of project-wide abstract syntax trees (ASTs) for large-scale, real-world projects that use the CMake build system - Use standalone with `make examples/path/to/project/init` - **Robust agentic test generation** - Our Rust C FFI test generation agents are now equipped with two statically defined tools in their environment: `nextest`-based thread-safe test execution and `cargo-llvm-cov` source-based coverage - Generate tests for mid-scale consolidated C projects using `make examples/path/to/project/testgen_agent` # :balloon: Minor Updates - Translation and wrapper prompt clean-up and generalization - Automatic test disabling: after a C FFI test fails a certain number of times, it automatically gets disabled, and the remainder of translation proceeds without testing it - Cleaned up deprecated functionality in `LEARNING.mk` - Added dollar cost tracking for the main per-symbol translation loop --------- Co-authored-by: Cory Cornelius Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- AGENTS.mk | 121 +- IDEAS.mk | 201 +-- LEARNING.mk | 63 - Makefile | 66 +- docker/ideas.Dockerfile | 21 +- extract_info.cmake | 9 +- pyproject.toml | 6 +- src/ideas/adapters.py | 92 +- src/ideas/agents/build.py | 264 ++++ src/ideas/agents/printer.py | 37 +- src/ideas/agents/testgen.py | 501 ++++---- src/ideas/agents/testgen_bin.py | 464 +++++++ src/ideas/agents/utils.py | 112 ++ src/ideas/ast.py | 283 ++++- src/ideas/ast_rust.py | 63 +- src/ideas/cmake.py | 26 +- src/ideas/convert_tests.py | 32 +- src/ideas/evaluate.py | 101 ++ src/ideas/init/build.py | 252 ++++ src/ideas/init/consolidate.py | 453 ++++++- 
src/ideas/init/crate.py | 59 +- src/ideas/learn/__init__.py | 0 src/ideas/learn/translate.py | 203 --- src/ideas/model.py | 33 +- src/ideas/sync.rs | 136 -- src/ideas/test_symbol.py | 216 +--- src/ideas/tools.py | 265 ++-- src/ideas/translate.py | 80 +- src/ideas/translate_recurrent.py | 280 ++-- src/ideas/translate_snippet.py | 324 +++-- src/ideas/wrapper.py | 673 ++++++---- test/fixtures/code_preprocessing/expected.c.i | 17 - test/fixtures/code_preprocessing/input.c.i | 42 - test/fixtures/compile/clippy.rs | 195 --- .../test_case/CMakeLists.txt | 18 + .../isystem_inline_dep/test_case/ext/bridge.h | 8 + .../isystem_inline_dep/test_case/ext/caller.c | 19 + .../isystem_inline_dep/test_case/main.c | 10 + .../isystem_inline_dep/test_case/util/alloc.h | 10 + .../isystem_inline_dep/test_case/util/user.c | 6 + .../test_case/xdiff/bridge.h | 8 + .../test_case/xdiff/caller.c | 19 + .../scc_ordering_bug/test_case/CMakeLists.txt | 17 + .../scc_ordering_bug/test_case/include/fwd.h | 13 + .../test_case/include/header.h | 15 + .../scc_ordering_bug/test_case/src/caller.c | 5 + .../scc_ordering_bug/test_case/src/main.c | 9 + .../scc_ordering_bug/test_case/src/state.c | 8 + .../typedef_cross_tu/test_case/CMakeLists.txt | 16 + .../test_case/include/types.h | 4 + .../typedef_cross_tu/test_case/src/a.c | 17 + .../typedef_cross_tu/test_case/src/b.c | 5 + .../typedef_cross_tu/test_case/src/c.c | 10 + .../typedef_cross_tu/test_vectors/test.json | 8 + test/test_cargo_test.py | 138 ++ test/test_clang.py | 164 ++- test/test_cmake.py | 113 ++ test/test_consolidate.py | 1121 +++++++++++++++++ test/test_extract_code_from_tu.py | 16 +- test/test_templating.py | 6 +- test/test_tools.py | 90 -- test/test_wrapper.py | 213 ++++ tools/crateify/Cargo.lock | 7 - tools/crateify/Cargo.toml | 11 - tools/crateify/README.md | 17 - tools/crateify/src/main.rs | 78 -- tools/rust_tests/lib_testing.rs | 49 + uv.lock | 900 ++++++++++++- 68 files changed, 6512 insertions(+), 2326 deletions(-) delete mode 100644 
LEARNING.mk create mode 100644 src/ideas/agents/build.py create mode 100644 src/ideas/agents/testgen_bin.py create mode 100644 src/ideas/agents/utils.py create mode 100644 src/ideas/evaluate.py create mode 100644 src/ideas/init/build.py delete mode 100644 src/ideas/learn/__init__.py delete mode 100644 src/ideas/learn/translate.py delete mode 100644 src/ideas/sync.rs delete mode 100644 test/fixtures/code_preprocessing/expected.c.i delete mode 100644 test/fixtures/code_preprocessing/input.c.i delete mode 100644 test/fixtures/compile/clippy.rs create mode 100644 test/fixtures/isystem_inline_dep/test_case/CMakeLists.txt create mode 100644 test/fixtures/isystem_inline_dep/test_case/ext/bridge.h create mode 100644 test/fixtures/isystem_inline_dep/test_case/ext/caller.c create mode 100644 test/fixtures/isystem_inline_dep/test_case/main.c create mode 100644 test/fixtures/isystem_inline_dep/test_case/util/alloc.h create mode 100644 test/fixtures/isystem_inline_dep/test_case/util/user.c create mode 100644 test/fixtures/isystem_inline_dep/test_case/xdiff/bridge.h create mode 100644 test/fixtures/isystem_inline_dep/test_case/xdiff/caller.c create mode 100644 test/fixtures/scc_ordering_bug/test_case/CMakeLists.txt create mode 100644 test/fixtures/scc_ordering_bug/test_case/include/fwd.h create mode 100644 test/fixtures/scc_ordering_bug/test_case/include/header.h create mode 100644 test/fixtures/scc_ordering_bug/test_case/src/caller.c create mode 100644 test/fixtures/scc_ordering_bug/test_case/src/main.c create mode 100644 test/fixtures/scc_ordering_bug/test_case/src/state.c create mode 100644 test/fixtures/typedef_cross_tu/test_case/CMakeLists.txt create mode 100644 test/fixtures/typedef_cross_tu/test_case/include/types.h create mode 100644 test/fixtures/typedef_cross_tu/test_case/src/a.c create mode 100644 test/fixtures/typedef_cross_tu/test_case/src/b.c create mode 100644 test/fixtures/typedef_cross_tu/test_case/src/c.c create mode 100644 
test/fixtures/typedef_cross_tu/test_vectors/test.json create mode 100644 test/test_cargo_test.py create mode 100644 test/test_cmake.py create mode 100644 test/test_consolidate.py create mode 100644 test/test_wrapper.py delete mode 100644 tools/crateify/Cargo.lock delete mode 100644 tools/crateify/Cargo.toml delete mode 100644 tools/crateify/README.md delete mode 100644 tools/crateify/src/main.rs create mode 100644 tools/rust_tests/lib_testing.rs diff --git a/AGENTS.mk b/AGENTS.mk index 58dca55..10f52f4 100644 --- a/AGENTS.mk +++ b/AGENTS.mk @@ -38,48 +38,107 @@ else RUN_SUFFIX = endif +TARGETS_LIB ?= $(shell [ -d build-ninja ] && find build-ninja -maxdepth 1 -type f -executable -exec basename {} \; | cut -d. -f1 | grep -E "^lib" | sed -e "s/^lib//gi") +TARGETS_BIN ?= $(shell [ -d build-ninja ] && find build-ninja -maxdepth 1 -type f -executable -exec basename {} \; | cut -d. -f1 | grep -vE "^lib") +TARGETS ?= $(TARGETS_BIN) $(TARGETS_LIB) +ifeq (${TARGETS},) +ifeq ($(filter cmake clean,$(MAKECMDGOALS)),) +$(error No TARGETS found! You need to run cmake!) 
+endif +endif -# test generation from project -.PHONY: testgen -testgen: test_crate/tests/test_assert.rs ; -.PRECIOUS: test_crate/tests/test_assert.rs -test_crate/tests/test_assert.rs: - $(RUN_PREFIX) \ - uv run python -m ideas.agents.testgen model=$(if $(AGENT_PROVIDER),${AGENT_PROVIDER}/,)${AGENT_MODEL} \ - c_code=test_case \ - project_name=$(notdir $(CURDIR)) \ - test_vectors_out=test_vectors/agent \ - test_crate_out=test_crate \ - hydra.output_subdir=.testgen \ - hydra.job.name=testgen \ - hydra.run.dir=test_vectors \ - $(RUN_SUFFIX) - # Agent is not guaranteed to write file - [ -f test_crate/tests/test_assert.rs ] || { echo "ERROR: Agent failed to generate test_crate/tests/test_assert.rs"; exit 1; } +.PRECIOUS: test_crates/%/Cargo.toml +.PRECIOUS: test_crates/%/src/lib.c +.PRECIOUS: test_crates/%/src/main.c +.PRECIOUS: test_crates/%/build.rs + +test_crates/%/Cargo.toml \ +test_crates/%/src/lib.c \ +test_crates/%/build.rs: | build-ninja/lib%.so.sources + uv run python -m ideas.init.crate crate_type=lib \ + reexport_lib=false \ + hydra.output_subdir=null \ + hydra.run.dir=test_crates/$* + uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ + cargo_toml=test_crates/$*/Cargo.toml \ + source_priority=build-ninja/lib$*.so.sources \ + hydra.output_subdir=null \ + hydra.run.dir=test_crates/$* + uv run python -m ideas.agents.build instrumentation=coverage \ + hydra.output_subdir=null \ + hydra.job.name=init.build \ + hydra.run.dir=test_crates/$* +test_crates/%/Cargo.toml \ +test_crates/%/src/main.c \ +test_crates/%/build.rs: | build-ninja/%.sources + uv run python -m ideas.init.crate crate_type=bin \ + hydra.output_subdir=null \ + hydra.run.dir=test_crates/$* + uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ + cargo_toml=test_crates/$*/Cargo.toml \ + source_priority=build-ninja/$*.sources \ + hydra.output_subdir=null \ + hydra.run.dir=test_crates/$* + uv run python -m ideas.agents.build 
instrumentation=coverage \ + hydra.output_subdir=null \ + hydra.job.name=init.build \ + hydra.run.dir=test_crates/$* + + +.PHONY: testgen_agent +testgen_agent: $(patsubst %,test_crates/%/tests/test_assert.rs,${TARGETS}) ; -# library targets: generate tests from the consolidated lib.c .PRECIOUS: test_crates/%/tests/test_assert.rs -test_crates/%/tests/test_assert.rs: ${TRANSLATION_DIR}/%/src/lib.c | build-ninja/lib%.so.type - # Copy lib.c into test_crates//src/ so - # build.rs can use ../../test_crates//src/lib.c - # both in Docker /tmp and on disk - mkdir -p test_crates/$*/src - cp ${TRANSLATION_DIR}/$*/src/lib.c test_crates/$*/src/lib.c +test_crates/%/tests/test_assert.rs: test_crates/%/Cargo.toml test_crates/%/src/lib.c | build-ninja/lib%.so.sources $(RUN_PREFIX) \ uv run python -m ideas.agents.testgen model=$(if $(AGENT_PROVIDER),${AGENT_PROVIDER}/,)${AGENT_MODEL} \ + cargo_toml=test_crates/$*/Cargo.toml \ c_code=test_crates/$*/src/lib.c \ project_name=$* \ - test_vectors_out=test_vectors/$*/agent \ test_crate_out=test_crates/$* \ - hydra.output_subdir=.testgen \ + hydra.output_subdir=null \ hydra.job.name=testgen \ - hydra.run.dir=test_vectors/$* \ + hydra.run.dir=test_crates/$* \ $(RUN_SUFFIX) - # Agent is not guaranteed to write file + $(RUN_PREFIX) \ + uv run python -m ideas.agents.testgen model=$(if $(AGENT_PROVIDER),${AGENT_PROVIDER}/,)${AGENT_MODEL} \ + guarantee_assert_tests=true \ + collect_to_assert=true \ + cargo_toml=test_crates/$*/Cargo.toml \ + c_code=test_crates/$*/src/lib.c \ + project_name=$* \ + test_crate_out=test_crates/$* \ + hydra.output_subdir=null \ + hydra.job.name=assert_writer \ + hydra.run.dir=test_crates/$* \ + $(RUN_SUFFIX) + # Agents are not guaranteed to produce the file [ -f test_crates/$*/tests/test_assert.rs ] || { echo "ERROR: Agent failed to generate test_crates/$*/tests/test_assert.rs"; exit 1; } -# executable targets: do nothing -test_crates/%/tests/test_assert.rs: ${TRANSLATION_DIR}/%/src/main.c | build-ninja/%.type - 
$(error Agent cannot generate tests for binary targets yet!) +test_crates/%/tests/test_assert.rs: test_crates/%/Cargo.toml test_crates/%/src/main.c | build-ninja/%.sources + $(RUN_PREFIX) \ + uv run python -m ideas.agents.testgen_bin model=$(if $(AGENT_PROVIDER),${AGENT_PROVIDER}/,)${AGENT_MODEL} \ + cargo_toml=test_crates/$*/Cargo.toml \ + c_code=test_crates/$*/src/main.c \ + project_name=$* \ + test_crate_out=test_crates/$* \ + hydra.output_subdir=null \ + hydra.job.name=testgen \ + hydra.run.dir=test_crates/$* \ + $(RUN_SUFFIX) + $(RUN_PREFIX) \ + uv run python -m ideas.agents.testgen_bin model=$(if $(AGENT_PROVIDER),${AGENT_PROVIDER}/,)${AGENT_MODEL} \ + guarantee_assert_tests=true \ + collect_to_assert=true \ + cargo_toml=test_crates/$*/Cargo.toml \ + c_code=test_crates/$*/src/main.c \ + project_name=$* \ + test_crate_out=test_crates/$* \ + hydra.output_subdir=null \ + hydra.job.name=assert_writer \ + hydra.run.dir=test_crates/$* \ + $(RUN_SUFFIX) + # Agents are not guaranteed to produce the file + [ -f test_crates/$*/tests/test_assert.rs ] || { echo "ERROR: Agent failed to generate test_crates/$*/tests/test_assert.rs"; exit 1; } diff --git a/IDEAS.mk b/IDEAS.mk index 84d6a8e..b31e521 100644 --- a/IDEAS.mk +++ b/IDEAS.mk @@ -6,8 +6,6 @@ MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) MAKEFILE_DIR := $(realpath $(dir $(MAKEFILE_PATH))) -PIPELINE_DIR := ${MAKEFILE_DIR}/lib/pipeline_automation -PIPELINE_TAG := ideas/$(shell git rev-list -1 HEAD -- ${PIPELINE_DIR}) EXTRACT_INFO_CMAKE := ${MAKEFILE_DIR}/extract_info.cmake AGENTS_MAKEFILE := $(MAKEFILE_DIR)/AGENTS.mk @@ -23,16 +21,25 @@ endif RUSTFLAGS ?= -Awarnings## Ignore Rust compiler warnings CARGO_NET_OFFLINE ?= true## Cargo offline mode CFLAGS ?= -w## Ignore C compiler warnings -export EXTRACT_INFO_CMAKE CFLAGS +LARGE_PROJECT ?= 0## Disable translation-time tests and enable context compression +export EXTRACT_INFO_CMAKE CFLAGS LARGE_PROJECT VCS ?= git GIT_AUTHOR_NAME ?= ideas GIT_AUTHOR_EMAIL ?= 
ideas@localhost export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL -## Per-target test vectors: test_vectors//*.json -TEST_FILES = $(wildcard test_vectors/$*/*.json) -TARGETS ?= $(shell [ -d build-ninja ] && find build-ninja -maxdepth 1 -type f -executable -exec basename {} \; | cut -d. -f1 | sed -e "s/^lib//gi") +ifeq ($(LARGE_PROJECT),1) +TRANSLATION_TEST ?= smoke +else +TRANSLATION_TEST ?= test_assert +endif + +EVALUATION_TEST ?= test_cases +TEST_FILES := $(wildcard test_vectors/*.json) +TARGETS_LIB ?= $(shell [ -d build-ninja ] && find build-ninja -maxdepth 1 -name 'lib*.so.sources' -exec basename {} .so.sources \; | sed -e "s/^lib//gi") +TARGETS_BIN ?= $(shell [ -d build-ninja ] && find build-ninja -maxdepth 1 -name '*.sources' ! -name 'lib*.so.sources' -exec basename {} .sources \; ) +TARGETS ?= $(TARGETS_BIN) $(TARGETS_LIB) ifeq (${TARGETS},) ifeq ($(filter cmake clean,$(MAKECMDGOALS)),) $(error No TARGETS found! You need to run cmake!) @@ -55,10 +62,9 @@ build-ninja/build.log: build-ninja/cmake.log # init .PHONY: init init: $(patsubst %,${TRANSLATION_DIR}/%/init,${TARGETS}) ; -${TRANSLATION_DIR}/%/init: ${TRANSLATION_DIR}/%/src/lib.c | build-ninja/lib%.so.type - touch ${TRANSLATION_DIR}/$*/src/lib.c -${TRANSLATION_DIR}/%/init: ${TRANSLATION_DIR}/%/src/main.c | build-ninja/%.type - touch ${TRANSLATION_DIR}/$*/src/main.c +${TRANSLATION_DIR}/%/init: ${TRANSLATION_DIR}/%/build.rs + touch ${TRANSLATION_DIR}/$*/Cargo.toml + touch ${TRANSLATION_DIR}/$*/build.rs # initialize workspace .PRECIOUS: ${TRANSLATION_DIR}/Cargo.toml @@ -67,74 +73,78 @@ ${TRANSLATION_DIR}/Cargo.toml: uv run python -m ideas.init.workspace cargo_toml=$@ vcs=${VCS} # initialize translated crate for each C target -.PRECIOUS: ${TRANSLATION_DIR}/%/Cargo.toml -${TRANSLATION_DIR}/%/Cargo.toml: | ${TRANSLATION_DIR}/Cargo.toml build-ninja/lib%.so.type - uv run python -m ideas.init.crate crate_type=lib vcs=${VCS} \ - hydra.output_subdir=.init \ - hydra.run.dir=${TRANSLATION_DIR}/$* - -.PRECIOUS: 
${TRANSLATION_DIR}/%/Cargo.toml -${TRANSLATION_DIR}/%/Cargo.toml: | ${TRANSLATION_DIR}/Cargo.toml build-ninja/%.type - uv run python -m ideas.init.crate crate_type=bin vcs=${VCS} \ - hydra.output_subdir=.init \ - hydra.run.dir=${TRANSLATION_DIR}/$* - # consolidate each C target +# generate build scripts +.PRECIOUS: ${TRANSLATION_DIR}/%/Cargo.toml .PRECIOUS: ${TRANSLATION_DIR}/%/src/lib.c -${TRANSLATION_DIR}/%/src/lib.c: | ${TRANSLATION_DIR}/%/Cargo.toml build-ninja/compile_commands.json build-ninja/lib%.so.sources - -uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ - vcs=${VCS} \ - cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ - source_priority=build-ninja/lib$*.so.sources \ - hydra.output_subdir=.init.consolidate \ - hydra.run.dir=${TRANSLATION_DIR}/$* - .PRECIOUS: ${TRANSLATION_DIR}/%/src/main.c -${TRANSLATION_DIR}/%/src/main.c: | ${TRANSLATION_DIR}/%/Cargo.toml build-ninja/compile_commands.json build-ninja/%.sources - -uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ - vcs=${VCS} \ - cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ - source_priority=build-ninja/$*.sources \ - hydra.output_subdir=.init.consolidate \ - hydra.run.dir=${TRANSLATION_DIR}/$* +.PRECIOUS: ${TRANSLATION_DIR}/%/build.rs + +${TRANSLATION_DIR}/%/Cargo.toml \ +${TRANSLATION_DIR}/%/src/lib.c \ +${TRANSLATION_DIR}/%/build.rs: | ${TRANSLATION_DIR}/Cargo.toml build-ninja/compile_commands.json build-ninja/lib%.so.sources + uv run python -m ideas.init.crate crate_type=lib \ + vcs=${VCS} \ + hydra.output_subdir=.init.crate \ + hydra.run.dir=${TRANSLATION_DIR}/$* + uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ + vcs=${VCS} \ + cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ + source_priority=build-ninja/lib$*.so.sources \ + hydra.output_subdir=.init.consolidate \ + hydra.run.dir=${TRANSLATION_DIR}/$* + uv run python -m ideas.init.build vcs=${VCS} \ + hydra.output_subdir=.init.build \ + 
hydra.job.name=init.build \ + hydra.run.dir=${TRANSLATION_DIR}/$* + +${TRANSLATION_DIR}/%/Cargo.toml \ +${TRANSLATION_DIR}/%/src/main.c \ +${TRANSLATION_DIR}/%/build.rs: | ${TRANSLATION_DIR}/Cargo.toml build-ninja/compile_commands.json build-ninja/%.sources + uv run python -m ideas.init.crate crate_type=bin \ + vcs=${VCS} \ + hydra.output_subdir=.init.crate \ + hydra.run.dir=${TRANSLATION_DIR}/$* + uv run python -m ideas.init.consolidate filename=build-ninja/compile_commands.json \ + vcs=${VCS} \ + cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ + source_priority=build-ninja/$*.sources \ + hydra.output_subdir=.init.consolidate \ + hydra.run.dir=${TRANSLATION_DIR}/$* + uv run python -m ideas.init.build vcs=${VCS} \ + hydra.output_subdir=.init.build \ + hydra.job.name=init.build \ + hydra.run.dir=${TRANSLATION_DIR}/$* # translate .PHONY: translate translate: $(patsubst %,${TRANSLATION_DIR}/%/translate,${TARGETS}) ; -${TRANSLATION_DIR}/%/translate: ${TRANSLATION_DIR}/%/src/lib.rs | build-ninja/lib%.so.type ; -${TRANSLATION_DIR}/%/translate: ${TRANSLATION_DIR}/%/src/main.rs | build-ninja/%.type ; +${TRANSLATION_DIR}/%/translate: ${TRANSLATION_DIR}/%/src/lib.rs | build-ninja/lib%.so.sources ; +${TRANSLATION_DIR}/%/translate: ${TRANSLATION_DIR}/%/src/main.rs | build-ninja/%.sources ; .PRECIOUS: ${TRANSLATION_DIR}/%/src/lib.rs -${TRANSLATION_DIR}/%/src/lib.rs: ${TRANSLATION_DIR}/%/src/lib.c | ${TRANSLATION_DIR}/%/Cargo.toml ${TRANSLATION_DIR}/%/tests/test_assert.rs +${TRANSLATION_DIR}/%/src/lib.rs: ${TRANSLATION_DIR}/%/src/lib.c | ${TRANSLATION_DIR}/%/Cargo.toml ${TRANSLATION_DIR}/%/tests/${TRANSLATION_TEST}.rs -uv run python -m ideas.translate model.name=${PROVIDER}/${MODEL} \ filename=${TRANSLATION_DIR}/$*/src/lib.c \ cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ + tests=${TRANSLATION_TEST} \ vcs=${VCS} \ hydra.output_subdir=.translate \ hydra.job.name=translate \ hydra.run.dir=${TRANSLATION_DIR}/$* ${TRANSLATE_ARGS} + @touch $@ .PRECIOUS: ${TRANSLATION_DIR}/%/src/main.rs 
-${TRANSLATION_DIR}/%/src/main.rs: ${TRANSLATION_DIR}/%/src/main.c ${TRANSLATION_DIR}/%/tests/test_cases.rs | ${TRANSLATION_DIR}/%/Cargo.toml +${TRANSLATION_DIR}/%/src/main.rs: ${TRANSLATION_DIR}/%/src/main.c | ${TRANSLATION_DIR}/%/Cargo.toml ${TRANSLATION_DIR}/%/tests/${TRANSLATION_TEST}.rs -uv run python -m ideas.translate model.name=${PROVIDER}/${MODEL} \ filename=${TRANSLATION_DIR}/$*/src/main.c \ cargo_toml=${TRANSLATION_DIR}/$*/Cargo.toml \ + tests=${TRANSLATION_TEST} \ vcs=${VCS} \ hydra.output_subdir=.translate \ hydra.job.name=translate \ hydra.run.dir=${TRANSLATION_DIR}/$* ${TRANSLATE_ARGS} - - -# wrapper -.PHONY: wrapper -wrapper: $(patsubst %,${TRANSLATION_DIR}/%/wrapper,${TARGETS}) ; -${TRANSLATION_DIR}/%/wrapper: ${TRANSLATION_DIR}/%/src/wrapper.rs ; - -.PRECIOUS: ${TRANSLATION_DIR}/%/src/wrapper.rs -${TRANSLATION_DIR}/%/src/wrapper.rs: ${TRANSLATION_DIR}/%/src/lib.rs | ${TRANSLATION_DIR}/%/Cargo.toml - touch $@ -${TRANSLATION_DIR}/%/src/wrapper.rs: ${TRANSLATION_DIR}/%/src/main.rs - touch $@ + @touch $@ # build .PHONY: build @@ -145,45 +155,32 @@ ${TRANSLATION_DIR}/build.log: $(patsubst %,${TRANSLATION_DIR}/%/build.log,${TARG cat $^ > $@ .PRECIOUS: ${TRANSLATION_DIR}/%/build.log -${TRANSLATION_DIR}/%/build.log: ${TRANSLATION_DIR}/%/src/wrapper.rs +${TRANSLATION_DIR}/%/build.log: ${TRANSLATION_DIR}/%/src/lib.rs -export RUSTFLAGS=${RUSTFLAGS} && cargo build --quiet --manifest-path ${TRANSLATION_DIR}/$*/Cargo.toml 2> ${TRANSLATION_DIR}/$*/build.log @cat ${TRANSLATION_DIR}/$*/build.log -.PRECIOUS: ${TRANSLATION_DIR}/unsafety.json -${TRANSLATION_DIR}/unsafety.json: ${TRANSLATION_DIR}/build.log - uv run --with-requirements ${PIPELINE_DIR}/requirements.txt \ - python ${PIPELINE_DIR}/evaluate_unsafe_usage/invoke_unsafety.py \ - --container-name ${PIPELINE_TAG}/unsafety \ - $( ${TRANSLATION_DIR}/$*/build.log + @cat ${TRANSLATION_DIR}/$*/build.log # test .PHONY: test -test: ${TRANSLATION_DIR}/cargo_test.log ; +test: 
${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log ; -.PRECIOUS: ${TRANSLATION_DIR}/cargo_test.log -${TRANSLATION_DIR}/cargo_test.log: ${TRANSLATION_DIR}/build.log $(patsubst %,${TRANSLATION_DIR}/%/cargo_test.log,${TARGETS}) - cat $^ > $@ +.PRECIOUS: ${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log +${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log: ${TRANSLATION_DIR}/build.log $(patsubst %,${TRANSLATION_DIR}/%/cargo_${EVALUATION_TEST}.log,${TARGETS}) + cat $(filter-out $<,$^) > $@ -.PRECIOUS: ${TRANSLATION_DIR}/%/cargo_test.log -${TRANSLATION_DIR}/%/cargo_test.log: ${TRANSLATION_DIR}/%/build.log ${TRANSLATION_DIR}/%/tests/test_cases.rs - if [ $$(stat -c %s ${TRANSLATION_DIR}/$*/build.log) = 0 ]; then \ - cargo test --manifest-path ${TRANSLATION_DIR}/$*/Cargo.toml --test test_cases | tee $@ ; \ - else \ - find test_vectors/$* -name '*.json' -exec echo "test {} ... FAILED" \; | tee $@ ; \ - fi \ +.PRECIOUS: ${TRANSLATION_DIR}/%/cargo_${EVALUATION_TEST}.log +${TRANSLATION_DIR}/%/cargo_${EVALUATION_TEST}.log: ${TRANSLATION_DIR}/%/build.log ${TRANSLATION_DIR}/%/tests/${EVALUATION_TEST}.rs | ${TRANSLATION_DIR}/%/Cargo.toml + uv run python -m ideas.evaluate manifest=${TRANSLATION_DIR}/$*/Cargo.toml \ + test_cases=${EVALUATION_TEST} \ + output_file=$@ +# convert cando tests .PRECIOUS: ${TRANSLATION_DIR}/%/tests/test_cases.rs -${TRANSLATION_DIR}/%/tests/test_cases.rs: | ${TRANSLATION_DIR}/%/Cargo.toml runner/Cargo.toml build-ninja/lib%.so.type - -uv run python -m ideas.convert_tests runner_manifest=runner/Cargo.toml \ +${TRANSLATION_DIR}/%/tests/test_cases.rs: | ${TEST_FILES} ${TRANSLATION_DIR}/%/Cargo.toml runner/Cargo.toml build-ninja/lib%.so.sources + uv run python -m ideas.convert_tests runner_manifest=runner/Cargo.toml \ vcs=${VCS} \ template=${MAKEFILE_DIR}/tools/rust_tests/lib_testing.rs \ output=tests/test_cases.rs \ @@ -191,17 +188,13 @@ ${TRANSLATION_DIR}/%/tests/test_cases.rs: | ${TRANSLATION_DIR}/%/Cargo.toml runn hydra.output_subdir=.convert_tests \ 
hydra.run.dir=${TRANSLATION_DIR}/$* -${TRANSLATION_DIR}/%/tests/test_cases.rs: | ${TRANSLATION_DIR}/%/Cargo.toml build-ninja/%.type - -uv run python -m ideas.convert_tests vcs=${VCS} \ +${TRANSLATION_DIR}/%/tests/test_cases.rs: | ${TEST_FILES} ${TRANSLATION_DIR}/%/Cargo.toml build-ninja/%.sources + uv run python -m ideas.convert_tests vcs=${VCS} \ output=tests/test_cases.rs \ 'test_vectors=[$(shell echo "$(TEST_FILES)" | tr ' ' ',')]' \ hydra.output_subdir=.convert_tests \ hydra.run.dir=${TRANSLATION_DIR}/$* -${TRANSLATION_DIR}/%/tests/test_cases.rs: - mkdir -p $(@D) - touch $@ - # can't rely on test vectors without explicit targets .PRECIOUS: test_vectors/%.json test_vectors/%.json: @@ -213,23 +206,31 @@ test_vectors/%/%.json: # testgen for each C target -.PHONY: testgen_target -testgen_target: $(patsubst %,test_crates/%/tests/test_assert.rs,${TARGETS}) ; - .PRECIOUS: test_crates/%/tests/test_assert.rs -test_crates/%/tests/test_assert.rs: | ${TRANSLATION_DIR}/%/src/lib.c build-ninja/lib%.so.type - -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) test_crates/$*/tests/test_assert.rs +test_crates/%/tests/test_assert.rs: | build-ninja/lib%.so.sources + -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) $@ -.PRECIOUS: test_crates/%/tests/test_assert.rs -test_crates/%/tests/test_assert.rs: | ${TRANSLATION_DIR}/%/src/main.c | build-ninja/%.type - -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) test_crates/$*/tests/test_assert.rs +test_crates/%/tests/test_assert.rs: | build-ninja/%.sources + -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) $@ -${TRANSLATION_DIR}/%/tests/test_assert.rs: test_crates/%/tests/test_assert.rs | build-ninja/lib%.so.type +.PRECIOUS: ${TRANSLATION_DIR}/%/tests/test_assert.rs +${TRANSLATION_DIR}/%/tests/test_assert.rs: test_crates/%/tests/test_assert.rs mkdir -p $(dir $@) - cp test_crates/$*/tests/test_assert.rs $@ + cp $< $@ + +# test wrappers instead of bindings +.PRECIOUS: ${TRANSLATION_DIR}/%/tests/test_assert_wrapper.rs +${TRANSLATION_DIR}/%/tests/test_assert_wrapper.rs: 
test_crates/%/tests/test_assert.rs + cat $< | sed 's/$*::binding::/$*::wrapper::/g' > $@ -${TRANSLATION_DIR}/%/tests/test_assert.rs: | build-ninja/%.type - $(error Agent cannot generate tests for binary targets yet!) +# smoke test +.PRECIOUS: ${TRANSLATION_DIR}/%/tests/smoke.rs +${TRANSLATION_DIR}/%/tests/smoke.rs: + mkdir -p $(dir $@) + echo "#[test]" >> $@ + echo "fn smoke() {" >> $@ + echo " assert_eq!(1, 1);" >> $@ + echo "}" >> $@ # clean diff --git a/LEARNING.mk b/LEARNING.mk deleted file mode 100644 index dfb5575..0000000 --- a/LEARNING.mk +++ /dev/null @@ -1,63 +0,0 @@ -# -# Copyright (C) 2026 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MAKEFILE_DIR := $(realpath $(dir $(MAKEFILE_PATH))) -EXAMPLES_DIR := examples/Test-Corpus/Public-Tests - -PROVIDER ?= hosted_vllm## Provider to use with DSPy/LiteLLM -MODEL ?= Qwen/Qwen3-Coder-30B-A3B-Instruct## Model to use to translate -REVISION ?= None## Revision of model to load in vLLM -HOST ?= localhost -PORT ?= 8000## Port to use for vLLM -BASE_URL ?= http://${HOST}:${PORT}/v1## Base URL of vLLM server - -DATA_DIR ?= translation.$(shell git rev-parse HEAD)## Directory to collect data from -TEACHER_PROVIDER ?= openrouter## GEPA teacher model provider -TEACHER_MODEL ?= openai/gpt-5-mini## GEPA teacher model name -TEACHER_BASE_URL ?= https://openrouter.ai/api/v1## GEPA teacher model base URL -REFLECT_PROVIDER ?= openrouter## GEPA reflection model provider -REFLECT_MODEL ?= openai/gpt-5.1## GEPA reflection model name -REFLECT_BASE_URL ?= https://openrouter.ai/api/v1## GEPA reflection model base URL - - -EXAMPLES ?= $(sort $(patsubst %/test_case,%,$(shell find ${EXAMPLES_DIR} -maxdepth 3 -name test_case -type d)))## List of examples to run on -ifeq ($(EXAMPLES),) -$(warning No projects found in ${EXAMPLES_DIR}. You may need to re-run commands!) 
-endif - -.PRECIOUS: student_examples.lst -student_examples.lst: - -@$(MAKE) -j128 -f ${MAKEFILE_DIR}/Makefile examples/wrapper \ - TRANSLATION_DIR=${DATA_DIR}.student \ - PROVIDER=${PROVIDER} \ - MODEL=${MODEL} \ - BASE_URL=${BASE_URL} - @echo "$(EXAMPLES)" | tr ' ' '\n' | sort | xargs realpath | sed "s|$$|/${DATA_DIR}.student|" > $@ - -.PRECIOUS: teacher_examples.lst -teacher_examples.lst: - -@$(MAKE) -j128 -f ${MAKEFILE_DIR}/Makefile examples/wrapper \ - TRANSLATION_DIR=${DATA_DIR}.teacher \ - PROVIDER=${TEACHER_PROVIDER} \ - MODEL=${TEACHER_MODEL} \ - BASE_URL=${TEACHER_BASE_URL} - @echo "$(EXAMPLES)" | tr ' ' '\n' | sort | xargs realpath | sed "s|$$|/${DATA_DIR}.teacher|" > $@ - -.PHONY: learn/translate -learn/translate:## Learn a prompt for translating C to Rust -learn/translate: student_examples.lst teacher_examples.lst - uv run python -m ideas.learn.translate \ - student_examples=$(realpath student_examples.lst) \ - teacher_examples=$(realpath teacher_examples.lst) \ - model.name=${PROVIDER}/${MODEL} \ - model.base_url=${BASE_URL} \ - reflect_model.name=${REFLECT_PROVIDER}/${REFLECT_MODEL} \ - reflect_model.base_url=${REFLECT_BASE_URL} - - -clean: - rm -f student_examples.lst teacher_examples.lst diff --git a/Makefile b/Makefile index 57fad59..df83e8d 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,6 @@ MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) MAKEFILE_DIR := $(realpath $(dir $(MAKEFILE_PATH))) -PIPELINE_DIR := lib/pipeline_automation -PIPELINE_TAG := ideas/$(shell git rev-list -1 HEAD -- ${PIPELINE_DIR}) EXAMPLES_DIR := examples IDEAS_MAKEFILE := $(MAKEFILE_DIR)/IDEAS.mk AGENTS_MAKEFILE := $(MAKEFILE_DIR)/AGENTS.mk @@ -25,9 +23,11 @@ TRANSLATE_ARGS ?= ## Args to pass to IDEAS translation RUSTFLAGS ?= -Awarnings## Flags to build Rust translation VERBOSE ?= 0## Whether to output failed/partial projects in summaries VCS ?= git## Whether to use version control during translation. 
Options: ['git', 'none'] +CC ?= clang## C compiler to use for translation and building +EVALUATION_TEST ?= test_cases## Evaluation test directory/name to run; defaults to `test_cases` # Pass these variables to other Makefiles -export PROVIDER MODEL BASE_URL TRANSLATION_DIR RUSTFLAGS +export PROVIDER MODEL BASE_URL TRANSLATION_DIR RUSTFLAGS CC EVALUATION_TEST EXAMPLES ?= $(sort $(patsubst %/test_case,%,$(shell find ${EXAMPLES_DIR} -maxdepth 3 -name test_case -type d)))## List of examples to run on ifeq ($(EXAMPLES),) @@ -60,27 +60,21 @@ docker: docker/docker_build.log ideas-$(shell id -u) bash -.PHONY: docker/build_measurements -docker/build_measurements:## Build measurement Docker images -docker/build_measurements: ${PIPELINE_DIR}/evaluate_unsafe_usage/unsafety.Dockerfile \ - ${PIPELINE_DIR}/idiomaticity/idiomaticity_measurements.Dockerfile - docker build -t ${PIPELINE_TAG}/unsafety \ - -f ${PIPELINE_DIR}/evaluate_unsafe_usage/unsafety.Dockerfile \ - ${PIPELINE_DIR}/evaluate_unsafe_usage/ - docker build -t ${PIPELINE_TAG}/idiomaticity \ - -f ${PIPELINE_DIR}/idiomaticity/idiomaticity_measurements.Dockerfile \ - ${PIPELINE_DIR}/idiomaticity/ - .PHONY: install install: install-uv install-rust ## Install uv and Rust .PHONY: install-uv -install-uv:## Install uv@0.10.9 - curl -LsSf https://astral.sh/uv/0.10.9/install.sh | sh +install-uv:## Install uv@0.11.13 + curl -LsSf https://astral.sh/uv/0.11.13/install.sh | sh .PHONY: install-rust -install-rust:## Install Rust@1.88.0 +install-rust:## Install Rust@1.88.0 and tools curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.88.0 + rustup component add rustfmt + rustup component add llvm-tools-preview --toolchain 1.88.0-x86_64-unknown-linux-gnu + cargo install bindgen-cli --version 0.72.1 + cargo install cargo-llvm-cov --version 0.8.6 + cargo install cargo-nextest --version 0.9.114 --locked .PHONY: install-clang install-clang:## Install Clang-21, must be sudo @@ -89,6 +83,10 @@ 
install-clang:## Install Clang-21, must be sudo -./llvm.sh 21 all rm ./llvm.sh +.PHONY: install-sys-deps +install-sys-deps:## Install system dependencies, must be sudo + apt install libpcre3-dev + .PHONY: serve serve:## Start vLLM server uv run --no-project --python 3.11 --with vllm==${VLLM_VERSION} vllm serve ${MODEL} --revision ${REVISION} --host ${HOST} --port ${PORT} --dtype auto ${VLLM_ARGS} @@ -129,20 +127,12 @@ examples/%/cmake: FORCE .PHONY: examples/testgen_agent -examples/testgen_agent:## Generate test vectors for all C examples with an agent +examples/testgen_agent:## Generate test vectors for all targets in all C examples with an agent examples/testgen_agent: $(addsuffix /testgen_agent,${EXAMPLES}) -examples/%/testgen_agent:## Generate test vectors for specific C example with an agent +examples/%/testgen_agent:## Generate test vectors for all targets in a specific C example with an agent examples/%/testgen_agent: FORCE - -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) -C $(@D) cmake - -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) -C $(@D) testgen - -.PHONY: examples/testgen_agent_target -examples/testgen_agent_target:## Generate test vectors for all targets in all C examples with an agent -examples/testgen_agent_target: $(addsuffix /testgen_agent_target,${EXAMPLES}) -examples/%/testgen_agent_target:## Generate test vectors for all targets in a specific C example with an agent -examples/%/testgen_agent_target: FORCE -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) cmake - -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) testgen_target + -@$(MAKE) -j1 -f $(AGENTS_MAKEFILE) -C $(@D) testgen_agent .PHONY: examples/translate @@ -159,15 +149,6 @@ examples/%/translate: FORCE -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) translate -.PHONY: examples/wrapper -examples/wrapper:## Generate C FFI wrappers for all examples -examples/wrapper: $(addsuffix /wrapper,${EXAMPLES}) -examples/%/wrapper:## Generate C FFI wrappers for specific example -examples/%/wrapper: FORCE - -@$(MAKE) -j1 -f 
$(IDEAS_MAKEFILE) -C $(@D) cmake - -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) wrapper - - .PHONY: examples/build examples/build:## Build all translated examples examples/build: $(addsuffix /build,${EXAMPLES}) @@ -187,6 +168,7 @@ examples/%/build: FORCE -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) cmake -@$(MAKE) -j1 -f $(IDEAS_MAKEFILE) -C $(@D) build + .PHONY: examples/test examples/test:## Test all translated examples examples/test: $(addsuffix /test,${EXAMPLES}) @@ -201,18 +183,18 @@ ifneq (${VERBOSE},0) @echo "" endif @echo "--- Project Completion Count ---" - @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_test.log' -exec ./scripts/test_log_stats.sh {} \; | cut -d" " -f1 | sort | uniq -c + @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log' -exec ./scripts/test_log_stats.sh {} \; | cut -d" " -f1 | sort | uniq -c @echo "" ifneq (${VERBOSE},0) - @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_test.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "PARTIAL" | sort | sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' + @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "PARTIAL" | sort | sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' @echo "" - @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_test.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "MISSING" | sort | sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' + @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "MISSING" | sort | sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' @echo "" - @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_test.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "FAILED" | sort | sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' + @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log' -exec ./scripts/test_log_stats.sh {} \; | egrep "FAILED" | sort 
| sed -e "s/${TRANSLATION_DIR}.*//gi" | sed -e 's/^/ /' @echo "" endif @echo "--- Aggregated Test Count ---" - @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_test.log' | xargs cat | grep -aE "^test \S+ ... \S+$$" | cut -d" " -f4 | sort | uniq -c + @find ${EXAMPLES} -path '*/${TRANSLATION_DIR}/cargo_${EVALUATION_TEST}.log' | xargs cat | grep -aE "^test \S+ ... \S+$$" | cut -d" " -f4 | sort | uniq -c @echo "\`\`\`" examples/%/test:## Test specific translated example examples/%/test: FORCE diff --git a/docker/ideas.Dockerfile b/docker/ideas.Dockerfile index 82a29dd..39bc469 100644 --- a/docker/ideas.Dockerfile +++ b/docker/ideas.Dockerfile @@ -15,19 +15,22 @@ RUN apt-get update && apt-get install -y \ git \ lsb-release \ software-properties-common \ - gnupg + gnupg \ + vim RUN wget https://apt.llvm.org/llvm.sh && \ chmod +x llvm.sh && \ - ./llvm.sh 21 all + ./llvm.sh 21 all && \ + rm ./llvm.sh # Install ninja-build RUN apt-get install -y ninja-build -# Install libssl-dev and dependencies +# Install possible translation dependencies RUN apt-get install -y \ zlib1g-dev \ - libssl-dev + libssl-dev \ + libpcre3-dev # Install specific version of cmake from binary ARG CMAKE_VERSION=4.1.2 @@ -36,17 +39,21 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cm /tmp/cmake-install.sh --skip-license --prefix=/usr/local && \ rm /tmp/cmake-install.sh -# Symlink /usr/bin/clang +# Symlink /usr/bin/clang and set it as default compiler RUN ln -s /usr/bin/clang-21 /usr/bin/clang +ENV CC=clang # Install uv ENV UV_INSTALL_DIR="/usr/local/bin" -RUN curl -LsSf https://astral.sh/uv/0.10.9/install.sh | sh +RUN curl -LsSf https://astral.sh/uv/0.11.13/install.sh | sh # Install Rust toolchain non-interactively RUN rustup default 1.88.0 RUN rustup component add rustfmt -RUN cargo install bindgen-cli +RUN rustup component add llvm-tools-preview --toolchain 1.88.0-x86_64-unknown-linux-gnu +RUN cargo install bindgen-cli --version 0.72.1 +RUN cargo install 
cargo-llvm-cov --version 0.8.6 +RUN cargo install cargo-nextest --version 0.9.114 --locked # Non-root user ARG USER_UID=1000 diff --git a/extract_info.cmake b/extract_info.cmake index 9258c6a..43758f4 100644 --- a/extract_info.cmake +++ b/extract_info.cmake @@ -52,12 +52,11 @@ function(extract_info) continue() endif() - get_target_property(TARGET_DIR ${TARGET} SOURCE_DIR) get_target_property(TARGET_LINK_LIBRARIES ${TARGET} LINK_LIBRARIES) message(STATUS " Found ${TARGET_TYPE} ${TARGET}") - # Recursively get target sources and and shared library/object sources + # Recursively get target sources and shared library/object sources get_target_sources(TARGET_SOURCES ${TARGET}) foreach(LINK_TARGET IN LISTS TARGET_LINK_LIBRARIES) if(TARGET ${LINK_TARGET}) @@ -67,15 +66,11 @@ function(extract_info) endforeach() list(JOIN TARGET_SOURCES "\n" TARGET_SOURCES) - if(${TARGET_TYPE} STREQUAL "OBJECT_LIBRARY") - set(TARGET_NAME ${TARGET}) - elseif(${TARGET_TYPE} STREQUAL "EXECUTABLE") + if(${TARGET_TYPE} STREQUAL "EXECUTABLE") set(TARGET_NAME $) else() set(TARGET_NAME $) endif() - file(GENERATE OUTPUT "${TARGET_NAME}.type" CONTENT "${TARGET_TYPE}") - file(GENERATE OUTPUT "${TARGET_NAME}.dir" CONTENT "${TARGET_DIR}") file(GENERATE OUTPUT "${TARGET_NAME}.sources" CONTENT "${TARGET_SOURCES}") endforeach() endfunction() diff --git a/pyproject.toml b/pyproject.toml index e026c2c..44b8f5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires-python = "~=3.13.0" dependencies = [ "clang==21.1.7", "dspy==3.1.2", - "kiss-agent-framework==0.2.27", + "kiss-agent-framework==2026.5.22", "hydra-core==1.3.2", "networkx==3.6.1", "tomlkit>=0.14.0", @@ -22,11 +22,11 @@ dev = [ "basedpyright==1.29.4", "pre-commit==4.2.0", "pytest==9.0.3", - "ruff==0.11.13", + "ruff==0.13.0", ] [build-system] -requires = ["uv_build>=0.10.9,<0.11.0"] +requires = ["uv_build>=0.11.13,<0.12"] build-backend = "uv_build" [tool.basedpyright] diff --git a/src/ideas/adapters.py b/src/ideas/adapters.py index 
89475df..8f6398c 100644 --- a/src/ideas/adapters.py +++ b/src/ideas/adapters.py @@ -4,20 +4,80 @@ # SPDX-License-Identifier: Apache-2.0 # -from unittest.mock import patch - -from pydantic.fields import FieldInfo +from typing import Any +from collections.abc import Iterable import dspy +import json_repair import dspy.adapters.chat_adapter + +from unittest.mock import patch +from pydantic import ConfigDict, field_validator +from pydantic.fields import FieldInfo +from json_repair import loads as _json_repair_loads from dspy.adapters.chat_adapter import ChatAdapter as _ChatAdapter from dspy.adapters.utils import translate_field_type as _translate_field_type from dspy.signatures.utils import get_dspy_field_type +from dspy.signatures.signature import Signature class Code(dspy.Code): + model_config = ConfigDict(frozen=True) + + @field_validator("code", mode="before") + @classmethod + def _normalize_code(cls, v: str) -> str: + stripped = v.rstrip() + return stripped + "\n" if stripped else "" + + def __init__(self, code: str = "", **kwargs): + kwargs["code"] = code + super().__init__(**kwargs) + + @property + def text(self) -> str: + return self.code + + def __add__(self, other): + if not isinstance(other, Code): + return NotImplemented + if self.language != other.language: + raise TypeError(f"Cannot add {other.language} code to {self.language} code") + if not self.code: + return other + if not other.code: + return self + return type(self)(self.code + "\n" + other.code) + + @classmethod + def join(cls, parts: Iterable["Code"]) -> "Code": + result = cls() + seen = set() + for part in parts: + if part not in seen: + seen.add(part) + result = result + part + return result + + def __contains__(self, other): + if not isinstance(other, Code): + raise TypeError( + f"Cannot check membership of {type(other).__name__} in {type(self).__name__}" + ) + if self.language != other.language: + raise TypeError(f"Cannot check {other.language} code in {self.language} code") + return 
other.code in self.code + + def __eq__(self, other): + if not isinstance(other, Code): + return NotImplemented + return self.language == other.language and self.code == other.code + + def __hash__(self): + return hash((self.language, self.code)) + def format(self): - return f"```{self.language.lower()}\n{self.code.rstrip()}\n```" + return f"```{self.language.lower()}\n{self.code}```" @classmethod def short_description(cls): @@ -31,6 +91,23 @@ def format_field_structure(self, signature: type[dspy.Signature]) -> str: ): return super().format_field_structure(signature) + # Disable json_repair.loads for dspy.Code-like outputs when parsing completions. + # The following snippet is treated as "repaired json", which is clearly wrong: + # ```rust + # pub struct Program<'a> { + # pub code: &'a [i32], + # pub n: usize, + # pub ip: usize, + # } + # ``` + # That snippet is repaired as: + # a [i32] + # Unfortunately, dspy always runs json_repair.loads on every field and there is no + # option in dspy to disable it. + def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: + with patch.object(json_repair, "loads", json_repair_loads): + return super().parse(signature, completion) + def translate_field_type(field_name: str, field_info: FieldInfo) -> str: # If a non-input field has a short_description, then use that. @@ -43,3 +120,10 @@ def translate_field_type(field_name: str, field_info: FieldInfo) -> str: desc = (" " * 8) + f"# note: the value you produce {desc}" if desc else "" return f"{{{field_name}}}{desc}" return _translate_field_type(field_name, field_info) + + +def json_repair_loads(json_str: str, *args, **kwargs): + # If json_str starts with a fence (```), then immediately fail repair. 
+ if isinstance(json_str, str) and json_str.startswith("```"): + return "" + return _json_repair_loads(json_str, *args, **kwargs) diff --git a/src/ideas/agents/build.py b/src/ideas/agents/build.py new file mode 100644 index 0000000..6b8c756 --- /dev/null +++ b/src/ideas/agents/build.py @@ -0,0 +1,264 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +import re +import sys +import logging +import shutil +import textwrap +from pathlib import Path +from dataclasses import dataclass + +import hydra +from omegaconf import MISSING +from hydra.core.config_store import ConfigStore +from hydra.core.hydra_config import HydraConfig + +from ideas.tools import Crate, rustfmt +from ideas.tools import run_subprocess +from ideas.ast_rust import CodeRust, mangle +from ideas import create_translation_unit, extract_info_c +from ideas.init.consolidate import get_symbols_and_dependencies + +logger = logging.getLogger("ideas.init.build") + + +@dataclass +class BuildConfig: + instrumentation: str = MISSING + vcs: str = "none" + + def __post_init__(self): + if self.vcs not in ["git", "none"]: + raise ValueError(f"Invalid VCS: {self.vcs}!") + + if self.instrumentation not in ["coverage", "sanitizers"]: + raise ValueError(f"Invalid instrumentation: {self.instrumentation}!") + + +cs = ConfigStore.instance() +cs.store(name="init.build", node=BuildConfig) + + +def generate_build_script(instrumentation: str) -> tuple[str, str]: + build_options, build_commands = "", "" + if instrumentation == "coverage": + # With UBSan + build_options += '.flag("-fsanitize=undefined,nullability")' + build_options += '.flag("-fsanitize-trap=all")' + build_options += '.flag("-fprofile-instr-generate")' + build_options += '.flag("-fcoverage-mapping")' + + build_commands += 'println!("cargo:rustc-link-lib=dylib=crypto");' + build_commands += 'println!("cargo:rustc-link-lib=m");' + build_commands += ( + 
'println!("cargo:rustc-link-search=/usr/lib/llvm-21/lib/clang/21/lib/linux/");' + ) + build_commands += ( + 'println!("cargo:rustc-link-lib=static=clang_rt.ubsan_standalone-x86_64");' + ) + elif instrumentation == "sanitizers": + # With UBSan and ASan + build_options += '.flag("-fsanitize=address,undefined,nullability")' + build_options += '.flag("-fsanitize-trap=all")' + build_commands += 'println!("cargo:rustc-link-lib=dylib=crypto");' + build_commands += 'println!("cargo:rustc-link-lib=m");' + build_commands += ( + 'println!("cargo:rustc-link-search=/usr/lib/llvm-21/lib/clang/21/lib/linux/");' + ) + build_commands += ( + 'println!("cargo:rustc-link-lib=static=clang_rt.ubsan_standalone-x86_64");' + ) + build_commands += 'println!("cargo:rustc-link-lib=static=clang_rt.asan-x86_64");' + elif instrumentation == "none": + build_commands += 'println!("cargo:rustc-link-lib=dylib=crypto");' + build_commands += 'println!("cargo:rustc-link-lib=m");' + + return build_options, build_commands + + +def write_build_script(crate: Crate, build_options: str = "", build_commands: str = "") -> Path: + c_src_path = crate.c_src_path.relative_to(crate.cargo_toml.parent) + build_rs_src = textwrap.dedent( + f""" + fn main() {{ + println!("cargo:rerun-if-changed={c_src_path}"); + + cc::Build::new() + .compiler("clang") + .warnings(false) + .file("{c_src_path}") + {build_options} + .compile("library"); + + {build_commands} + }} + """ + ) + + build_rs_path = crate.cargo_toml.parent / "build.rs" + build_rs_path.write_text(build_rs_src) + rustfmt(build_rs_path) + return build_rs_path + + +def write_symbol_binding(crate: Crate, symbol_name: str): + rust_spelling = mangle(symbol_name) + symbol_binding = get_linked_binding(rust_spelling, crate.c_src_path) + + symbol_binding_path = crate.rust_src_path.parent / "binding" / f"{rust_spelling}.rs" + symbol_binding_path.parent.mkdir(exist_ok=True) + symbol_binding_path.write_text( + "\n\n".join( + [ + "#![allow(unused_attributes)]", + 
symbol_binding.text, + ] + ) + ) + crate.vcs.add(symbol_binding_path) + + binding_path = crate.rust_src_path.parent / "binding.rs" + with binding_path.open("a+") as f: + f.write(f"pub mod {rust_spelling};\n") + crate.vcs.add(binding_path) + + +def get_linked_binding(function_name: str, c_src_path: Path, *bindgen_args: str) -> CodeRust: + # Use bindgen to generate binding to C symbol + bindgen = [ + "bindgen", + "--disable-header-comment", + "--no-doc-comments", + "--no-layout-tests", + "--allowlist-function", + function_name, + str(c_src_path), + "--", + *bindgen_args, + ] + ok, binding, error, _ = run_subprocess(bindgen) + if not ok: + raise ValueError(f"`{' '.join(bindgen)}` failed!\n{binding + error}") + + # Remove \u{1} prefix from link_name attribute + linked_binding = binding.replace('#[link_name = "\\u{1}', '#[link_name = "') + return CodeRust(linked_binding) + + +def strip_instrumentation(crate: Crate) -> Path: + # Remove coverage script and data files + if (crate.cargo_toml.parent / "measure_coverage.sh").is_file(): + (crate.cargo_toml.parent / "measure_coverage.sh").unlink() + for prof_file in crate.cargo_toml.parent.glob("**/*.profraw"): + prof_file.unlink() + for prof_file in crate.cargo_toml.parent.glob("**/*.profdata"): + prof_file.unlink() + if (crate.cargo_toml.parent / "profraw").is_dir(): + shutil.rmtree(crate.cargo_toml.parent / "profraw") + if (crate.cargo_toml.parent / "json").is_dir(): + shutil.rmtree(crate.cargo_toml.parent / "json") + + # Rewrite `build.rs` to remove all instrumentation + build_options, build_commands = generate_build_script("none") + build_rs_path = write_build_script( + crate, build_options=build_options, build_commands=build_commands + ) + + # Attempt to build the crate + builds, feedback = crate.cargo_build() + if not builds: + raise RuntimeError( + f"Crate at {crate.cargo_toml.parent} does not build without instrumentation!\n{feedback}" + ) + + return build_rs_path + + +def _main(cfg: BuildConfig) -> None: + output_dir 
= Path(HydraConfig.get().runtime.output_dir) + + # Fetch crate + crate = Crate( + cargo_toml=output_dir / "Cargo.toml", + vcs=cfg.vcs, # type: ignore[reportArgumentType] + ) + + # Get global symbol table + tu = create_translation_unit(crate.c_src_path) + asts = [extract_info_c(tu)] + symbols, _ = get_symbols_and_dependencies( + asts, external_symbol_names=["c:@F@main"] if crate.is_bin else None + ) + global_functions = [ + s for s in symbols.values() if s.is_global and (s.is_function and s.is_definition) + ] + + # Write build.rs file + build_options, build_commands = generate_build_script(cfg.instrumentation) + build_rs_path = write_build_script( + crate, build_options=build_options, build_commands=build_commands + ) + crate.vcs.add(build_rs_path) + msg = f"Wrote `build.rs` at {build_rs_path}" + logger.info(msg) + crate.vcs.commit(msg) + + # Verify build with build.rs + builds, feedback = crate.cargo_build() + if not builds: + raise RuntimeError(f"Crate at {output_dir} does not build with build.rs!\n{feedback}") + + # Write main function and binding to it + main_function = "#![no_main]" if crate.is_bin else "" + with crate.rust_src_path.open("a+") as f: + f.write(main_function) + crate.vcs.add(crate.rust_src_path) + crate.vcs.commit("Added main function (if any) to Rust source") + + # Generate Rust bindings for public library functions + if not crate.is_bin: + binding_path = crate.rust_src_path.parent / "binding.rs" + binding_path.write_text("") + for symbol in global_functions: + if not (symbol.is_function and symbol.is_definition and symbol.is_global): + continue + write_symbol_binding(crate, symbol.spelling) + logger.info("Generated bindings for all global functions") + + # Make the bindings module visible in the crate + rust_src = crate.rust_src_path.read_text() + BINDING_MOD = "pub mod binding;" + if not re.search(f"^{re.escape(BINDING_MOD)}$", rust_src, flags=re.MULTILINE): + crate.rust_src_path.write_text("\n\n".join([rust_src, BINDING_MOD])) + msg = 
f"Referenced `{BINDING_MOD}` in {crate.rust_src_path}" + logger.info(msg) + else: + msg = f"Binding module `{BINDING_MOD}` was already referenced in {crate.rust_src_path}!" + logger.warning(msg) + crate.vcs.add(crate.rust_src_path) + crate.vcs.commit(msg) + + # Attempt a final build + builds, feedback = crate.cargo_build() + if not builds: + raise RuntimeError(f"Crate at {output_dir} does not build with build.rs!\n{feedback}") + + # Clean on exit + crate.cargo_clean() + + +@hydra.main(version_base=None, config_name="init.build") +def main(cfg: BuildConfig) -> None: + try: + _main(cfg) + except Exception as e: + logger.exception(e) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/ideas/agents/printer.py b/src/ideas/agents/printer.py index e0d7b4f..17b347a 100644 --- a/src/ideas/agents/printer.py +++ b/src/ideas/agents/printer.py @@ -6,12 +6,14 @@ import sys import logging -from typing import TextIO -from rich.text import Text +from typing import Any, TextIO +from rich.text import Text from rich.console import Console +from rich.panel import Panel from kiss.core.print_to_console import ConsolePrinter +from kiss.core.printer import truncate_result class ConsoleTee: @@ -62,3 +64,34 @@ def __init__(self, logger: logging.Logger, level: int = logging.INFO): tee = ConsoleTee(sys.__stdout__ or sys.stdout, logger, level) super().__init__(file=tee) self._console = Console(highlight=False, file=tee) # type: ignore[reportArgumentType] + + def print(self, content: Any, type: str = "text", **kwargs: Any) -> str: + if type == "tool_result": + self._flush_newline() + self._print_tool_result(str(content), kwargs.get("is_error", False)) + return "" + + if type == "usage_info": + self._flush_newline() + self._console.print( + Panel( + Text(str(content).strip(), style="dim italic"), + border_style="dim", + padding=(0, 1), + expand=True, + ) + ) + return "" + + return super().print(content, type=type, **kwargs) + + def _print_tool_result(self, content: str, 
is_error: bool = True) -> None: + style = "red" if is_error else "green" + self._console.rule("FAILED" if is_error else "OK", style=style, align="center") + if not self._bash_streamed: + display = truncate_result(content) + for line in display.splitlines(): + self._file.write(line + "\n") + self._file.flush() + self._bash_streamed = False + self._console.rule(style=style) diff --git a/src/ideas/agents/testgen.py b/src/ideas/agents/testgen.py index 0afd39e..d676d95 100644 --- a/src/ideas/agents/testgen.py +++ b/src/ideas/agents/testgen.py @@ -10,6 +10,7 @@ import logging import tempfile import textwrap +import time import shutil from pathlib import Path from dataclasses import dataclass @@ -19,212 +20,119 @@ from hydra.core.config_store import ConfigStore from hydra.core.hydra_config import HydraConfig +from ideas.tools import Crate from ideas.agents.printer import ConsoleTee, LoggingConsolePrinter -from ideas.tools import run_subprocess +from ideas.agents.build import strip_instrumentation +from ideas.agents.utils import ( + NEXTEST_DUMMY_TEST, + nextest_config, + write_coverage_script, + write_collect_script, + write_extract_json_script, +) from kiss.agents.sorcar.useful_tools import UsefulTools from kiss.core.relentless_agent import RelentlessAgent +from kiss.core.kiss_error import KISSError logger = logging.getLogger("ideas.agents.testgen") @dataclass class TestgenConfig: + cargo_toml: Path = MISSING model: str = MISSING c_code: Path = MISSING project_name: str = MISSING - test_vectors_out: Path = MISSING test_crate_out: Path = MISSING - num_vectors: int = 3 - desired_symbols: int = 3 + guarantee_assert_tests: bool = False + collect_to_assert: bool = False + target_coverage: int = 90 def __post_init__(self): - if not self.c_code.exists(): - raise ValueError( - f"c_code must be a directory containing a CMake C project or a single C file, got: {self.c_code}" - ) + if not self.c_code.is_file(): + raise ValueError(f"c_code must be a single C file, got: 
{self.c_code}") @dataclass class TestgenInstructions: - analyze_dir: str = textwrap.dedent( - """ - ## Step 1 – Analyze the C project ## - Carefully read and understand the C project rooted at `{c_proj_path}`. - - Inspect the CMakeLists.txt to learn: - - the project / library name - - all source files and include directories - - any required link libraries (e.g. `-lm`) - - List **all** top-level (exported / non-static) library functions declared in the - public header(s) under `{c_proj_path}/include`. - Write only their function names (no declaration or body) to `{c_proj_path}/functions.lst`, - newline separated. - """ - ) - analyze_file: str = textwrap.dedent( """ ## Step 1 – Analyze the standalone C file ## Carefully read and understand the single C source file at - `{c_proj_path}/{c_filename}`. + `{c_proj_path}/lib.c`. + This is a **standalone** C library file that should **never** be edited. - This is a **standalone** C file (no CMake project, no separate headers). - All declarations and definitions live in this one file. - - List **all** non-static functions defined in the file. - Write only their function names (no declaration or body) to `{c_proj_path}/functions.lst`, - newline separated. - """ - ) - - analyze_select: str = textwrap.dedent( - """ - From that list, select **up to {desired_symbols}** functions that are the best - candidates for black-box testing. - - The selected functions **must** have all their dependencies defined in the project. - If they reference functions that are **only declared**, they **cannot** be tested. - - Only select fewer than {desired_symbols} if there are not - that many functions. Prefer functions that: - - are **high-level entry points** (i.e. 
they orchestrate significant portions - of the code's logic rather than being small utility helpers) - - accept rich input (structs, arrays, multiple parameters) so that a single - call exercises many internal code-paths - - together give broad coverage of the public API - Write only their function names (no declaration or body) to `{c_proj_path}/selected.lst`, - newline separated. - - For each of the selected functions analyze: which parameters are **input-only**, - which are **output-only** (written by the callee), and which are **in/out** to understand + For each exported function analyze: which parameters are **input-only**, + which are **output-only** (written by the callee), and which are **modified in-place** to understand how to set up its test data and collect its outputs. - """ - ) - build_rs: str = textwrap.dedent( - """ - ## Step 2 – Create a Rust crate with a `build.rs` that compiles and links the C project ## - Initialize a new Rust **library** crate at `{rs_crate_path}`: - ```bash - cargo init --lib --edition=2024 --vcs none --name= {rs_crate_path} - ``` - - Add the `cc` build dependency and the following dev-dependencies to `Cargo.toml`: - ```toml - [build-dependencies] - cc = "1.2.59" - - [dev-dependencies] - serde = {{ version = "1", features = ["derive"] }} - serde_json = "1" - ``` + **Infinite looping:** If any function (including static ones) loops infinitely, + identify all relevant paths and their trigger conditions. - Write a `{rs_crate_path}/build.rs` that: - 1. Uses `cc::Build::new()` with `.compiler("clang")` to compile **all** C source files discovered in Step 1. - 2. Adds the correct include directories so the C headers are found. - 3. Uses `.warnings(false)` to suppress warnings. - 4. Uses `.std("c99")` to specify the C standard. - 5. Links any extra system libraries the C project requires (e.g. `println!("cargo::rustc-link-lib=m");`). + **Undefined behavior:** Carefully analyze the C code for possible undefined behavior (UB). 
""" ) - bindgen_dir: str = textwrap.dedent( + build_rs: str = textwrap.dedent( """ - ### Obtain the exact FFI API with `bindgen` ### - Before populating the crate sources, use `bindgen` on the shell to generate the - correct Rust FFI declarations for the selected functions from Step 1. + ## Step 2 – Analyze test crate ## + The directory at {rs_crate_path} contains a Rust crate that links the C code + and has **no** code of its own. - Run a **separate** `bindgen` invocation for each function and **redirect each - output directly** into its own binding module file: - ```bash - mkdir -p {rs_crate_path}/src/binding - BINDGEN_EXTRA_CLANG_ARGS="-I" bindgen \ - --disable-header-comment --no-doc-comments --no-layout-tests \ - \ - --allowlist-function \ - > {rs_crate_path}/src/binding/.rs - ``` + Analyze the `Cargo.toml` file and the `build.rs` files, and understand how they link the C code. + The crate is a **library** crate, and all exported functions have pre-generated C FFI bindings. - Where you must properly identify: - - `` – one or more `-I` arguments pointing to the - C include directories discovered in Step 1. If multiple header directories - are needed, list them all as space-separated `-I` arguments inside - `BINDGEN_EXTRA_CLANG_ARGS`. - - `` – the public header that declares the function. - - `` – the exact C function name (one per invocation). - """ - ) + These bindings have been generated by `bindgen` and placed in the `src/binding` directory, one file per function. + They are **correct** and **definitive** and their interfaces should **never** be changed. - bindgen_file: str = textwrap.dedent( - """ - ### Obtain the exact FFI API with `bindgen` ### - Before populating the crate sources, use `bindgen` on the shell to generate the - correct Rust FFI declarations for the selected functions from Step 1. + The `build.rs` and `bindings` modules can **never** be modified, no matter the circumstances. 
- Since this is a standalone C file with no separate headers, run `bindgen` - directly on the source file. Run a **separate** invocation for each function - and redirect each output directly into its own binding module file: - ```bash - mkdir -p {rs_crate_path}/src/binding - bindgen \ - --disable-header-comment --no-doc-comments --no-layout-tests \ - {c_proj_path}/{c_filename} \ - --allowlist-function \ - > {rs_crate_path}/src/binding/.rs - ``` + ### Working directory ### + Execute `cd {rs_crate_path}` to enter the crate directory before any `cargo` command. + Build using `cargo build` to confirm the C code compiles and links. - Where `` is the exact C function name (one per invocation). + ### Test framework ### + The crate uses `cargo nextest` as the test framework exclusively. + This **guarantees** that all tests are run in parallel and that no test can rely on side effects from another test. + You **must** use `cargo nextest` to run tests, and you **must not** write any test that relies on shared state or side effects. """ ) - build_rs_librs: str = textwrap.dedent( + coverage_script: str = textwrap.dedent( """ - ### Critical: crate module layout ### - The crate **must** use a modular layout that keeps each symbol's bindgen output - in its own file. Create the following structure: + ### Measuring coverage ### + The crate is set up to measure source-based coverage of the C code with LLVM's sanitizers and coverage tools. + Understand how this is done by analyzing the `build.rs` file and the `Cargo.toml`. - 1. **`{rs_crate_path}/src/lib.rs`** – contains **only**: - ```rust - pub mod binding; - ``` - - 2. **`{rs_crate_path}/src/binding.rs`** – contains one `pub mod ;` - line for **each** selected function. Example (if the selected functions are - `foo` and `bar`): - ```rust - pub mod foo; - pub mod bar; - ``` - - 3. 
**`{rs_crate_path}/src/binding/.rs`** – each file is the - **exact, unmodified** output of the corresponding `bindgen` invocation from - the previous step (already written there by the shell redirects above). - Do **not** hand-edit these files. + The script `{rs_crate_path}/measure_coverage.sh` is used to run tests and measure C code coverage of the tests that will be written. + This script must be run at any time to get an updated coverage report and identify untested code paths. - Build using `cargo build` to confirm the C code compiles and links. + This is the **only** way to measure coverage, so do not attempt to use other tools or methods. + Instead write any relevant experiments as collection tests, as indicated in Step 3 below. """ ) - gen_data_collection_tests: str = textwrap.dedent( + write_collect_tests: str = textwrap.dedent( """ ## Step 3 – Generate a data-collection test harness ## - Create `{rs_crate_path}/tests/test_collect.rs`. + Based on the C program analysis, design input tests that achieve high coverage of the program. + + Each test must be **independent** and **self-contained**: it must set up its own input data, + call the function under test, and capture all relevant output data without + relying on any shared state or side effects from other tests. + Because of `cargo nextest`'s parallel execution, clean-up on exit is **not** required. - ### 3a – FFI linkage (critical!) ### - **Do NOT** declare `unsafe extern "C"` blocks in the test file. - Instead, import the FFI functions through the binding modules using + ### 3a – FFI linkage ### + Import the FFI functions through the binding modules using **absolute crate paths**. The crate name is derived from the `name` field in `Cargo.toml` (with hyphens replaced by underscores). Import like this: ```rust use ::binding::::; ``` This is **mandatory** because the C static library is attached to the - library crate by `build.rs`. 
If the test declares its own `extern "C"` - block the linker will NOT find the C symbols and you will get - `undefined symbol` errors. + library crate by `build.rs`. ### 3b – `#[repr(C)]` struct mirrors ### Import the `#[repr(C)]` struct types through the binding modules (they were @@ -232,6 +140,11 @@ class TestgenInstructions: ```rust use ::binding::::; ``` + + If multiple functions use **exactly** the same struct type, + it will be generated in each relevant binding module with the exact name and layout, + so you can import it **only once** from any of them. + Then add `#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]` to **local** wrapper types or re-definitions of those structs that you need for JSON serialization. Because `serde` derives cannot be added to a type imported @@ -268,8 +181,7 @@ class TestgenInstructions: ``` ### 3d – Data-collection test functions ### - For each set of representative input values you choose (at least {num_vectors} - distinct sets), write a `#[test]` function named `collect_vector_` that: + For each set of input values, write a `#[test]` function named `collect_vector_` that: 1. Constructs a `LibState` with the chosen inputs (and outputs / return field zeroed). 2. Clones it into `lib_state_in`. 3. Calls the C function through `unsafe`, using the symbol imported from the @@ -283,39 +195,98 @@ class TestgenInstructions: println!("{{}}", serde_json::to_string_pretty(&vector).unwrap()); ``` - The chosen inputs should exercise a variety of code-paths in the C function: - - a zeroed / default / neutral input - - a "normal" input with representative non-trivial values - - an edge-case or boundary input + The chosen inputs **cannot exercise undefined behavior (UB)**! + If they do, instrumentation will make `cargo nextest run` output a + failed test and return an error, and test generation should be re-attempted. + If issues are identified, do **not** give up early. 
+ + The chosen inputs should exercise a variety of code-paths in the C function, including: + - zeroed / default / neutral input + - "normal" inputs with representative non-trivial values + + The tests generated in this step are not meant to **assert** outputs, but only collect them + and they should **not** assume any state in the test file. + They are only meant to be a harness to collect input/output data and coverage information. + + ### NUL-terminated strings in C ### + If the C function takes string inputs, remember that they must be NUL-terminated. + Not respecting this will cause silent memory corruption and make it impossible to collect meaningful data! + To create a NUL-terminated string in Rust, you can create a `Vec` with the string bytes and a trailing `0`, + and then pass a pointer to its first element. + + ### Non-persistence ### + **All** collection tests must be designed to be run repeatedly without any clean-up, + and they must not rely on any side effects or shared state. + + ### Portable, self-contained tests ### + If the C code relies on pre-existing files on disk (e.g., through hardcoded paths), + you must ensure that all tests **locally** create any required files with the expected content before calling the function under test, + and that they do not rely on any pre-existing state on disk. + If multiple tests reference **exactly** the same file, place a safe Lock around all accesses to that file to + prevent race conditions; `cargo nextest` handles parallel execution by default otherwise. + You **cannot** rely on files on-disk: the goal is for the test file to be moved to some other crate and still work. + + If the C code relies on network access, you must ensure that all tests mock the network interactions locally + and do not rely on any external network state or connectivity. 
+ """ + ) - Build and run tests in the crate with: - ```bash - cargo test --manifest-path {rs_crate_path}/Cargo.toml --quiet -- --nocapture - ``` - Verify that all tests pass and JSON is printed. + coverage_improvement: str = textwrap.dedent( + """ + ### The coverage metric ### + Use **branch coverage** to identify and exercise untested code paths. + + To improve branch coverage, generate interesting combinations of input arguments + with special attention to edge cases and boundary conditions. + + Ensure the new input values exercise **well-defined** code paths that improve branch coverage. + Exercising UB will be caught and rejected by the sanitizers! + + Aim to achieve branch coverage of at least {target_coverage}%%. + If this is not possible because of unreachable static functions, a lower coverage is acceptable. + After **three** consecutive attempts where branch coverage has not improved by at least 1 percentage point, + you may stop trying to improve coverage and proceed to the next step. + + Verify that all tests pass and JSON is correctly printed for **all** of them, + including tests on new symbols added for improving coverage. + """ + ) + + analyze_data_collection_tests: str = textwrap.dedent( + """ + ## Step 3 – Analyze the data-collection tests ## + The crate already contains some data-collection tests in `tests/test_collect.rs` + designed to print JSON outputs by running them and capturing their stdout. + + Carefully analyze the `tests/test_collect.rs` file and understand how it imports the FFI symbols, + how it defines the `LibState` struct and the `collect_vector_` tests, and how it prints the JSON output. + + Execute `cargo nextest run --test test_collect --nocapture 2>/dev/null` + to run the tests and see the JSON output they print on the `stdout` channel. + Validate **all** collection tests run successfully and print valid JSON with the expected structure. + + If tests pass **do NOT** modify them in any way at this stage. 
+ If tests exercise UB or trip sanitizers, you must remove them. + If tests fail functionally, attempt to fix them until they pass and print the expected JSON. """ ) write_test_vectors: str = textwrap.dedent( """ - ## Step 4 – Save test vectors as JSON files ## + ## Step 4 – Save outputs as JSON files ## Create the directory `{test_vectors_path}`. Run each data-collection test **individually** and capture its stdout. Write the JSON output of each `collect_vector_` test to `{test_vectors_path}/.json`, where `` is the 1-based index. - Use `uv` to extract the JSON reliably – do NOT rely on grep/sed: + You **must** collect data from **all** tests, not just the initial + ones. + + Use the provided `extract_json.py` script to extract the JSON reliably – do NOT rely on grep/sed: ```bash - cargo test collect_vector_ -- --nocapture 2>/dev/null | \ - uv run python -c " - import sys, json - buf = sys.stdin.read() - start = buf.index('{{') - end = buf.rindex('}}') + 1 - obj = json.loads(buf[start:end]) - print(json.dumps(obj, indent=2)) - " > {test_vectors_path}/.json + cargo nextest run --nocapture -- collect_vector_ --exact 2>/dev/null | \ + uv run extract_json.py > {test_vectors_path}/.json ``` Verify each file is valid JSON with the expected `lib_state_in` / `lib_state_out` @@ -338,13 +309,18 @@ class TestgenInstructions: use ::binding::::; ``` - **Important**: `test_assert.rs` must **not** depend on `serde` or `serde_json`. + ### Plaintext literals and no serde ### + `test_assert.rs` must **not** depend on `serde` or `serde_json`. Because the crate's types are exact `bindgen` output (no serde derives), the assert tests reconstruct all values as **plain Rust literals** taken from the JSON files saved in Step 4. Do **not** `#[derive(Serialize, Deserialize)]` on any type in this file and do **not** add `use serde*` or `use serde_json*`. 
- For **each** JSON test vector saved in Step 4, write a `#[test]` function named + You **must** write an assertion test for each collection test, no matter + how many collection tests are there! + Write them one-by-one if there are too many. + + For **each** JSON output saved in Step 4, write a `#[test]` function named `test_vector_` that: 1. Reconstructs the `lib_state_in` values from the JSON file as Rust literals. 2. Calls the C function through `unsafe` using the imported symbol. @@ -358,57 +334,45 @@ class TestgenInstructions: - For integer / bool fields use `assert_eq!`. - For pointer-typed output fields, dereference the pointer (inside `unsafe`) and compare the pointed-to value rather than the pointer address itself. + - Focus on writing meaningful assertions that compare relevant output fields, + rather than writing minimal assertions that only check a few fields or non-null pointers. Once done, run: ```bash - cargo test --manifest-path {rs_crate_path}/Cargo.toml --quiet --test test_assert + cargo nextest run --test test_assert --cargo-quiet ``` - All tests **must** pass. - """ - ) - - deny_dependencies: str = textwrap.dedent( - """ - ## External dependencies ## - Apart from `cc` (build-dependency), `serde`, and `serde_json` (dev-dependencies), - do not add any other dependencies to the Cargo.toml file. + All tests **must** pass and not exercise any undefined behavior. """ ) simple_exit: str = textwrap.dedent( """ - Once all assert tests pass and JSON files are written, finish the task and exit. + Once the task is complete, exit immediately. Do not over-verify or generate extensive reports. 
""" ) @classmethod - def dir_task_description(cls) -> str: + def coverage_based(cls) -> str: return ( - cls.analyze_dir - + cls.analyze_select + cls.analyze_file + cls.build_rs - + cls.bindgen_dir - + cls.build_rs_librs - + cls.gen_data_collection_tests + + cls.coverage_script + + cls.write_collect_tests + + cls.coverage_improvement + cls.write_test_vectors + cls.write_assert_tests - + cls.deny_dependencies + cls.simple_exit ) @classmethod - def file_task_description(cls) -> str: + def collect_to_assert(cls) -> str: return ( cls.analyze_file - + cls.analyze_select + cls.build_rs - + cls.bindgen_file - + cls.build_rs_librs - + cls.gen_data_collection_tests + + cls.analyze_data_collection_tests + cls.write_test_vectors + cls.write_assert_tests - + cls.deny_dependencies + cls.simple_exit ) @@ -438,84 +402,113 @@ def _main(cfg: TestgenConfig) -> None: # Separately log the complete trajectory logger_trajectory = logging.getLogger("ideas.testgen.trajectory") logger_trajectory.propagate = False - fh = logging.FileHandler(output_dir / "testgen_trajectory.log") + fh = logging.FileHandler(output_dir / f"testgen_trajectory-{int(time.time())}.log") fh.setFormatter(ConsoleTee.StripANSIFormatter("%(asctime)s %(message)s")) logger_trajectory.addHandler(fh) # Simultaneous print and log to file printer = LoggingConsolePrinter(logger=logger_trajectory) - agent = RelentlessAgent(name="C library test vector generator") - - project_name = cfg.project_name - work_dir = Path(tempfile.mkdtemp()) / project_name - os.makedirs(work_dir) - - # Copy the C project into the working directory - c_proj_path = work_dir / "test_case" - is_single_file = Path(cfg.c_code).is_file() - if is_single_file: - # Coherent /tmp and on-disk paths - c_proj_path = work_dir / cfg.c_code.parent - os.makedirs(c_proj_path) - shutil.copy(cfg.c_code, c_proj_path / cfg.c_code.name) - else: - shutil.copytree(cfg.c_code, c_proj_path, dirs_exist_ok=True) + agent = RelentlessAgent(name="C library test generator") + + # 
Generate helper scripts and files in the crate + crate = Crate(output_dir / "Cargo.toml") + nextest_config(crate) + if not cfg.collect_to_assert: + write_coverage_script(crate) + write_collect_script(crate) + write_extract_json_script(crate) + + # Workspace + work_dir = Path(tempfile.mkdtemp()) + workspace_dir = work_dir / "test_crates" + shutil.copytree("test_crates", workspace_dir) + + # Remove all log files + for log_file in workspace_dir.glob("**/*.log"): + log_file.unlink() # Paths the agent will populate - rs_crate_path = work_dir / (cfg.test_crate_out if is_single_file else "testgen_crate") - test_vectors_path = work_dir / "test_vectors" + rs_crate_path = work_dir / cfg.test_crate_out + test_vectors_path = rs_crate_path / "json" + + # If assertion tests already exist, they must be correct + if (rs_crate_path / "tests" / "test_assert.rs").is_file(): + crate = Crate(rs_crate_path / "Cargo.toml") + ok, output, error, _ = crate.cargo_test("test_assert", quiet=True) + if not ok: + raise RuntimeError( + "Existing assertion tests failed to pass, previous agent did not clean them up!" + ) + logger.info( + f"Assertion tests already exist at {rs_crate_path / 'tests/test_assert.rs'}, skipping agent!" 
+ ) + return + + # Hide instrumentation from conversion agent + if cfg.collect_to_assert: + strip_instrumentation(crate) # Build the task prompt task_description = ( - TestgenInstructions.file_task_description() - if is_single_file - else TestgenInstructions.dir_task_description() + TestgenInstructions.collect_to_assert() + if cfg.collect_to_assert + else TestgenInstructions.coverage_based() ) arguments = { - "c_proj_path": c_proj_path.relative_to(work_dir), + "c_proj_path": cfg.c_code.parent, "rs_crate_path": rs_crate_path.relative_to(work_dir), "test_vectors_path": test_vectors_path.relative_to(work_dir), - "num_vectors": cfg.num_vectors, - "desired_symbols": cfg.desired_symbols, + "target_coverage": cfg.target_coverage, } - if is_single_file: - arguments["c_filename"] = cfg.c_code.name task_description = task_description.format(**arguments) # Run agent in the work directory - original_dir = os.getcwd() os.chdir(work_dir) + try: + agent.run( + model_name=cfg.model, + prompt_template=task_description, + max_steps=100, + max_budget=4, + max_sub_sessions=1, + work_dir=str(work_dir), + tools=get_tools(), + printer=printer, + verbose=True, + ) + except KISSError as e: + logger.warning(f"Agent claims it failed with error: {e}. Clean-up will continue.") - agent.run( - model_name=cfg.model, - system_instructions="", - prompt_template=task_description, - max_steps=100, - max_budget=4, - max_sub_sessions=1, - work_dir=str(work_dir), - tools=get_tools(), - printer=printer, - verbose=True, - ) - # Verify that assertion tests pass - cargo_toml = work_dir / cfg.test_crate_out / "Cargo.toml" - ok, output, error, returncode = run_subprocess( - ["cargo", "test", "--manifest-path", str(cargo_toml), "--test", "test_assert"], - timeout=60, - ) + # Verify that collection tests exist + if not (rs_crate_path / "tests" / "test_collect.rs").is_file(): + raise RuntimeError( + f"Data collection tests were not found at {rs_crate_path / 'tests/test_collect.rs'}!" 
+ ) + + # Strip instrumentation to ensure tests are correct and do not rely on it + crate = Crate(rs_crate_path / "Cargo.toml") + strip_instrumentation(crate) + ok, output, error, _ = crate.cargo_test("test_collect", quiet=True) if not ok: raise RuntimeError( - f"Assert tests failed for target {project_name}: {error}! Tests will not be used during hybrid build!" + f"Data collection tests failed to pass without instrumentation! Output:\n{output}\nError:\n{error}" ) - os.chdir(original_dir) - - # Copy test vectors - shutil.copytree(test_vectors_path, cfg.test_vectors_out, dirs_exist_ok=True) - # Copy test crate - shutil.copytree(rs_crate_path, cfg.test_crate_out, dirs_exist_ok=True) - # Copy C analysis results - shutil.copy(c_proj_path / "functions.lst", cfg.test_crate_out / "functions.lst") - shutil.copy(c_proj_path / "selected.lst", cfg.test_crate_out / "selected.lst") + + # Check if assertion tests pass + ok, output, error, _ = crate.cargo_test("test_assert", quiet=True) + if not ok: + logger.error(f"Assertion tests failed to pass! 
Output:\n{output}\nError:\n{error}") + # Remove incomplete assertion tests, if any + if (rs_crate_path / "tests" / "test_assert.rs").is_file(): + (rs_crate_path / "tests" / "test_assert.rs").unlink() + + # And replace with an always-passing test (nextest does not allow empty test files) + if cfg.guarantee_assert_tests: + logger.warning("Writing dummy test_assert.rs that always passes") + (rs_crate_path / "tests" / "test_assert.rs").write_text(NEXTEST_DUMMY_TEST) + + # Clean the crate and copy it back to the project directory + crate.cargo_clean() + shutil.copytree(rs_crate_path, output_dir, dirs_exist_ok=True) if __name__ == "__main__": diff --git a/src/ideas/agents/testgen_bin.py b/src/ideas/agents/testgen_bin.py new file mode 100644 index 0000000..c6ac1f7 --- /dev/null +++ b/src/ideas/agents/testgen_bin.py @@ -0,0 +1,464 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + + +import sys +import os +import logging +import tempfile +import textwrap +import time +import shutil +from pathlib import Path +from dataclasses import dataclass + +import hydra +from omegaconf import MISSING +from hydra.core.config_store import ConfigStore +from hydra.core.hydra_config import HydraConfig + +from ideas.tools import Crate +from ideas.agents.printer import ConsoleTee, LoggingConsolePrinter +from ideas.agents.build import strip_instrumentation +from ideas.agents.utils import ( + NEXTEST_DUMMY_TEST, + nextest_config, + write_coverage_script, + write_collect_script, + write_extract_json_script, +) + +from kiss.agents.sorcar.useful_tools import UsefulTools +from kiss.core.relentless_agent import RelentlessAgent +from kiss.core.kiss_error import KISSError + +logger = logging.getLogger("ideas.agents.testgen_bin") + + +@dataclass +class TestgenConfig: + cargo_toml: Path = MISSING + model: str = MISSING + c_code: Path = MISSING + project_name: str = MISSING + test_crate_out: Path = MISSING + + guarantee_assert_tests: bool = False + 
collect_to_assert: bool = False + target_coverage: int = 90 + + def __post_init__(self): + if not self.c_code.exists(): + raise ValueError(f"c_code must be a single C file, got: {self.c_code}") + + +@dataclass +class TestgenInstructions: + analyze_file: str = textwrap.dedent( + """ + ## Step 1 – Analyze the standalone C file ## + Carefully read and understand the single C source file at + `{c_proj_path}/main.c`. + + This is a **standalone** C file that should **never** be edited. + If the file is too large to analyze in one go, focus on its `main` function. + + Analyze how the program uses `argc`/`argv`, whether it reads from `stdin`, + what it prints to `stdout`/`stderr`, which exit codes it returns, and which + system libraries it links against. + + **Interactive program handling:** Determine whether the program is **batch** + (runs and exits) or **interactive** (loops on `stdin`, e.g. `while(1)` + + `fgets`/`scanf`). If interactive, identify the exit condition (menu + choice, special command, or EOF only). + + **Infinite looping:** If the program loops infinitely, identify all relevant paths + and their trigger conditions. + + **Undefined behavior:** Carefully analyze the C code for possible undefined behavior (UB). + """ + ) + + build_rs: str = textwrap.dedent( + """ + ## Step 2 – Analyze test crate ## + The directory at {rs_crate_path} contains a Rust crate that links the C code + and has **no** code of its own. + + Analyze the `Cargo.toml` file and the `build.rs` files, and understand how they link the C code. + The crate is a **binary** crate, so the C `main` function is the real entry point of the final executable. + + ### Working directory ### + Execute `cd {rs_crate_path}` to enter the crate directory before any `cargo` command. + Build using `cargo build` to confirm the C code compiles and links. + + ### Test framework ### + The crate uses `cargo nextest` as the test framework exclusively. 
+ This **guarantees** that all tests are run in parallel and that no test can rely on side effects from another test. + You **must** use `cargo nextest` to run tests, and you **must not** write any test that relies on shared state or side effects. + """ + ) + + coverage_script: str = textwrap.dedent( + """ + ### Measuring coverage ### + The crate is set up to measure source-based coverage of the C code with LLVM's sanitizers and coverage tools. + Understand how this is done by analyzing the `build.rs` file and the `Cargo.toml`. + + The script `{rs_crate_path}/measure_coverage.sh` is used to run tests and measure C code coverage of the tests that will be written. + This script must be run at any time to get an updated coverage report and identify untested code paths. + This script is **complete and correct** as-is, and the task is to write tests that can be measured with it. + + This is the **only** way to measure coverage, so do not attempt to use other tools or methods. + Instead write any relevant experiments as collection tests, as indicated in Step 3 below. + """ + ) + + write_collect_tests: str = textwrap.dedent( + """ + ## Step 3 – Generate a data-collection test harness ## + Based on the C program analysis, design input tests that achieve high coverage of the program. + + Each test must be **independent** and **self-contained**: it must set up its own input data, + call the function under test, and capture all relevant output data without + relying on any shared state or side effects from other tests. + Because of `cargo nextest`'s parallel execution, clean-up on exit is **not** required. + + The test cases **cannot exercise undefined behavior (UB) or infinite loops**! + If they do, instrumentation will make `cargo nextest run` output a + failed test and return an error, and they should be re-attempted. 
+ + Include at least: + - a default / no-argument invocation (if the program supports it) + - a typical invocation with representative arguments + - an edge-case or boundary invocation (empty input, very long input, + special characters, etc.) + - an error path that triggers a non-zero exit code or stderr output + (if the program has any such path) + + The `{rs_crate_path}/tests/test_collect.rs` file begins with the `collect_and_print` + function that **must** be used to collect all test cases. + This file can be considered **complete and correct** as-is, and the task is to write test cases that call `collect_and_print`. + Note the `stdbuf` approach is **required** to ensure proper `libc` output buffering. + + For **each** test case, write a + `#[test]` function named `collect_` that calls `collect_and_print` + with the test case's name, args, and stdin. Example: + ```rust + #[test] + fn collect_() {{ + collect_and_print("", &["arg1", "arg2"], Some("stdin data")); + }} + ``` + Pass `None` for stdin when the test case has no input. + + The tests generated in this step are not meant to **assert** outputs, but only collect them + and they should **not** assume any state in the test file. + They are only meant to be a harness to collect input/output data and coverage information. + + ### NUL-terminated strings in C ### + If the C function takes string inputs, remember that they must be NUL-terminated. + Not respecting this will cause silent memory corruption and make it impossible to collect meaningful data! + To create a NUL-terminated string in Rust, you can create a `Vec` with the string bytes and a trailing `0`, + and then pass a pointer to its first element. + + ### Non-persistence ### + **All** collection tests must be designed to be run repeatedly without any clean-up, + and they must not rely on any side effects or shared state. 
+ + ### Portable, self-contained tests ### + If the C code relies on pre-existing files on disk (e.g., through hardcoded paths), + you must ensure that all tests **locally** create any required files with the expected content before calling the function under test, + and that they do not rely on any pre-existing state on disk. + If multiple tests reference **exactly** the same file, place a safe Lock around all accesses to that file to + prevent race conditions; `cargo nextest` handles parallel execution by default otherwise. + You **cannot** rely on files on-disk: the goal is for the test file to be moved to some other crate and still work. + + If the C code relies on network access, you must ensure that all tests mock the network interactions locally + and do not rely on any external network state or connectivity. + """ + ) + + coverage_improvement: str = textwrap.dedent( + """ + ### The coverage metric ### + Use **branch coverage** to identify and exercise untested code paths. + + To improve branch coverage, generate interesting combinations of input arguments + and the `stdin` stream, with special attention to edge cases and boundary conditions. + Pay special attention to `libc` functions that may be used in the C code, + and generate inputs that trigger different code paths in them + (e.g. `strlen` with short vs long strings, `fgets` with input shorter vs longer than the buffer size, etc.). + + Ensure the new input values exercise **well-defined** code paths that improve branch coverage. + Exercising UB will be caught and rejected by the sanitizers! + + Aim to achieve branch coverage of at least {target_coverage}%%. + After **three** consecutive attempts where branch coverage has not improved by at least 1 percentage point, + you may stop trying to improve coverage and proceed to the next step. + + Verify that all tests pass and JSON is correctly printed for **all** of them, + including tests on new symbols added for improving coverage. 
+ """ + ) + + analyze_data_collection_tests: str = textwrap.dedent( + """ + ## Step 3 – Analyze the data-collection tests ## + The crate already contains some data-collection tests in `tests/test_collect.rs` + designed to print JSON outputs by running them and capturing their stdout. + + Carefully analyze the `tests/test_collect.rs` file and understand how it imports the FFI symbols, + how it defines the `LibState` struct and the `collect_vector_` tests, and how it prints the JSON output. + + Execute `cargo nextest run --test test_collect --nocapture 2>/dev/null` + to run the tests and see the JSON output they print on the `stdout` channel. + Validate **all** collection tests run successfully and print valid JSON with the expected structure. + + If tests pass **do NOT** modify them in any way at this stage. + If tests exercise UB or trip sanitizers, you must remove them. + If tests fail functionally, attempt to fix them until they pass and print the expected JSON. + """ + ) + + write_test_vectors: str = textwrap.dedent( + """ + ## Step 4 – Save outputs as JSON files ## + Create the directory `{test_vectors_path}`. + + Run each data-collection test **individually** and capture its stdout. + Write the JSON output of each `collect_` test to + `{test_vectors_path}/.json`, where `` is the 1-based index. + + Use the provided `extract_json.py` script to extract the JSON reliably – do NOT rely on grep/sed: + ```bash + cargo nextest run --nocapture -- collect_vector_ --exact 2>/dev/null | \ + uv run extract_json.py > {test_vectors_path}/.json + ``` + + Verify each file is valid JSON by running + `uv run python -m json.tool {test_vectors_path}/.json`. + """ + ) + + write_assert_tests: str = textwrap.dedent( + """ + ## Step 5 – Write assert-style Rust tests ## + Create `{rs_crate_path}/tests/test_assert.rs`. + + Hardcode all expected values as Rust string literals taken from the + JSON files saved in Step 4. 
+ + The file **must** use these imports and the structure below: + ```rust + use assert_cmd::Command; + use predicates::prelude::*; + ``` + + IMPORTANT: You **must** write an assertion test for each collection test, no matter + how many collection tests are there! + Write them one-by-one if there are too many. + + For **each** test case from the JSON, write a `#[test]` function named + `test_case_` following this exact pattern: + ```rust + #[test] + fn test_case_() {{ + let pkg_name_path = assert_cmd::cargo::cargo_bin(assert_cmd::pkg_name!()); + let pkg_name_path_str = pkg_name_path.to_str().unwrap(); + + Command::new("stdbuf") + .args(&["-e0", "-o0", pkg_name_path_str]) + // .args(&["a1", "a2"]) // only if args non-empty + // .write_stdin("data") // only if stdin non-empty + .assert() + .stdout("") + .stderr("") + .code(); + }} + ``` + + Rules: + - Use the **exact** stdout/stderr strings from the JSON, properly escaped + in Rust string literals. + - If expected stderr is empty use `.stderr("")`. + - If stderr contains variable content (PIDs, paths) use + `predicates::str::contains(...)` to capture **path-invariant** contents. + + Once done, run: + ```bash + cargo nextest run --test test_assert --cargo-quiet + ``` + All tests **must** pass and not exercise any undefined behavior. + """ + ) + + simple_exit: str = textwrap.dedent( + """ + Once the task is complete, exit immediately. + Do not over-verify or generate extensive reports. 
+ """ + ) + + @classmethod + def coverage_based(cls) -> str: + return ( + cls.analyze_file + + cls.build_rs + + cls.coverage_script + + cls.write_collect_tests + + cls.coverage_improvement + + cls.write_test_vectors + + cls.write_assert_tests + + cls.simple_exit + ) + + @classmethod + def collect_to_assert(cls) -> str: + return ( + cls.analyze_file + + cls.build_rs + + cls.analyze_data_collection_tests + + cls.write_test_vectors + + cls.write_assert_tests + + cls.simple_exit + ) + + +cs = ConfigStore.instance() +cs.store(name="testgen", node=TestgenConfig) + + +def get_tools(): + useful_tools = UsefulTools() + return [useful_tools.Bash, useful_tools.Read, useful_tools.Edit, useful_tools.Write] + + +@hydra.main(version_base=None, config_name="testgen") +def main(cfg: TestgenConfig) -> None: + try: + _main(cfg) + except Exception as e: + logger.exception(e) + sys.exit(1) + + +def _main(cfg: TestgenConfig) -> None: + output_dir = Path(HydraConfig.get().runtime.output_dir) + + # Separately log the complete trajectory + logger_trajectory = logging.getLogger("ideas.testgen.trajectory") + logger_trajectory.propagate = False + fh = logging.FileHandler(output_dir / f"testgen_trajectory-{int(time.time())}.log") + fh.setFormatter(ConsoleTee.StripANSIFormatter("%(asctime)s %(message)s")) + logger_trajectory.addHandler(fh) + # Simultaneous print and log to file + printer = LoggingConsolePrinter(logger=logger_trajectory) + agent = RelentlessAgent(name="C executable test generator") + + # Generate helper scripts and files in the crate + crate = Crate(output_dir / "Cargo.toml") + nextest_config(crate) + if not cfg.collect_to_assert: + write_coverage_script(crate) + write_collect_script(crate) + write_extract_json_script(crate) + + # Workspace + work_dir = Path(tempfile.mkdtemp()) + workspace_dir = work_dir / "test_crates" + shutil.copytree("test_crates", workspace_dir) + + # Remove all log files + for log_file in workspace_dir.glob("**/*.log"): + log_file.unlink() + + # Paths the 
agent will populate + rs_crate_path = work_dir / cfg.test_crate_out + test_vectors_path = rs_crate_path / "json" + + # If assertion tests already exist, they must be correct + if (rs_crate_path / "tests" / "test_assert.rs").is_file(): + crate = Crate(rs_crate_path / "Cargo.toml") + ok, output, error, _ = crate.cargo_test("test_assert", quiet=True) + if not ok: + raise RuntimeError( + "Existing assertion tests failed to pass, previous agent did not clean them up!" + ) + logger.info( + f"Assertion tests already exist at {rs_crate_path / 'tests/test_assert.rs'}, skipping agent!" + ) + return + + # Hide instrumentation from conversion agent + if cfg.collect_to_assert: + strip_instrumentation(crate) + + # Build the task prompt + task_description = ( + TestgenInstructions.collect_to_assert() + if cfg.collect_to_assert + else TestgenInstructions.coverage_based() + ) + arguments = { + "c_proj_path": cfg.c_code.parent, + "rs_crate_path": rs_crate_path.relative_to(work_dir), + "test_vectors_path": test_vectors_path.relative_to(work_dir), + "target_coverage": cfg.target_coverage, + } + task_description = task_description.format(**arguments) + + # Run agent in the work directory + os.chdir(work_dir) + try: + agent.run( + model_name=cfg.model, + prompt_template=task_description, + max_steps=100, + max_budget=4, + max_sub_sessions=1, + work_dir=str(work_dir), + tools=get_tools(), + printer=printer, + verbose=True, + ) + except KISSError as e: + logger.warning(f"Agent claims it failed with error: {e}. Clean-up will continue.") + + # Verify that collection tests exist + if not (rs_crate_path / "tests" / "test_collect.rs").is_file(): + raise RuntimeError( + f"Data collection tests were not found at {rs_crate_path / 'tests/test_collect.rs'}!" 
+ ) + + # Strip instrumentation to ensure tests are correct and do not rely on it + crate = Crate(rs_crate_path / "Cargo.toml") + strip_instrumentation(crate) + ok, output, error, _ = crate.cargo_test("test_collect", quiet=True) + if not ok: + raise RuntimeError( + f"Data collection tests failed to pass without instrumentation! Output:\n{output}\nError:\n{error}" + ) + + ok, output, error, _ = crate.cargo_test("test_assert", quiet=True) + if not ok: + logger.error(f"Assertion tests failed to pass! Output:\n{output}\nError:\n{error}") + # Remove incomplete assertion tests, if any + if (rs_crate_path / "tests" / "test_assert.rs").is_file(): + (rs_crate_path / "tests" / "test_assert.rs").unlink() + + # And replace with an always-passing test (nextest does not allow empty test files) + if cfg.guarantee_assert_tests: + logger.warning("Writing dummy test_assert.rs that always passes") + (rs_crate_path / "tests" / "test_assert.rs").write_text(NEXTEST_DUMMY_TEST) + + # Clean the crate and copy it back to the project directory + crate.cargo_clean() + shutil.copytree(rs_crate_path, output_dir, dirs_exist_ok=True) + + +if __name__ == "__main__": + main() diff --git a/src/ideas/agents/utils.py b/src/ideas/agents/utils.py new file mode 100644 index 0000000..c3b3366 --- /dev/null +++ b/src/ideas/agents/utils.py @@ -0,0 +1,112 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +import textwrap +import tomlkit +from pathlib import Path + +from ideas.tools import Crate +from ideas.convert_tests import rustfmt + + +NEXTEST_DUMMY_TEST = textwrap.dedent( + """ + #[test] + fn dummy_ideas_placeholder() { + assert_eq!(1 + 1, 2); + } + """ +).strip() + + +def nextest_config(crate: Crate): + nextest_config_path = crate.cargo_toml.parent / ".config" / "nextest.toml" + nextest_config_path.parent.mkdir(parents=True, exist_ok=True) + nextest_config_contents = { + "profile": { + "default": { + "fail-fast": False, + "slow-timeout": {"period": "30s", 
"terminate-after": 2}, + } + } + } + nextest_config_path.write_text(tomlkit.dumps(nextest_config_contents)) + + +def write_coverage_script(crate: Crate) -> Path: + coverage_script_path = crate.cargo_toml.parent / "measure_coverage.sh" + coverage_script_contents = textwrap.dedent( + """ + cargo llvm-cov nextest --include-ffi --no-report --test test_collect --no-fail-fast 2>/dev/null + cargo llvm-cov report --include-ffi + cargo llvm-cov report --include-ffi --text + """ + ).strip() + coverage_script_path.write_text(coverage_script_contents) + return coverage_script_path + + +def write_collect_script(crate: Crate) -> Path: + collect_path = crate.cargo_toml.parent / "tests" / "test_collect.rs" + collect_path.parent.mkdir(parents=True, exist_ok=True) + + if crate.is_bin: + collect_stub = textwrap.dedent( + """ + use std::os::unix::process::ExitStatusExt; + use assert_cmd::Command; + use serde_json; + + fn collect_and_print(name: &str, args: &[&str], stdin: Option<&str>) { + let pkg_name_path = assert_cmd::cargo::cargo_bin(assert_cmd::pkg_name!()); + let pkg_name_path_str = pkg_name_path.to_str().unwrap(); + + let mut cmd = Command::new("stdbuf"); + cmd.args(&["-e0", "-o0", pkg_name_path_str]); + if !args.is_empty() { + cmd.args(args); + } + if let Some(input) = stdin { + cmd.write_stdin(input); + } + let output = cmd.output().expect("failed to execute process"); + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let code = output.status.code().unwrap_or(-1); + if output.status.signal() == Some(libc::SIGILL) { + panic!("UBSAN detected during collection!"); + } + println!("{{"); + println!(" \"name\": \"{}\",", name); + println!(" \"stdout\": {},", serde_json::to_string(&*stdout).unwrap()); + println!(" \"stderr\": {},", serde_json::to_string(&*stderr).unwrap()); + println!(" \"exit_code\": {}", code); + println!("}}"); + } + """ + ).strip() + else: + collect_stub = "" + + with collect_path.open("a+", 
encoding="utf-8") as f: + f.write(collect_stub) + rustfmt(collect_path) + return collect_path + + +def write_extract_json_script(crate: Crate) -> Path: + extract_json_path = crate.cargo_toml.parent / "extract_json.py" + extract_json_contents = textwrap.dedent( + """ + import sys, json + buf = sys.stdin.read() + decoder = json.JSONDecoder() + obj, _ = decoder.raw_decode(buf, buf.index('{')) + print(json.dumps(obj, indent=2)) + """ + ).strip() + extract_json_path.write_text(extract_json_contents) + return extract_json_path diff --git a/src/ideas/ast.py b/src/ideas/ast.py index dc7ed20..0528f4e 100644 --- a/src/ideas/ast.py +++ b/src/ideas/ast.py @@ -4,23 +4,24 @@ # SPDX-License-Identifier: Apache-2.0 # +import re import logging from pathlib import Path from collections import defaultdict from collections.abc import Iterable -from functools import cached_property from dataclasses import dataclass, field from clang.cindex import TranslationUnit, TranslationUnitLoadError, Diagnostic from clang.cindex import Cursor, CursorKind, SourceRange, TokenKind -from clang.cindex import PrintingPolicy, PrintingPolicyProperty, LinkageKind +from clang.cindex import PrintingPolicy, PrintingPolicyProperty, LinkageKind, StorageClass from clang.cindex import conf, SourceLocation from ctypes import pointer, c_size_t, c_char_p -from .tools import run_subprocess +from .adapters import Code logger = logging.getLogger("ideas.ast") FILENAME = "file.c" +CodeC = Code["c"] @dataclass(frozen=True) @@ -39,11 +40,43 @@ def kind(self) -> CursorKind: return self.cursor.kind @property - def declaration(self) -> str | None: - return get_cursor_code(self.decl) if self.decl else None + def llm_context_declaration(self) -> str: + # Synthesize forward declaration from cursor + if self.cursor.kind == CursorKind.FUNCTION_DECL: + result_type = ( + self.cursor.result_type.spelling if self.cursor.result_type else "void" + ) + params = ", ".join( + p.type.spelling + (" " + p.spelling if p.spelling else "") # 
type: ignore[reportOptionalMemberAccess] + for p in self.cursor.get_arguments() + ) + return f"{result_type} {self.cursor.spelling}({params});" + elif self.cursor.kind in ( + CursorKind.STRUCT_DECL, + CursorKind.UNION_DECL, + CursorKind.ENUM_DECL, + ): + kind_name = { + CursorKind.STRUCT_DECL: "struct", + CursorKind.UNION_DECL: "union", + CursorKind.ENUM_DECL: "enum", + }[self.cursor.kind] + return f"{kind_name} {self.cursor.spelling};" + elif self.cursor.kind == CursorKind.TYPEDEF_DECL: + underlying = self.cursor.underlying_typedef_type.spelling + return f"typedef {underlying} {self.cursor.spelling};" + elif self.cursor.kind == CursorKind.VAR_DECL: + return f"{self.cursor.type.spelling} {self.cursor.spelling};" + + # Fallback: return full code + return self.code.text @property - def code(self) -> str: + def declaration(self) -> CodeC | None: + return get_cursor_code(self.decl, pretty_print=True) if self.decl else None + + @property + def code(self) -> CodeC: return get_cursor_code(self.parent or self.cursor, pretty_print=True) @property @@ -66,36 +99,9 @@ def is_global(self) -> bool: def is_system(self) -> bool: return self.cursor.location.is_in_system_header - @cached_property - def static_translation(self) -> str: - # FIXME: Handle VAR_DECL via c2rust? 
- # Ignore non-containers - if self.kind not in ( - CursorKind.STRUCT_DECL, - CursorKind.UNION_DECL, - CursorKind.ENUM_DECL, - CursorKind.TYPEDEF_DECL, - ): - return "" - - # Ignore anonymous containers - symbol_name = (self.parent or self.cursor).spelling - if not symbol_name: - return "" - - # Generate translation of container - bindgen = [ - "bindgen", - "--disable-header-comment", - "--no-doc-comments", - "--no-layout-tests", - "--no-recursive-allowlist", - "--allowlist-item", - symbol_name, - self.cursor.translation_unit.spelling, - ] - ok, output, _, _ = run_subprocess(bindgen) - return output if ok else "" + @property + def source_path(self) -> Path: + return Path(self.cursor.translation_unit.spelling).resolve() def with_declaration(self, decl: Cursor) -> "Symbol": return Symbol(self.name, self.cursor, self.parent, decl=decl) @@ -109,10 +115,11 @@ class TreeResult: ) -def create_translation_unit(path_or_code: Path | str) -> TranslationUnit: +def create_translation_unit(path_or_code: Path | CodeC) -> TranslationUnit: # Parse the code using clang - if isinstance(path_or_code, str): - tu = TranslationUnit.from_source(FILENAME, unsaved_files=[(FILENAME, path_or_code)]) + if isinstance(path_or_code, CodeC): + code = path_or_code + tu = TranslationUnit.from_source(FILENAME, unsaved_files=[(FILENAME, code.text)]) else: tu = TranslationUnit.from_source(str(path_or_code.resolve())) if any(d.severity >= Diagnostic.Error for d in tu.diagnostics): @@ -169,10 +176,17 @@ def extract_symbol_info_c(node: Cursor, parent: Cursor | None = None) -> dict[st symbols[child_name] = child_symbol.with_declaration(symbols[child_name].cursor) elif symbols[child_name].is_definition and not child_symbol.is_definition: if not symbols[child_name].is_system or not child_symbol.is_system: - logger.warning(f"Ignoring declaration after definition of `{child_name}`") + logger.debug(f"Ignoring declaration after definition of `{child_name}`") elif not symbols[child_name].is_definition and not 
child_symbol.is_definition: - if not symbols[child_name].is_system or not child_symbol.is_system: - logger.warning(f"Ignoring re-declaration of `{child_name}`") + if ( + child_symbol.cursor.kind == CursorKind.VAR_DECL + and symbols[child_name].cursor.storage_class == StorageClass.EXTERN + and child_symbol.cursor.storage_class != StorageClass.EXTERN + ): + # Prefer non-extern variable declaration (e.g. tentative definition) over extern one + symbols[child_name] = child_symbol + elif not symbols[child_name].is_system or not child_symbol.is_system: + logger.debug(f"Ignoring re-declaration of `{child_name}`") return symbols @@ -201,7 +215,7 @@ def extract_referenced_symbols(node: Cursor, global_symbols: Iterable[str]) -> l def get_code_from_tu_range( tu: TranslationUnit, source_range: SourceRange, encoding: str = "utf-8" -) -> str: +) -> CodeC: assert source_range.start.file == source_range.end.file, ( f"{source_range.start.file} != {source_range.end.file}" ) @@ -209,10 +223,10 @@ def get_code_from_tu_range( length = pointer(c_size_t()) code = conf.lib.clang_getFileContents(tu, source_range.start.file, length) assert code is not None - return code[source_range.start.offset : source_range.end.offset].decode(encoding) + return CodeC(code[source_range.start.offset : source_range.end.offset].decode(encoding)) -def get_cursor_prettyprinted(cursor: Cursor) -> str: +def get_cursor_prettyprinted(cursor: Cursor) -> CodeC: # Include tag definition when: # node is not struct/enum/union # and any child is a struct/enum/union definition @@ -226,10 +240,10 @@ def get_cursor_prettyprinted(cursor: Cursor) -> str: policy = PrintingPolicy.create(cursor) policy.set_property(PrintingPolicyProperty.IncludeTagDefinition, include_tag_definition) - return cursor.pretty_printed(policy).rstrip() + return CodeC(cursor.pretty_printed(policy)) -def get_cursor_code(cursor: Cursor, pretty_print: bool = False) -> str: +def get_cursor_code(cursor: Cursor, pretty_print: bool = False) -> CodeC: if 
pretty_print: code = get_cursor_prettyprinted(cursor) else: @@ -237,7 +251,7 @@ def get_cursor_code(cursor: Cursor, pretty_print: bool = False) -> str: # Non-function definitions require statement terminations if cursor.kind != CursorKind.FUNCTION_DECL or not cursor.is_definition(): - code += ";" + code = CodeC(code.text.rstrip() + ";") return code @@ -387,6 +401,124 @@ def clang_make_extern_(path: Path, spelling: str): _apply_edits(path, edits) +def clang_make_bindable_(path: Path, spelling: str): + source = path.read_bytes() + tu = create_translation_unit(path) + tu_path = Path(tu.spelling).resolve() + edits: dict[tuple[int, int], bytes] = {} + cursors = _find_cursors(tu, spelling) + has_variable_initializer_definition = any( + cursor.kind == CursorKind.VAR_DECL + and any( + token.kind == TokenKind.PUNCTUATION and token.spelling == "=" + for token in _get_tokens(cursor) + ) + for cursor in cursors + ) + inserted_fallback_variable_extern = False + + for cursor in cursors: + # We don't handle cursors not in the provided translation unit or anything without a definition + if ( + cursor.location.file is None + or Path(cursor.location.file.name).resolve() != tu_path + or Path(cursor.extent.start.file.name).resolve() != tu_path + or Path(cursor.extent.end.file.name).resolve() != tu_path + ): + raise NotImplementedError(f"Found `{spelling}` cursor `{cursor}` not in {tu_path}!") + if cursor.kind not in DEFINITION_START_TOKEN: + raise ValueError(f"Unhandled cursor kind {cursor.kind}!") + + tokens = list(_get_tokens(cursor)) + assert len(tokens) > 0 + + is_extern = False + definition_start_token_idx = None + + for i, token in enumerate(tokens): + # Remove storage specifiers from declaration while preserving offsets + if token.kind == TokenKind.KEYWORD and token.spelling in ("static", "inline"): + assert i + 1 < len(tokens), "storage specifier should always come before name" + start_offset = token.extent.start.offset + # Use start of next token as end offset to remove any 
whitespace + end_offset = tokens[i + 1].extent.start.offset + edits[(start_offset, end_offset)] = b"" + + # Check if extern keyword already present + elif token.kind == TokenKind.KEYWORD and token.spelling == "extern": + is_extern = True + + # Record the first definition-opening token. + elif ( + definition_start_token_idx is None + and token.kind == TokenKind.PUNCTUATION + and token.spelling == DEFINITION_START_TOKEN[cursor.kind] + ): + definition_start_token_idx = i + break + + # clang_make_bindable_ matches clang_make_extern_ behavior for functions. + if cursor.kind == CursorKind.FUNCTION_DECL: + # Replace definition portion with ';' + if definition_start_token_idx is not None: + assert definition_start_token_idx > 0 + # Use end of prior token as end offset to remove any whitespace + start_pos = tokens[definition_start_token_idx - 1].extent.end.offset + end_pos = cursor.extent.end.offset + edits[(start_pos, end_pos)] = b";" + + # Add 'extern ' prefix if not already present + if not is_extern: + extern_insert_pos = cursor.extent.start.offset + edits[(extern_insert_pos, extern_insert_pos)] = b"extern " + continue + + # For variables, keep the definition and insert an extern declaration before it. 
+ assert cursor.kind == CursorKind.VAR_DECL + + declaration_end_idx = None + for i, token in enumerate(tokens): + if token.kind != TokenKind.PUNCTUATION: + continue + if token.spelling in ("=", ";"): + declaration_end_idx = i + break + if declaration_end_idx is None: + declaration_end_idx = len(tokens) + + declaration_tokens = [ + token + for token in tokens[:declaration_end_idx] + if not ( + token.kind == TokenKind.KEYWORD + and token.spelling in ("static", "inline", "extern") + ) + ] + if len(declaration_tokens) == 0: + continue + + should_insert_extern = definition_start_token_idx is not None + if ( + not should_insert_extern + and not has_variable_initializer_definition + and not is_extern + and not inserted_fallback_variable_extern + ): + should_insert_extern = True + inserted_fallback_variable_extern = True + + if should_insert_extern: + declaration_start = declaration_tokens[0].extent.start.offset + declaration_end = declaration_tokens[-1].extent.end.offset + declaration = source[declaration_start:declaration_end].decode().rstrip() + extern_decl = f"extern {declaration};\n".encode() + extern_insert_pos = cursor.extent.start.offset + edits[(extern_insert_pos, extern_insert_pos)] = extern_decl + + if edits: + _apply_edits(path, edits) + + def _get_tokens(cursor: Cursor): # Use get_tokens if it actually returns a non-empty list tokens = list(cursor.get_tokens()) @@ -460,3 +592,58 @@ def _apply_edits(path: Path, edits: dict[tuple[int, int], bytes]): source = source[:start] + replacement + source[end:] path.write_bytes(source) + + +def get_system_macro_undefs(includes: list[str], code: str) -> list[str]: + if not includes: + return [] + + # Parse the includes to enumerate system macros + include_text = "\n".join(includes) + "\n" + tu = TranslationUnit.from_source( + "undefs.c", + unsaved_files=[("undefs.c", include_text)], + options=TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD, + ) + + self_ref_macros: set[str] = set() + assert tu.cursor is not None + for 
cursor in tu.cursor.get_children(): + if cursor.kind != CursorKind.MACRO_DEFINITION: + continue + if not cursor.location.is_in_system_header: + continue + tokens = list(cursor.get_tokens()) + # Function-like macros have '(' immediately after the name token + if len(tokens) >= 2 and tokens[1].spelling == "(": + continue + # FIXME: This needs to be stronger and not detect #define stuff mystuff + # as self-referencing. + # We should ideally only check the replacement list tokens, + # but clang does not provide a way to get just those. + name = cursor.spelling + if any(tok.spelling == name for tok in tokens[1:]): + self_ref_macros.add(name) + + if not self_ref_macros: + return [] + + # Only #undef self-referencing macros whose name appears in the code + # FIXME: Would be nice if we had an AST list of already-expanded macros across all TUs + code_identifiers = set(re.findall(r"\b([A-Za-z_]\w*)\b", code)) + conflicting = self_ref_macros & code_identifiers + + return [f"#undef {name}" for name in sorted(conflicting)] + + +def mangle(name: str) -> str: + name = name.replace(" ", "_") + name = name.replace(".", "_") + name = name.replace(":", "_") + name = name.replace("-", "_") + + # Cannot start with a digit + if name and name[0].isdigit(): + name = "_" + name + + return name diff --git a/src/ideas/ast_rust.py b/src/ideas/ast_rust.py index 6e25457..81fa536 100644 --- a/src/ideas/ast_rust.py +++ b/src/ideas/ast_rust.py @@ -9,9 +9,12 @@ from tree_sitter import Language, Parser, Node, Query, QueryCursor import tree_sitter_rust +from .adapters import Code + # Initialize the Rust language once RUST_LANGUAGE = Language(tree_sitter_rust.language()) RUST_PARSER = Parser(RUST_LANGUAGE) +CodeRust = Code["rust"] class RustFnSignature: @@ -96,9 +99,9 @@ def get_macro_nodes(root: Node, placeholder: str) -> list[Node]: return list(ancestors) -def validate_changes(code: str, template: str) -> OrderedDict[str, str]: - code_root = get_root(code) - template_root = get_root(template) +def 
validate_changes(code: CodeRust, template: CodeRust) -> OrderedDict[str, str]: + code_root = get_root(code.text) + template_root = get_root(template.text) nodes = get_nodes(code_root) template_nodes = get_nodes(template_root) @@ -143,3 +146,57 @@ def validate_changes(code: str, template: str) -> OrderedDict[str, str]: ) return scope_feedback + + +def mangle(name: str) -> str: + # FIXME: It would be much nicer to let bindgen mangle names but need to feed the mangled name to --allowlist-function. + # See: https://github.com/rust-lang/rust-bindgen/blob/b7b501feb2642b6ac3796f8c5f2a1461640a2a67/bindgen/ir/context.rs#L859-L887 + if ( + "@" in name + or "?" in name + or "$" in name + or name in ("abstract", "alignof", "as", "async", "await", "become", "box", "break") + or name in ("const", "continue", "crate", "do", "dyn", "else", "enum", "extern") + or name in ("false", "final", "fn", "for", "gen", "if", "impl", "in") + or name in ("let", "loop", "macro", "match", "mod", "move", "mut", "offsetof") + or name in ("override", "priv", "proc", "pub", "pure", "ref", "return", "Self") + or name in ("self", "sizeof", "static", "struct", "super", "trait", "true", "try") + or name in ("type", "typeof", "unsafe", "unsized", "use", "virtual", "where", "while") + or name in ("yield", "str", "bool", "f32", "f64", "usize", "isize", "u128") + or name in ("i128", "u64", "i64", "u32", "i32", "u16", "i16", "u8", "i8", "_") + ): + name = name.replace("@", "_") + name = name.replace("?", "_") + name = name.replace("$", "_") + name += "_" + return name + + +def _rust_node_signature(node: Node, source: bytes) -> str | None: + ntype = node.type + if ntype in ("function_item", "function_signature_item"): + # Find the block body and remove it + body = node.child_by_field_name("body") + if body: + # Everything before the body is the signature + sig = source[node.start_byte : body.start_byte].rstrip() + return sig.decode() + ";" + + # Keep everything else as-is + return source[node.start_byte : 
node.end_byte].decode() + + +def get_signatures(code: CodeRust) -> CodeRust: + if not code.text.strip(): + return code + + source = code.text.encode() + root = get_root(source) + parts: list[str] = [] + + for node in root.children: + sig = _rust_node_signature(node, source) + if sig: + parts.append(sig) + + return CodeRust("\n".join(parts)) if parts else CodeRust("") diff --git a/src/ideas/cmake.py b/src/ideas/cmake.py index 39fac58..cca4d34 100644 --- a/src/ideas/cmake.py +++ b/src/ideas/cmake.py @@ -6,6 +6,7 @@ import sys import os +import json import logging import shutil @@ -16,7 +17,7 @@ from omegaconf import MISSING from hydra.core.config_store import ConfigStore -from .tools import run_subprocess +from .tools import run_subprocess, LARGE_PROJECT logger = logging.getLogger("ideas.cmake") @@ -31,6 +32,23 @@ class CmakeConfig: cs.store(name="cmake", node=CmakeConfig) +def _normalize_isystem(compile_commands_path: Path) -> None: + """Replace -isystem with -I in compile_commands.json""" + if not compile_commands_path.exists(): + return + db = json.loads(compile_commands_path.read_text()) + for entry in db: + if "command" in entry: + entry["command"] = entry["command"].replace("-isystem", "-I") + + if "arguments" in entry: + entry["arguments"] = [ + "-I" + arg[len("-isystem") :] if arg.startswith("-isystem") else arg + for arg in entry["arguments"] + ] + compile_commands_path.write_text(json.dumps(db, indent=2)) + + def configure( source_dir: Path, build_dir: Path, @@ -41,6 +59,7 @@ def configure( flags = [ "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DCMAKE_C_COMPILER=clang", ] if extract_info_cmake := os.environ.get("EXTRACT_INFO_CMAKE"): flags.append(f"-DCMAKE_PROJECT_TOP_LEVEL_INCLUDES={extract_info_cmake}") @@ -56,6 +75,11 @@ def configure( if not success: raise RuntimeError(f"CMake configuration failed:{' '.join(cmd)}\n{output + error}") + # Replace -isystem with -I in compile_commands.json so that all project + # headers get consistent USRs regardless of CMake 
SYSTEM keyword usage. + if LARGE_PROJECT: + _normalize_isystem(build_dir / "compile_commands.json") + def build(build_dir: Path, preset: str | None = None) -> None: if not preset: diff --git a/src/ideas/convert_tests.py b/src/ideas/convert_tests.py index 18fc367..b1c78d8 100644 --- a/src/ideas/convert_tests.py +++ b/src/ideas/convert_tests.py @@ -16,7 +16,7 @@ from hydra.core.config_store import ConfigStore from hydra.core.hydra_config import HydraConfig -from ideas.tools import Crate, run_subprocess +from ideas.tools import Crate, rustfmt logger = logging.getLogger("ideas.translate") @@ -26,7 +26,6 @@ class ConvertConfig: test_vectors: list[Path] = MISSING output: Path = MISSING - timeout: int = 600000 vcs: str = "none" # Library-specific inputs @@ -38,11 +37,6 @@ class ConvertConfig: cs.store(name="convert_tests", node=ConvertConfig) -def rustfmt(path: Path) -> None: - cmd = ["rustfmt", str(path)] - run_subprocess(cmd) - - def to_rust_str(string): return '"' + repr(string)[1:-1].replace('"', '\\"') + '"' @@ -55,19 +49,17 @@ def is_bin_test(test_case: Path): def add_deps_for_exec(crate: Crate) -> None: # Add test dependencies crate.cargo_add(dep="assert_cmd@2.0.17", section="dev") - crate.cargo_add(dep="ntest@0.9.3", section="dev") crate.cargo_add(dep="predicates@3.1.3", section="dev") crate.invalidate_metadata() -def convert_tests_for_exec(test_cases: list[Path], timeout: int = 60000) -> str: +def convert_tests_for_exec(test_cases: list[Path]) -> str: test_cases = list(filter(is_bin_test, test_cases)) if len(test_cases) == 0: return "" output = "" output += "use assert_cmd::Command;\n" - output += "use ntest::timeout;\n" output += "use predicates::prelude::*;\n" output += "\n" @@ -114,11 +106,15 @@ def convert_tests_for_exec(test_cases: list[Path], timeout: int = 60000) -> str: raise ValueError(f"stderr.is_regex must be a boolean, got {type(is_stderr_regex)}") output += "#[test]\n" - output += f"#[timeout({timeout})]\n" output += f"fn test_case_{test_case.stem}() 
{{\n" - output += " Command::cargo_bin(assert_cmd::crate_name!()).unwrap()" - if len(args) > 0: - output += f".args(&[{', '.join([to_rust_str(arg) for arg in args])}])" + output += "let pkg_name_path = assert_cmd::cargo::cargo_bin(assert_cmd::pkg_name!());\n" + output += ' Command::new("stdbuf")' + output += '.arg("-e0")' + output += '.arg("-o0")' + output += ".arg(pkg_name_path)" + args = [to_rust_str(arg) for arg in args] + if args: + output += f".args([{', '.join(args)}])" if stdin is not None: output += f".write_stdin({to_rust_str(stdin)})" output += ".assert()" @@ -145,7 +141,6 @@ def is_lib_test(test_case: Path): def add_deps_for_lib(crate: Crate) -> None: # Add test dependencies - crate.cargo_add(dep="ntest@0.9.3", section="dev") crate.cargo_add(dep="once_cell@1.21.3", section="dev") crate.cargo_add(dep="test-cdylib@1.1.0", section="dev") crate.invalidate_metadata() @@ -155,7 +150,6 @@ def convert_tests_for_lib( test_cases: list[Path], runner_manifest: Path | None, template_path: Path | None, - timeout: int = 60000, ) -> str: test_cases = list(filter(is_lib_test, test_cases)) if len(test_cases) == 0: @@ -167,8 +161,6 @@ def convert_tests_for_lib( # Load template template = template_path.read_text() - # Replace the timeout - template = template.replace("#[timeout(placeholder)]", f"#[timeout({timeout})]") # FIXME: This currently assumes that the macro generate_tests! is defined in the template # Use the generate_tests! 
macro to add tests @@ -195,8 +187,8 @@ def _main(cfg: ConvertConfig) -> None: cargo_toml = output_dir / "Cargo.toml" output_file = output_dir / cfg.output - exec_tests = convert_tests_for_exec(test_vectors, cfg.timeout) - lib_tests = convert_tests_for_lib(test_vectors, runner_manifest, cfg.template, cfg.timeout) + exec_tests = convert_tests_for_exec(test_vectors) + lib_tests = convert_tests_for_lib(test_vectors, runner_manifest, cfg.template) # Write and format tests output_file.parent.mkdir(exist_ok=True) output_file.write_text(exec_tests + "\n" + lib_tests) diff --git a/src/ideas/evaluate.py b/src/ideas/evaluate.py new file mode 100644 index 0000000..83718b9 --- /dev/null +++ b/src/ideas/evaluate.py @@ -0,0 +1,101 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + + +import re +import sys +import logging +from dataclasses import dataclass +from pathlib import Path + +import hydra +from omegaconf import MISSING +from hydra.core.config_store import ConfigStore + +from ideas.tools import Crate, nextest_json_to_libtest + + +logger = logging.getLogger("ideas.evaluate") + + +@dataclass +class EvaluateConfig: + manifest: Path = MISSING + test_cases: str = MISSING + + output_file: Path = MISSING + + +cs = ConfigStore.instance() +cs.store(name="evaluate", node=EvaluateConfig) + + +_TEST_FN_RE = re.compile(r"^\s*fn\s+(\w+)\s*\(", re.MULTILINE) + + +def list_tests(test_file: Path) -> list[str]: + """Parse #[test] function names directly from a .rs integration test file.""" + source = test_file.read_text() + tests = [] + lines = source.splitlines() + for i, line in enumerate(lines): + if line.strip() == "#[test]": + for subsequent in lines[i + 1 :]: + m = _TEST_FN_RE.match(subsequent) + if m: + tests.append(m.group(1)) + break + # Skip attributes/comments between #[test] and fn + if subsequent.strip() and not subsequent.strip().startswith(("#", "/")): + break + return tests + + +def _main(cfg: EvaluateConfig) -> None: + # Resolve 
integration test file (error loudly if missing) + crate = Crate(cargo_toml=cfg.manifest, vcs="none") + test_file = crate.cargo_toml.parent / "tests" / f"{cfg.test_cases}.rs" + if not test_file.exists(): + raise FileNotFoundError(f"Integration test file not found: {test_file}") + + # Attempt to build the evaluation test + builds, _, _, _ = crate.cargo_test( + name=cfg.test_cases, quiet=False, fail_fast=True, build_only=True + ) + if builds: + # Use libtest-json output, parse it, and reformat for readability + # stderr contains the native nextest output + _, stdout, stderr, _ = crate.cargo_test( + name=cfg.test_cases, message_format="libtest-json" + ) + output = nextest_json_to_libtest(stdout) + stderr + else: + output = f"Failed to build test target {cfg.test_cases} for evaluation!\n" + names = list_tests(test_file) + lines = [f"test {name} ... FAILED" for name in names] + output += "\n".join(lines) + if lines: + output += "\n" + + # Write to output file + cfg.output_file.parent.mkdir(parents=True, exist_ok=True) + cfg.output_file.write_text(output) + crate.vcs.add(cfg.output_file) + crate.vcs.commit(f"Evaluation results for {cfg.test_cases}") + print(output) + + +@hydra.main(version_base=None, config_name="evaluate") +def main(cfg: EvaluateConfig) -> None: + try: + _main(cfg) + except Exception as e: + logger.exception(e) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/ideas/init/build.py b/src/ideas/init/build.py new file mode 100644 index 0000000..dbddd85 --- /dev/null +++ b/src/ideas/init/build.py @@ -0,0 +1,252 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +import re +import sys +import logging +import textwrap +from pathlib import Path +from dataclasses import dataclass + +import hydra +from hydra.core.config_store import ConfigStore +from hydra.core.hydra_config import HydraConfig + +from ideas.tools import Crate, LARGE_PROJECT +from ideas.tools import run_subprocess +from ideas.ast_rust 
import CodeRust, mangle +from ideas import create_translation_unit, extract_info_c +from ideas.init.consolidate import get_symbols_and_dependencies + +logger = logging.getLogger("ideas.init.build") + + +@dataclass +class BuildConfig: + vcs: str = "none" + + def __post_init__(self): + if self.vcs not in ["git", "none"]: + raise ValueError(f"Invalid VCS: {self.vcs}!") + + +cs = ConfigStore.instance() +cs.store(name="init.build", node=BuildConfig) + + +def write_build_script(crate: Crate) -> Path: + c_src_path = crate.c_src_path.relative_to(crate.cargo_toml.parent) + build_options = '.define("main", "_main")' if crate.is_bin else "" + build_rs_src = textwrap.dedent( + f""" + fn main() {{ + println!("cargo:rerun-if-changed={c_src_path}"); + cc::Build::new() + .compiler("clang") + .warnings(false) + .file("{c_src_path}") + {build_options} + .compile("library"); + println!("cargo:rustc-link-lib=static=library"); + // FIXME: How do we statically add libraries to link to? + println!("cargo:rustc-link-lib=dylib=crypto"); + }} + """ + ) + + build_rs_path = crate.cargo_toml.parent / "build.rs" + build_rs_path.write_text(build_rs_src) + return build_rs_path + + +def write_main_binding(crate: Crate) -> str: + # Get binding for main (redefined as _main) + main_binding = get_linked_binding("_main", crate.c_src_path, "-Dmain=_main") + + main_binding_path = crate.rust_src_path.parent / "binding" / "main.rs" + main_binding_path.parent.mkdir(exist_ok=True) + main_binding_path.write_text( + "\n\n".join( + [ + "#![allow(unused_attributes)]", + main_binding.text, + ] + ) + ) + crate.vcs.add(main_binding_path) + + # Return appropriate main function instead of writing to binding.rs + if "fn _main()" in main_binding.text: + return textwrap.dedent( + """ + pub fn main() { + let ret = unsafe { binding::main::_main() }; + std::process::exit(ret); + } + """ + ) + else: + return textwrap.dedent( + """ + pub fn main() { + let mut args: Vec<_> = std::env::args().into_iter().map(|s| 
std::ffi::CString::new(s).unwrap().into_raw()).collect(); + let ret = unsafe { binding::main::_main(args.len() as i32, args.as_mut_ptr()) }; + std::process::exit(ret); + } + """ + ) + + +def write_symbol_binding(crate: Crate, symbol_name: str): + rust_spelling = mangle(symbol_name) + symbol_binding = get_linked_binding(rust_spelling, crate.c_src_path) + + symbol_binding_path = crate.rust_src_path.parent / "binding" / f"{rust_spelling}.rs" + symbol_binding_path.parent.mkdir(exist_ok=True) + symbol_binding_path.write_text( + "\n\n".join( + [ + "#![allow(unused_attributes)]", + symbol_binding.text, + ] + ) + ) + crate.vcs.add(symbol_binding_path) + + binding_path = crate.rust_src_path.parent / "binding.rs" + with binding_path.open("a+") as f: + f.write(f"pub mod {rust_spelling};\n") + crate.vcs.add(binding_path) + + +def get_linked_binding(function_name: str, c_src_path: Path, *bindgen_args: str) -> CodeRust: + # Use bindgen to generate binding to C symbol + bindgen = [ + "bindgen", + "--disable-header-comment", + "--no-doc-comments", + "--no-layout-tests", + "--allowlist-function", + function_name, + str(c_src_path), + "--", + *bindgen_args, + ] + ok, binding, error, _ = run_subprocess(bindgen) + if not ok: + raise ValueError(f"`{' '.join(bindgen)}` failed!\n{binding + error}") + + # Remove \u{1} prefix from link_name attribute + linked_binding = binding.replace('#[link_name = "\\u{1}', '#[link_name = "') + + # Enable the symbol to be re-exportable by rustc + linked_binding = re.sub( + r'unsafe extern "C" {\n(.*)\n}', + r'#[link(name="library", kind="static")]\nunsafe extern "C" {\n #[unsafe(no_mangle)]\n\1\n}', + linked_binding, + flags=re.DOTALL, + ) + if linked_binding == binding: + raise ValueError( + f"Failed to convert binding to linked binding for {function_name}!\n{binding}" + ) + return CodeRust(linked_binding) + + +def _main(cfg: BuildConfig) -> None: + output_dir = Path(HydraConfig.get().runtime.output_dir) + + if LARGE_PROJECT: + logger.info("Hybrid build 
is disabled; skipping build.rs generation!") + return + + # Fetch crate + crate = Crate( + cargo_toml=output_dir / "Cargo.toml", + vcs=cfg.vcs, # type: ignore[reportArgumentType] + ) + + # Get global symbol table + tu = create_translation_unit(crate.c_src_path) + asts = [extract_info_c(tu)] + symbols, _ = get_symbols_and_dependencies( + asts, external_symbol_names=["c:@F@main"] if crate.is_bin else None + ) + global_functions = [ + s for s in symbols.values() if s.is_global and (s.is_function and s.is_definition) + ] + if not global_functions: + logger.info("No global functions to generate bindings for!") + return + + # Write build.rs file + build_rs_path = write_build_script(crate) + crate.vcs.add(build_rs_path) + + # Verify build with build.rs + builds, feedback = crate.cargo_build() + if not builds: + raise RuntimeError(f"Crate at {output_dir} does not build with build.rs!\n{feedback}") + + # Generate a Rust binding for any global function since we need to force the Rust + # linker to include that C function in the Rust artifact. + # FIXME: If we ever test variables we should generate bindings for those here too! 
+ binding_path = crate.rust_src_path.parent / "binding.rs" + binding_path.write_text("") + main_function = "" + for symbol in global_functions: + if not (symbol.is_function and symbol.is_definition and symbol.is_global): + continue + if crate.is_bin and symbol.spelling == "main": + # main requires special handling because we must bind to it as _main and + # statically create a Rust main that calls it + main_function = write_main_binding(crate) + else: + write_symbol_binding(crate, symbol.spelling) + + # Write main function and binding to it + with crate.rust_src_path.open("a+") as f: + f.write(main_function) + if main_function: + with binding_path.open("a+") as f: + f.write("pub mod main;\n") + crate.vcs.add(crate.rust_src_path) + crate.vcs.add(binding_path) + + # Make the bindings module visible in the crate + rust_src = crate.rust_src_path.read_text() + BINDING_MOD = "pub mod binding;" + if not re.search(f"^{re.escape(BINDING_MOD)}$", rust_src, flags=re.MULTILINE): + crate.rust_src_path.write_text("\n\n".join([rust_src, BINDING_MOD])) + crate.vcs.add(crate.rust_src_path) + + # Add hydra directory + if (output_subdir := HydraConfig.get().output_subdir) is not None: + crate.vcs.add(output_dir / output_subdir) + + # Attempt a final build + builds, feedback = crate.cargo_build() + if not builds: + raise RuntimeError(f"Crate at {output_dir} does not build with build.rs!\n{feedback}") + msg = f"Generated build artifacts for `{crate.root_package['name']}`" + logger.info(msg) + crate.vcs.commit(msg) + + # Clean on exit + crate.cargo_clean() + + +@hydra.main(version_base=None, config_name="init.build") +def main(cfg: BuildConfig) -> None: + try: + _main(cfg) + except Exception as e: + logger.exception(e) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/ideas/init/consolidate.py b/src/ideas/init/consolidate.py index dc95485..f048596 100644 --- a/src/ideas/init/consolidate.py +++ b/src/ideas/init/consolidate.py @@ -8,8 +8,8 @@ import os import logging 
from pathlib import Path +from functools import cmp_to_key from dataclasses import dataclass -from itertools import combinations from graphlib import TopologicalSorter, CycleError import hydra @@ -17,11 +17,18 @@ from omegaconf import MISSING from hydra.core.config_store import ConfigStore from hydra.core.hydra_config import HydraConfig -from clang.cindex import CompilationDatabase, TranslationUnit -from clang.cindex import TranslationUnitLoadError, Diagnostic - -from ideas.ast import extract_info_c, TreeResult, Symbol, clang_rename_ -from ideas.tools import Crate, check_c +from clang.cindex import CompilationDatabase, TranslationUnit, CursorKind, Cursor +from clang.cindex import TranslationUnitLoadError, Diagnostic, StorageClass + +from ideas.ast import ( + extract_info_c, + TreeResult, + Symbol, + clang_rename_, + get_system_macro_undefs, + mangle, +) +from ideas.tools import Crate, check_c, LARGE_PROJECT logger = logging.getLogger("ideas.init.consolidate") @@ -42,39 +49,60 @@ class ConsolidateConfig: def init(compile_commands: Path, source_priority: list[Path]) -> str: # Get symbol table and dependencies taking into account source priority asts = get_asts(compile_commands, source_priority) - symbols, dependencies = get_symbols_and_dependencies(asts, source_priority) + ast_order = create_ast_order(source_priority, asts) + symbols, dependencies = get_symbols_and_dependencies(asts, ast_order=ast_order) logger.info(f"Found {len(symbols)} symbols in {compile_commands}!") - # Consolidate C sources in topological order - sources = get_includes(symbols) - for group in TopologicalSorter(dependencies).static_order(): + # Consolidate C sources in lexicographical topological order + symbol_lexical_key = create_symbol_lexical_key_fn(symbols, ast_order) + sorted_symbol_groups = list( + nx.lexicographical_topological_sort( + nx.from_dict_of_lists(dependencies, create_using=nx.DiGraph).reverse(copy=False), # type: ignore + key=symbol_lexical_key, + ) + ) + + includes = 
get_includes(symbols) + feature_defines = get_feature_defines(compile_commands) + feature_undefs = [f"#undef {line.split()[1]}" for line in feature_defines] + sources = feature_defines + includes + feature_undefs + [""] + for group in sorted_symbol_groups: # Add forward declarations if more than one symbol in group if len(group) > 1: for name in group: declaration = symbols[name].declaration - if declaration and declaration not in sources: - sources.append(declaration) + if declaration and declaration.text not in sources: + sources.append(declaration.text) # Add symbol definitions for name in group: - definition = symbols[name].code + "\n" + definition = symbols[name].code.text if definition not in sources: sources.append(definition) + + # Prevent double-expansion of self-referencing macros from signal.h + header_len = len(feature_defines) + len(includes) + len(feature_undefs) + signal_includes = [inc for inc in includes if "signal.h" in inc] + code_text = "\n".join(sources[header_len:]) + undefs = get_system_macro_undefs(signal_includes, code_text) + if undefs: + sources = ( + feature_defines + includes + feature_undefs + undefs + [""] + sources[header_len:] + ) + return "\n".join(sources) def get_symbols_and_dependencies( asts: list[TreeResult], - source_priority: list[Path] | None = None, external_symbol_names: list[str] | None = None, + ast_order: dict[Path, TreeResult] | None = None, ) -> tuple[dict[str, Symbol], dict[tuple[str, ...], list[tuple[str, ...]]]]: - source_priority = source_priority or [] - # Merge ASTs into non-system project dependencies list_of_non_system_symbols = [ {n: s for n, s in ast.symbols.items() if not s.is_system} for ast in asts ] - project_symbols = merge_symbols(list_of_non_system_symbols, source_priority) + project_symbols = merge_symbols(list_of_non_system_symbols, ast_order) project_dependencies = nx.compose_all( [ nx.from_dict_of_lists(ast.complete_graph, create_using=nx.DiGraph) # type: ignore @@ -90,35 +118,29 @@ def 
get_symbols_and_dependencies( external_symbol_names = [ name for name, symbol in symbols.items() - if symbol.is_global and (symbol.is_function or symbol.is_variable) + if symbol.is_global + and (symbol.is_variable or (symbol.is_function and symbol.is_definition)) ] if external_symbol_names: paths = nx.multi_source_dijkstra_path(project_dependencies, external_symbol_names) symbols = {k: v for k, v in symbols.items() if k in paths} - dependencies = dependencies.subgraph(symbols.keys()) + dependencies = dependencies.subgraph(symbols.keys()).copy() else: logger.warning("No external symbols were found/specified!") # Remove cycles from graph by combining strongly-connected components. Note that we sort # members in a SCC so they are ordered lexically. - def symbol_lexical_key(name: str) -> tuple[int, str, int, str]: - sym = symbols[name] - loc = sym.cursor.location - tu_file = Path(sym.cursor.translation_unit.spelling).resolve() - - loc_file = tu_file - if loc.file is not None: - loc_file = Path(loc.file.name).resolve() - - file_rank = len(source_priority) - if loc_file in source_priority: - file_rank = source_priority.index(loc_file) - return (file_rank, str(loc_file), loc.offset, name) - C = nx.condensation(dependencies) + symbol_lexical_key = create_symbol_lexical_key_fn(symbols, ast_order) scc_map = {n: tuple(sorted(C.nodes[n]["members"], key=symbol_lexical_key)) for n in C.nodes} dependencies = {scc_map[n]: [scc_map[s] for s in C.successors(n)] for n in C.nodes} + # Force pure type-declaration SCCs into one SCC unit without introducing cycles. 
+ if not LARGE_PROJECT: + dependencies = _merge_pure_type_declaration_sccs( + C, scc_map, symbols, symbol_lexical_key + ) + # Make sure dependencies are topologically sortable try: list(TopologicalSorter(dependencies).static_order()) @@ -128,6 +150,251 @@ def symbol_lexical_key(name: str) -> tuple[int, str, int, str]: return symbols, dependencies +def create_ast_order( + source_priority: list[Path], asts: list[TreeResult] +) -> dict[Path, TreeResult]: + # Preserve explicit source priority ordering first, then deterministically append + # any remaining TUs. + ast_by_path: dict[Path, TreeResult] = {} + for tree in asts: + first_symbol = next(iter(tree.symbols.values()), None) + if first_symbol is None: + continue + path = first_symbol.source_path + ast_by_path[path] = tree + + ast_order: dict[Path, TreeResult] = {} + seen: set[Path] = set() + + for path in source_priority: + resolved = path.resolve() + if resolved not in seen: + seen.add(resolved) + if resolved in ast_by_path: + ast_order[resolved] = ast_by_path[resolved] + + for tu_path in sorted(ast_by_path.keys()): + if tu_path not in seen: + logger.info("Adding translation unit not seen in source_priority: %s", tu_path) + seen.add(tu_path) + ast_order[tu_path] = ast_by_path[tu_path] + return ast_order + + +def create_symbol_lexical_key_fn( + symbols: dict[str, Symbol], + ast_order: dict[Path, TreeResult] | None = None, +): + def compare_symbol_lexical(a: str | tuple[str, ...], b: str | tuple[str, ...]) -> int: + # Support symbol groups by using the first symbol in the group. + a_name = a[0] if isinstance(a, tuple) else a + b_name = b[0] if isinstance(b, tuple) else b + + a_symbol = symbols[a_name] + b_symbol = symbols[b_name] + + a_tu = a_symbol.source_path + b_tu = b_symbol.source_path + + # If symbols are from the same translation unit, then we can directly + # compare their locations for lexical ordering. 
+ if a_tu == b_tu: + return _cmp_cursor_loc(a_symbol.cursor, b_symbol.cursor) + + if ast_order is None: + raise RuntimeError( + f"Cannot compare symbols from different translation units without ast_order: {a} ({a_tu}) vs {b} ({b_tu})." + ) + + # If a's USR appears in b's TU with matching code, both symbols are + # present in b_tu and can be compared by their locations there + b_ast = ast_order.get(b_tu) + if ( + b_ast is not None + and a_name in b_ast.symbols + and b_ast.symbols[a_name].code.text == a_symbol.code.text + ): + return _cmp_cursor_loc(b_ast.symbols[a_name].cursor, b_symbol.cursor) + + # If b's USR appears in a's TU with matching code, both symbols are + # present in a_tu and can be compared by their locations there + a_ast = ast_order.get(a_tu) + if ( + a_ast is not None + and b_name in a_ast.symbols + and a_ast.symbols[b_name].code.text == b_symbol.code.text + ): + return _cmp_cursor_loc(a_symbol.cursor, a_ast.symbols[b_name].cursor) + + # The symbol's USR is not shared across TUs, so fall back to ordering + # by the position of each symbol's TU in ast_order (source priority) + ast_rank = {path: i for i, path in enumerate(ast_order)} + try: + a_rank = ast_rank[a_tu] + b_rank = ast_rank[b_tu] + except KeyError as ex: + raise RuntimeError( + f"Cannot compare symbols because one or both translation units are missing from ast_order: {a_tu}, {b_tu}." 
+ ) from ex + + if a_rank < b_rank: + return -1 + if a_rank > b_rank: + return 1 + raise RuntimeError("Distinct translation units cannot have identical ranks!") + + return cmp_to_key(compare_symbol_lexical) + + +def _cmp_cursor_loc(cursor_a: Cursor, cursor_b: Cursor) -> int: + loc_a = cursor_a.location + loc_b = cursor_b.location + if loc_a < loc_b: + return -1 + if loc_b < loc_a: + return 1 + if cursor_a.get_usr() != cursor_b.get_usr(): + raise ValueError( + f"Unable to order distinct symbols with identical lexical priority and location:" + f" {cursor_a.get_usr()} @ {loc_a} vs {cursor_b.get_usr()} @ {loc_b}" + ) + return 0 + + +def _merge_pure_type_declaration_sccs( + condensed: nx.DiGraph, + scc_map: dict[int, tuple[str, ...]], + symbols: dict[str, Symbol], + symbol_lexical_key_fn, +) -> dict[tuple[str, ...], list[tuple[str, ...]]]: + base_dependencies: dict[tuple[str, ...], list[tuple[str, ...]]] = { + scc_map[n]: sorted( + (scc_map[s] for s in condensed.successors(n)), key=symbol_lexical_key_fn + ) + for n in condensed.nodes + } + + # Only include SCCs where every member is a type declaration + type_scc_nodes = [ + n + for n, members in scc_map.items() + if members + and all( + symbols[name].kind + in ( + CursorKind.STRUCT_DECL, + CursorKind.UNION_DECL, + CursorKind.ENUM_DECL, + CursorKind.FIELD_DECL, + CursorKind.ENUM_CONSTANT_DECL, + CursorKind.TYPEDEF_DECL, + ) + for name in members + ) + ] + if len(type_scc_nodes) <= 1: + return base_dependencies + + # Merge all pure type declaration SCCs into one SCC unit and update dependencies + # accordingly without introducing cycles. Preserve the dependency order + # between the original type SCCs so by-value type definitions remain valid. 
+ type_scc_set = set(type_scc_nodes) + ordered_type_members_by_scc = { + n: tuple(sorted(scc_map[n], key=symbol_lexical_key_fn)) for n in type_scc_nodes + } + ordered_type_scc_nodes = sorted( + type_scc_nodes, + key=lambda n: tuple( + symbol_lexical_key_fn(name) for name in ordered_type_members_by_scc[n] + ), + ) + type_scc_dependencies = { + n: tuple( + succ + for succ in ordered_type_scc_nodes + if succ in set(condensed.successors(n)) and succ in type_scc_set + ) + for n in ordered_type_scc_nodes + } + try: + merged_group = tuple( + name + for n in TopologicalSorter(type_scc_dependencies).static_order() + for name in ordered_type_members_by_scc[n] + ) + except CycleError: + merged_group = tuple( + name for n in ordered_type_scc_nodes for name in ordered_type_members_by_scc[n] + ) + + merged_dependencies: dict[tuple[str, ...], set[tuple[str, ...]]] = {} + merged_successors: set[tuple[str, ...]] = set() + + for n in condensed.nodes: + if n in type_scc_set: + for succ in condensed.successors(n): + if succ not in type_scc_set: + merged_successors.add(scc_map[succ]) + continue + + group = scc_map[n] + merged_dependencies.setdefault(group, set()) + for succ in condensed.successors(n): + if succ in type_scc_set: + merged_dependencies[group].add(merged_group) + else: + merged_dependencies[group].add(scc_map[succ]) + + merged_dependencies.setdefault(merged_group, set()) + merged_dependencies[merged_group].update(merged_successors) + + return { + group: sorted(successors, key=symbol_lexical_key_fn) + for group, successors in merged_dependencies.items() + } + + +def get_feature_defines(compile_commands: Path) -> list[str]: + db = CompilationDatabase.fromDirectory(compile_commands.parent) + cmds = db.getAllCompileCommands() + if cmds is None: + return [] + + defines: dict[str, str | None] = {} # name -> value (None if no value) + for cmd in cmds: + args = iter(cmd.arguments) + for arg in args: + # Glued "-Dstuff" + if arg.startswith("-D") and len(arg) > 2: + macro = arg[2:] 
+ # Separate ["-D", "stuff"] + elif arg == "-D": + macro = next(args, None) + assert macro is not None, ( + f"Malformed compile command: -D without value in {cmd.filename}" + ) + else: + continue + + name, _, value = macro.partition("=") + # Implementation-reserved namespace (_[A-Z]...) per the C standard. + if not (len(name) >= 2 and name[0] == "_" and name[1].isupper()): + continue + + current_value = value if value else None + if name in defines and defines[name] != current_value: + logger.warning( + "Feature-test macro %s has conflicting values across TUs: " + "%s vs %s (keeping latter)", + name, + defines[name], + current_value, + ) + defines[name] = current_value + + return [f"#define {n} {v}" if v else f"#define {n}" for n, v in defines.items()] + + def get_includes(symbols: dict[str, Symbol]) -> list[str]: includes: list[str] = [] for symbol in symbols.values(): @@ -159,7 +426,9 @@ def get_asts( cmds = db.getAllCompileCommands() assert cmds is not None asts = [] - for cmd in cmds: + for i in range(len(cmds)): + cmd = cmds[i] + logger.info(f"Parsing TU {cmd.filename} ({i + 1}/{len(cmds)}) ...") try: tu = TranslationUnit.from_source(None, args=list(cmd.arguments)) except TranslationUnitLoadError as e: @@ -167,7 +436,12 @@ def get_asts( f"Error parsing '{cmd.filename}' using args `{' '.join(cmd.arguments)}`\n{e}" ) if any(d.severity >= Diagnostic.Error for d in tu.diagnostics): - raise TranslationUnitLoadError("\n".join([d.format() for d in tu.diagnostics])) + raise TranslationUnitLoadError( + "\n".join( + [d.format() for d in tu.diagnostics] + + [f"Error parsing '{cmd.filename}' using args `{' '.join(cmd.arguments)}`"] + ) + ) assert tu.cursor is not None if not valid_paths or Path(tu.cursor.spelling).resolve() in valid_paths: ast = extract_info_c(tu) @@ -193,6 +467,14 @@ def rename_conflicting_symbols_(asts: list[TreeResult]) -> dict[Path, bytes]: spelling = symbol.spelling if not spelling: continue + + if symbol.kind == CursorKind.STRUCT_DECL: + spelling = 
"struct " + spelling + if symbol.kind == CursorKind.UNION_DECL: + spelling = "union " + spelling + if symbol.kind == CursorKind.ENUM_DECL: + spelling = "enum " + spelling + # Save this symbol if we haven't seen it before if spelling not in seen: seen[spelling] = symbol @@ -202,27 +484,49 @@ def rename_conflicting_symbols_(asts: list[TreeResult]) -> dict[Path, bytes]: for spelling, sym in seen.items(): symbols_with_spelling.setdefault(spelling, []).append(sym) - # Find symbols with common spelling but different definitions across ASTs + # Find symbols with common spelling but different definitions across ASTs. + # Group definitions by code to avoid O(n^2) pairwise comparison. tu_renames: dict[TranslationUnit, dict[str, str]] = {} - for spelling, symbol1, symbol2 in ( - (spelling, *symbol_pair) - for spelling, symbols in symbols_with_spelling.items() - for symbol_pair in combinations(symbols, r=2) - ): - # Two symbols can only clash if they have same spelling but different definitions - if not (symbol1.is_definition and symbol2.is_definition): + used_spellings = set(symbols_with_spelling.keys()) + for spelling, symbols in symbols_with_spelling.items(): + # Only definitions and variables can conflict + definitions = [s for s in symbols if s.is_definition or s.is_variable] + if len(definitions) <= 1: continue - if symbol1.code == symbol2.code: + + # Group definitions by their code text - identical code means no conflict + code_groups: dict[str, list[Symbol]] = {} + for sym in definitions: + code_groups.setdefault(sym.code.text, []).append(sym) + if len(code_groups) <= 1: continue - # Rename non-global, non-system symbols using TU stem as prefix - for symbol in (symbol1, symbol2): - if symbol.is_global or symbol.is_system: + + # Multiple distinct definitions exist - rename any symbol that can safely be + # renamed. Only true linker symbols (global functions and global variables) must + # preserve their spelling across TUs. 
Struct/union/enum tags and typedefs have + # no linker visibility in C, so they can differ freely between TUs. However, + # clang reports EXTERNAL linkage for all of these — including anonymous tags that + # inherit the name of their enclosing typedef — so we cannot rely on is_global + # to filter them out and must check the cursor kind explicitly. + NON_LINKED_KINDS = ( + CursorKind.STRUCT_DECL, + CursorKind.UNION_DECL, + CursorKind.ENUM_DECL, + CursorKind.TYPEDEF_DECL, + ) + for sym in definitions: + if sym.is_system: continue - path = Path(symbol.cursor.translation_unit.spelling).resolve() - new_spelling = path.stem + "_" + spelling - tu_renames.setdefault(symbol.cursor.translation_unit, {})[symbol.name] = ( - new_spelling - ) + if sym.is_global and sym.parent is None and sym.kind not in NON_LINKED_KINDS: + continue + + path = sym.source_path + new_spelling = mangle(path.stem) + "_" + sym.spelling + while new_spelling in used_spellings: + path = path.parent + new_spelling = mangle(path.stem) + "_" + new_spelling + used_spellings.add(new_spelling) + tu_renames.setdefault(sym.cursor.translation_unit, {})[sym.name] = new_spelling if not tu_renames: return {} @@ -232,7 +536,8 @@ def rename_conflicting_symbols_(asts: list[TreeResult]) -> dict[Path, bytes]: new_spellings = set(renames.values()) if existing_spellings.intersection(new_spellings): raise NotImplementedError( - "Renaming symbols would cause clashes with existing symbols with the same spelling!" 
+ "Renaming symbols would cause clashes with existing symbols with the same spelling!\n" + f"Clashing: {existing_spellings.intersection(new_spellings)}" ) existing_spellings.update(new_spellings) @@ -253,8 +558,10 @@ def rename_conflicting_symbols_(asts: list[TreeResult]) -> dict[Path, bytes]: def merge_symbols( - list_of_symbols: list[dict[str, Symbol]], source_priority: list[Path] + list_of_symbols: list[dict[str, Symbol]], ast_order: dict[Path, TreeResult] | None = None ) -> dict[str, Symbol]: + ast_order = ast_order or {} + ast_rank: dict[Path, int] = {path: i for i, path in enumerate(ast_order)} global_symbols: dict[str, Symbol] = {} for symbols in list_of_symbols: # Gather symbols @@ -268,10 +575,8 @@ def merge_symbols( if global_symbols[name].code == symbol.code: continue - global_source = Path( - global_symbols[name].cursor.translation_unit.spelling - ).resolve() - symbol_source = Path(symbol.cursor.translation_unit.spelling).resolve() + global_source = global_symbols[name].source_path + symbol_source = symbol.source_path # If overwriting a symbol, then prefer one with a definition if ( @@ -284,26 +589,40 @@ def merge_symbols( and symbol.cursor.is_definition() ): global_symbols[name] = symbol + # Prefer non-extern variable declaration over extern one (e.g. 
tentative definition) + elif ( + symbol.cursor.kind == CursorKind.VAR_DECL + and global_symbols[name].cursor.storage_class == StorageClass.EXTERN + and symbol.cursor.storage_class != StorageClass.EXTERN + ): + global_symbols[name] = symbol + # Never replace a non-extern variable with an extern one + elif ( + global_symbols[name].cursor.kind == CursorKind.VAR_DECL + and global_symbols[name].cursor.storage_class != StorageClass.EXTERN + and symbol.cursor.storage_class == StorageClass.EXTERN + ): + continue # Or prefer the symbol with source priority - elif global_source in source_priority and symbol_source not in source_priority: + elif global_source in ast_order and symbol_source not in ast_order: continue - elif global_source not in source_priority and symbol_source in source_priority: + elif global_source not in ast_order and symbol_source in ast_order: global_symbols[name] = symbol elif ( - global_source in source_priority - and symbol_source in source_priority - and source_priority.index(global_source) > source_priority.index(symbol_source) + global_source in ast_order + and symbol_source in ast_order + and ast_rank[global_source] > ast_rank[symbol_source] ): global_symbols[name] = symbol elif ( - global_source in source_priority - and symbol_source in source_priority - and source_priority.index(global_source) < source_priority.index(symbol_source) + global_source in ast_order + and symbol_source in ast_order + and ast_rank[global_source] < ast_rank[symbol_source] ): continue else: # Two symbols have similar names but different declarations or definitions and no source priority! - raise NotImplementedError( + raise RuntimeError( f"Unable to handle symbol {name} with multiple different definitions and unknown source priority!\nSymbol found in {global_source} and {symbol_source}." 
) return global_symbols diff --git a/src/ideas/init/crate.py b/src/ideas/init/crate.py index de37ebe..1bf3c3c 100644 --- a/src/ideas/init/crate.py +++ b/src/ideas/init/crate.py @@ -6,6 +6,7 @@ import sys import logging +import tomlkit from pathlib import Path from dataclasses import dataclass @@ -14,7 +15,7 @@ from hydra.core.config_store import ConfigStore from hydra.core.hydra_config import HydraConfig -from ideas.tools import Crate +from ideas.tools import Crate, LARGE_PROJECT logger = logging.getLogger("ideas.init.crate") @@ -24,6 +25,8 @@ class CrateConfig: crate_type: str = MISSING vcs: str = "none" + reexport_lib: bool = True + def __post_init__(self): if self.crate_type not in ["bin", "lib"]: raise ValueError(f"Invalid crate type: {self.crate_type}!") @@ -48,13 +51,57 @@ def _main(cfg: CrateConfig) -> None: # Delete default cargo init code crate.rust_src_path.write_text("") - # Add static dependencies and sections - crate.cargo_add(dep="openssl@0.10.75") + # Add static dependencies + crate.cargo_add(dep="libc@0.2.185") + crate.cargo_add(dep="openssl@0.10.79") + if LARGE_PROJECT: + crate.cargo_add(dep="flate2@1") + crate.cargo_add(dep="regex@1") + crate.cargo_add(dep="serde@1", section="dev", features=["derive"]) + crate.cargo_add(dep="serde_json@1", section="dev") + crate.cargo_add(dep="tempfile@3", section="dev") crate.cargo_add(dep="cc@1.2.53", section="build") + + if cfg.crate_type == "bin": + # Add static test dependencies + crate.cargo_add(dep="assert_cmd@2.0.17", section="dev") + crate.cargo_add(dep="predicates@3.1.3", section="dev") + + # Disable default tests + cargo_toml = tomlkit.loads(crate.cargo_toml.read_text()) + if cfg.crate_type == "bin": + bin, found = cargo_toml.get("bin", list()), False + for target in bin: + if target.get("name", None) == crate.root_package["name"]: + target["test"], found = False, True + break + if not found: + bin.append({"name": crate.root_package["name"], "test": False}) + cargo_toml["bin"] = bin if cfg.crate_type == 
"lib": - with crate.cargo_toml.open("a") as f: - f.write('\n[lib]\ncrate-type = ["lib", "cdylib"]\n') - crate.invalidate_metadata() + lib = cargo_toml.get("lib", dict()) + lib.update({"test": False, "doctest": False}) + cargo_toml["lib"] = lib + crate.cargo_toml.write_text(tomlkit.dumps(cargo_toml)) + + # Export cdylib + if cfg.crate_type == "lib" and cfg.reexport_lib: + cargo_toml = tomlkit.loads(crate.cargo_toml.read_text()) + lib = cargo_toml.get("lib", dict()) + lib.update({"crate-type": ["lib", "cdylib"]}) + cargo_toml["lib"] = lib + crate.cargo_toml.write_text(tomlkit.dumps(cargo_toml)) + + # Disable lints + cargo_toml = tomlkit.loads(crate.cargo_toml.read_text()) + lints = tomlkit.table(is_super_table=True) + lints.add("rust", {"nonstandard_style": "allow"}) + cargo_toml["lints"] = lints + crate.cargo_toml.write_text(tomlkit.dumps(cargo_toml)) + + # Configure testing + crate.cargo_nextest_config() + crate.invalidate_metadata() # Add cargo, workspace cargo, hydra log directory to VCS crate.vcs.add(crate.cargo_toml, crate.rust_src_path) diff --git a/src/ideas/learn/__init__.py b/src/ideas/learn/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/ideas/learn/translate.py b/src/ideas/learn/translate.py deleted file mode 100644 index 7d11090..0000000 --- a/src/ideas/learn/translate.py +++ /dev/null @@ -1,203 +0,0 @@ -# -# Copyright (C) 2025 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -import json -import pickle -import shutil -import logging -import tempfile -from pathlib import Path -from collections import defaultdict, OrderedDict -from dataclasses import dataclass, field - -import dspy -import hydra -from omegaconf import MISSING -from hydra.core.config_store import ConfigStore -from hydra.core.hydra_config import HydraConfig -from dspy.teleprompt.gepa.gepa_utils import DSPyTrace, ScoreWithFeedback - -from ideas import model, ModelConfig, GenerateConfig, tools -from ideas.translate_snippet import 
SnippetTranslatorSignature - -logger = logging.getLogger("ideas.learn.translate") - - -@dataclass -class TrainConfig: - student_examples: Path = MISSING - teacher_examples: Path = MISSING - - model: ModelConfig = field(default_factory=ModelConfig) - generate: GenerateConfig = field(default_factory=GenerateConfig) - - reflect_model: ModelConfig = field(default_factory=ModelConfig) - reflect_generate: GenerateConfig = field( - default_factory=lambda: GenerateConfig( - temperature=1.0, - max_new_tokens=32000, - ) - ) - - -cs = ConfigStore.instance() -cs.store(name="learn.translate", node=TrainConfig) - - -def metric( - gold: dspy.Example, - pred: dspy.Prediction, - trace: DSPyTrace | None = None, - pred_name: str | None = None, - pred_trace: DSPyTrace | None = None, -) -> float | ScoreWithFeedback: - with tempfile.TemporaryDirectory() as tmpdir: - shutil.copytree(gold.crate_path, tmpdir, dirs_exist_ok=True) - cargo_toml = Path(tmpdir) / "Cargo.toml" - crate = tools.Crate(cargo_toml=cargo_toml) - - # Make sure translation returned something - pred_translation = "" - if "translation" in pred and pred.translation is not None: - pred_translation = pred.translation.code - - # Write predicted translation - rust_srcs = [] - for name, translation in gold.crate_translation.items(): - if name != gold.snippet_name: - rust_srcs.append(translation) - else: - rust_srcs.append(pred_translation) - - rust_src = "\n\n".join(rust_srcs) - # Make libraries wrapper-aware - if not crate.is_bin: - rust_src += "\n\npub mod wrapper;\n" - - crate.rust_src_path.write_text(rust_src) - - # Attempt to build and run all tests - success, _, _, _ = tools.run_subprocess( - ["cargo", "test", f"--manifest-path={cargo_toml}"] - ) - - if not success: - return ScoreWithFeedback( - score=0.0, - feedback=f"The translation failed!\n\nA correct Rust translation is:\n```rust\n{gold.translation}\n```", - ) - else: - return 1.0 - - -def get_crate_and_data_paths(cargo_toml: str) -> tuple[Path, Path]: - 
cargo_toml_path = Path(cargo_toml).resolve() - if cargo_toml_path.is_dir(): - cargo_toml_path = cargo_toml_path / "Cargo.toml" - - crate = tools.Crate(cargo_toml=cargo_toml_path) - jsonl_path = crate.rust_src_path.with_suffix(".jsonl") - - return jsonl_path, cargo_toml_path - - -def split_examples( - student: Path, teacher: Path -) -> tuple[list[dspy.Example], list[dspy.Example]]: - train_examples, val_examples = [], [] - for student_cargo_toml, teacher_cargo_toml in zip( - student.read_text().splitlines(), teacher.read_text().splitlines() - ): - student_jsonl, _ = get_crate_and_data_paths(student_cargo_toml) - teacher_jsonl, teacher_cargo_toml_path = get_crate_and_data_paths(teacher_cargo_toml) - - # Find out where the student fails - student_success = defaultdict(bool) - for jsonl in student_jsonl.read_text().splitlines(): - student_translation = json.loads(jsonl) - if student_translation["success"]: - student_success[student_translation["snippet_name"]] = True - - # Accumulate all successful teacher translations for the crate - crate_translation = OrderedDict() - for jsonl in teacher_jsonl.read_text().splitlines(): - teacher_translation = json.loads(jsonl) - if teacher_translation["success"]: - crate_translation[teacher_translation["snippet_name"]] = teacher_translation[ - "translation" - ] - - # Create examples using trajectories of successful translations - for jsonl_teacher in teacher_jsonl.read_text().splitlines(): - teacher_translation = json.loads(jsonl_teacher) - example = dspy.Example( - crate_path=teacher_cargo_toml_path.parent, - crate_translation=crate_translation, - **teacher_translation, - ).with_inputs( - "reference_code", - "snippet", - "dependent_code", - "prior_translation", - "feedback", - ) - - # Use failed translations for validation - if ( - student_success[teacher_translation["snippet_name"]] - and teacher_translation["success"] - ): - train_examples.append(example) - else: - val_examples.append(example) - return train_examples, 
val_examples - - -@hydra.main(version_base=None, config_name="learn.translate") -def main(cfg: TrainConfig) -> None: - logging.getLogger("dspy").propagate = True - logging.getLogger("httpx").setLevel(logging.WARNING) - output_dir = Path(HydraConfig.get().runtime.output_dir) - logger.info(f"Saving results to {output_dir}") - - model.configure(cfg.model, cfg.generate) - reflection_lm = model.get_lm(cfg.reflect_model, cfg.reflect_generate) - - # Construct train/val sets - trainset, valset = split_examples(cfg.student_examples, cfg.teacher_examples) - - if len(trainset) == 0: - raise ValueError("Learning requires at least one correct symbol translation!") - if len(valset) == 0: - raise ValueError("All symbols are already perfectly translated!") - - gepa = dspy.GEPA( - metric=metric, - num_threads=min(len(valset), 32), - log_dir=str(output_dir), - auto="light", - reflection_lm=reflection_lm, - reflection_minibatch_size=min(len(trainset), 8), - skip_perfect_score=False, - ) - - program = dspy.ChainOfThought(SnippetTranslatorSignature) - optimized_program = gepa.compile( - program, - trainset=trainset, - valset=valset, - ) - print(optimized_program) - optimized_program.save(output_dir / "optimized_program.json") - optimized_program.save(output_dir / "optimized_program.pkl") - optimized_program.save(output_dir / "optimized_program", save_program=True) - f = open(output_dir / "optimized_program_history.pkl", "wb") - pickle.dump(optimized_program.history, f) - f.close() - - -if __name__ == "__main__": - main() diff --git a/src/ideas/model.py b/src/ideas/model.py index 210395c..cbeac35 100644 --- a/src/ideas/model.py +++ b/src/ideas/model.py @@ -23,7 +23,7 @@ class ModelConfig: @dataclass class GenerateConfig: - max_new_tokens: int = 64000 + max_new_tokens: int = 128000 temperature: float = 0.0 top_p: float = 1.0 top_k: int | None = None @@ -55,22 +55,37 @@ def get_lm(model: ModelConfig, generate: GenerateConfig) -> dspy.LM: if "anthropic" in model.name: provider["order"] = 
["anthropic", "anthropic/2", "google-vertex/us-east5", "azure"] - # Require fp8 and limit prices for qwen3-coder - if model.name.lower().endswith("qwen/qwen3-coder"): - provider["quantizations"] = ["fp8"] - provider["max_price"] = {"prompt": 0.5, "completion": 2} - lm.kwargs["provider"] = provider # type: ignore[reportArgumentType] # Mask and/or disable reasoning if desired and possible if model.text_output: lm.kwargs["reasoning"] = {"exclude": True} # type: ignore[reportArgumentType] - if model.name.startswith("openrouter/x-ai"): - lm.kwargs["reasoning"].update({"effort": "none"}) # type: ignore[reportArgumentType] return lm def configure(model: ModelConfig, generate: GenerateConfig): lm = get_lm(model, generate) - dspy.configure(lm=lm) + dspy.configure(lm=lm, track_usage=True) + + +def format_usage(pred: dspy.Prediction) -> str: + # get_lm_usage() returns dict[lm_name, dict[str, Any]] — aggregate across all LMs + lm_usage = pred.get_lm_usage() + if lm_usage is None: + return "unknown usage" + + usage: dict[str, Any] = {} + for per_lm in lm_usage.values(): + for key, value in per_lm.items(): + if isinstance(value, (int, float)): + usage[key] = usage.get(key, 0) + value + + prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens") or 0 + completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens") or 0 + total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens) + cost_usd = usage.get("cost") or usage.get("cost_usd") or usage.get("total_cost") + + cost = f"${cost_usd:.4f}, " if cost_usd is not None else "" + + return f"{cost}{total_tokens:,} tok ({prompt_tokens:,} in / {completion_tokens:,} out)" diff --git a/src/ideas/sync.rs b/src/ideas/sync.rs deleted file mode 100644 index 2c1f4e9..0000000 --- a/src/ideas/sync.rs +++ /dev/null @@ -1,136 +0,0 @@ -use std::fmt; -use std::ops::{Deref, DerefMut}; -use std::ptr; -use std::sync::{ - LockResult, Mutex as StdMutex, MutexGuard as StdMutexGuard, PoisonError, 
TryLockError, - TryLockResult, -}; - -#[repr(C)] -pub struct Mutex { - // Keep the payload first so it stays at offset 0 for C interop. - pub value: T, - inner: StdMutex<()>, -} - -pub struct MutexGuard<'a, T: Sized> { - _inner: StdMutexGuard<'a, ()>, - value: &'a mut T, -} - -impl Mutex { - pub const fn new(value: T) -> Self { - Self { - value, - inner: StdMutex::new(()), - } - } - - pub fn into_inner(self) -> LockResult { - let Self { inner, value } = self; - - match inner.into_inner() { - Ok(()) => Ok(value), - Err(_) => Err(PoisonError::new(value)), - } - } -} - -impl Mutex { - pub fn lock(&self) -> LockResult> { - match self.inner.lock() { - Ok(inner) => Ok(self.guard_from_inner(inner)), - Err(error) => Err(PoisonError::new(self.guard_from_inner(error.into_inner()))), - } - } - - pub fn try_lock(&self) -> TryLockResult> { - match self.inner.try_lock() { - Ok(inner) => Ok(self.guard_from_inner(inner)), - Err(TryLockError::WouldBlock) => Err(TryLockError::WouldBlock), - Err(TryLockError::Poisoned(error)) => Err(TryLockError::Poisoned(PoisonError::new( - self.guard_from_inner(error.into_inner()), - ))), - } - } - - pub fn is_poisoned(&self) -> bool { - self.inner.is_poisoned() - } - - pub fn clear_poison(&self) { - self.inner.clear_poison(); - } - - pub fn get_mut(&mut self) -> LockResult<&mut T> { - match self.inner.get_mut() { - Ok(()) => Ok(&mut self.value), - Err(_) => Err(PoisonError::new(&mut self.value)), - } - } - - fn guard_from_inner<'a>(&'a self, inner: StdMutexGuard<'a, ()>) -> MutexGuard<'a, T> { - MutexGuard { - _inner: inner, - value: unsafe { &mut *self.data_ptr() }, - } - } - - pub fn data_ptr(&self) -> *mut T { - ptr::addr_of!(self.value).cast_mut() - } -} - -impl From for Mutex { - fn from(value: T) -> Self { - Self::new(value) - } -} - -impl Default for Mutex { - fn default() -> Self { - Self::new(T::default()) - } -} - -impl fmt::Debug for Mutex { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.try_lock() { - 
Ok(guard) => f.debug_struct("Mutex").field("data", &&*guard).finish(), - Err(TryLockError::Poisoned(error)) => { - let guard = error.into_inner(); - f.debug_struct("Mutex") - .field("data", &&*guard) - .field("poisoned", &true) - .finish() - } - Err(TryLockError::WouldBlock) => f - .debug_struct("Mutex") - .field("data", &format_args!("")) - .finish(), - } - } -} - -impl Deref for MutexGuard<'_, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - self.value - } -} - -impl DerefMut for MutexGuard<'_, T> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.value - } -} - -impl fmt::Debug for MutexGuard<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.value, f) - } -} - -unsafe impl Send for Mutex {} -unsafe impl Sync for Mutex {} diff --git a/src/ideas/test_symbol.py b/src/ideas/test_symbol.py index dced680..ce4c01f 100644 --- a/src/ideas/test_symbol.py +++ b/src/ideas/test_symbol.py @@ -4,141 +4,40 @@ # SPDX-License-Identifier: Apache-2.0 # -import re +import json import logging -import textwrap -from pathlib import Path import dspy -from .tools import Crate, run_subprocess -from .ast import Symbol, clang_make_extern_ +from ideas.tools import Crate +from ideas.ast import Symbol +from ideas.init.build import write_main_binding logger = logging.getLogger("ideas.test_symbol") class SymbolTester(dspy.Module): - def __init__(self, crate: Crate, symbols: list[Symbol]): + def __init__(self, crate: Crate, symbols: list[Symbol], tests: str): super().__init__() self.crate = crate + self.tests = tests - # Write a build script to compile C code as a static library and link to it - self.write_build_script_() - - # Generate a Rust binding for any global function since we need to force the Rust - # linker to include that C function in the Rust artifact. - # FIXME: If we ever test variables we should generate bindings for those here too! 
- binding_path = self.crate.rust_src_path.parent / "binding.rs" - binding_path.write_text("") - self.main_function = "" for symbol in symbols: if not (symbol.is_function and symbol.is_definition and symbol.is_global): continue if self.crate.is_bin and symbol.spelling == "main": # main requires special handling because we must bind to it as _main and # statically create a Rust main that calls it - self.main_function = self.write_main_binding() - else: - self.write_symbol_binding_(symbol.spelling) - - # These files are modified by test - orig_binding_src = binding_path.read_bytes() - orig_rust_src = self.crate.rust_src_path.read_bytes() - - # Check whether all of the changes compile and commit them - passes, output = self.test() - msg = f"Prepared `{self.crate.root_package['name']}` for symbol testing!" - if not passes: - msg = f"Failed to prepare `{self.crate.root_package['name']}` for symbol testing!" - self.crate.vcs.commit(msg) - - # Restore originals - binding_path.write_bytes(orig_binding_src) - self.crate.rust_src_path.write_bytes(orig_rust_src) - - # Error loudly if changes don't build - if not passes: - msg += output - raise ValueError(msg) - - def write_build_script_(self): - c_src_path = self.crate.c_src_path.relative_to(self.crate.cargo_toml.parent) - build_options = '.define("main", "_main")' if self.crate.is_bin else "" - build_rs_src = textwrap.dedent( - f""" - fn main() {{ - println!("cargo:rerun-if-changed={c_src_path}"); - cc::Build::new() - .compiler("clang") - .warnings(false) - .file("{c_src_path}") - {build_options} - .compile("library"); - println!("cargo:rustc-link-lib=static=library"); - // FIXME: How do we statically add libraries to link to? 
- println!("cargo:rustc-link-lib=dylib=crypto"); - }} - """ - ) - - build_rs_path = self.crate.cargo_toml.parent / "build.rs" - build_rs_path.write_text(build_rs_src) - self.crate.vcs.add(build_rs_path) - - def write_symbol_binding_(self, symbol_name: str): - symbol_binding = get_linked_binding(symbol_name, self.crate.c_src_path) + self.main_function: str = write_main_binding(crate) - symbol_binding_path = self.crate.rust_src_path.parent / "binding" / f"{symbol_name}.rs" - symbol_binding_path.parent.mkdir(exist_ok=True) - symbol_binding_path.write_text(symbol_binding) - self.crate.vcs.add(symbol_binding_path) - - binding_path = self.crate.rust_src_path.parent / "binding.rs" - with binding_path.open("a+") as f: - f.write(f"pub mod {symbol_name};\n") - self.crate.vcs.add(binding_path) - - def write_main_binding(self) -> str: - # Get binding for main (redefined as _main) - main_binding = get_linked_binding("_main", self.crate.c_src_path, "-Dmain=_main") - - main_binding_path = self.crate.rust_src_path.parent / "binding" / "main.rs" - main_binding_path.parent.mkdir(exist_ok=True) - main_binding_path.write_text(main_binding) - self.crate.vcs.add(main_binding_path) - - # Return appropriate main function instead of writing to binding.rs - if "fn _main()" in main_binding: - return textwrap.dedent( - """ - pub fn main() { - let ret = unsafe { binding::main::_main() }; - std::process::exit(ret); - } - """ - ) - else: - return textwrap.dedent( - """ - pub fn main() { - let mut args: Vec<_> = std::env::args().into_iter().map(|s| std::ffi::CString::new(s).unwrap().into_raw()).collect(); - let ret = unsafe { binding::main::_main(args.len() as i32, args.as_mut_ptr()) }; - std::process::exit(ret); - } - """ - ) - - def test(self) -> tuple[bool, str]: + def test( + self, tests: str, skip: list[str] | None = None + ) -> tuple[bool, dict[str, bool], str]: rust_src = self.crate.rust_src_path.read_text() # Remove forbid unsafe from Rust source rust_src = 
rust_src.replace("#![forbid(unsafe_code)]", "") - # Replace Rust Mutex with C ABI-compatible Mutex in Rust source - RUST_MUTEX = "use std::sync::{Mutex, MutexGuard};" - C_ABI_MUTEX = "mod sync;\nuse crate::sync::{Mutex, MutexGuard};" - rust_src = rust_src.replace(RUST_MUTEX, C_ABI_MUTEX) - # Reference wrapper module in Rust source WRAPPER_MOD = "pub mod wrapper;" if WRAPPER_MOD not in rust_src: @@ -156,30 +55,26 @@ def test(self) -> tuple[bool, str]: self.crate.rust_src_path.write_text(rust_src) # Try building the crate to detect if we need to insert a main - builds, feedback = self.crate.cargo_build(allow_unsafe=True, fix_E0601=False) + builds, feedback = self.crate.cargo_build(fix_E0601=False) if "error[E0601]" in feedback and self.main_function: - with binding_path.open("a+") as f: - f.write("pub mod main;\n") with self.crate.rust_src_path.open("a+") as f: f.write(self.main_function) self.crate.vcs.add(wrapper_path, binding_path, self.crate.rust_src_path) # Make sure the crate builds before testing - builds, feedback = self.crate.cargo_build(allow_unsafe=True, fix_E0601=False) + builds, feedback = self.crate.cargo_build(fix_E0601=False) if not builds: raise RuntimeError(f"Crate does not build!\n{feedback}") - passes, output, error, _ = self.crate.cargo_test() - return passes, output + error - def forward(self, symbol: Symbol) -> dspy.Prediction: - logger.info(f"Testing symbol `{symbol.name}` ....") + passes, jsonl, error, _ = self.crate.cargo_test( + tests, skip=skip, test_harness="nextest run", message_format="libtest-json" + ) + results = extract_test_results(jsonl) + return passes, results, error - # Overwrite C symbol to reference extern symbol that we will link to the Rust symbol. - # It is very important that this happens first since it will overwrite any other changes - # made to the C code. 
- clang_make_extern_(self.crate.c_src_path, symbol.spelling) - self.crate.vcs.add(self.crate.c_src_path) + def forward(self, symbol: Symbol, skip: list[str] | None = None) -> dspy.Prediction: + logger.info(f"Testing symbol `{symbol.name}` ....") # These files are modified by test binding_path = self.crate.rust_src_path.parent / "binding.rs" @@ -187,9 +82,11 @@ def forward(self, symbol: Symbol) -> dspy.Prediction: orig_rust_src = self.crate.rust_src_path.read_bytes() # Run cargo test - passes, feedback = self.test() - msg = f"Tested symbol `{symbol.name}`" - if not passes: + passes, results, feedback = self.test(self.tests, skip=skip) + if passes: + msg = f"Tested symbol `{symbol.name}`" + logger.info(msg) + else: feedback = "Running `cargo test` fails!\n" + feedback msg = f"Failed to test symbol `{symbol.name}`" logger.error(msg) @@ -200,39 +97,42 @@ def forward(self, symbol: Symbol) -> dspy.Prediction: binding_path.write_bytes(orig_binding_src) self.crate.rust_src_path.write_bytes(orig_rust_src) - pred = dspy.Prediction(success=passes) + pred = dspy.Prediction( + success=passes, + output=feedback, + results=results, + feedback="", + ) + if not passes: # FIXME: Use test feedback? - pred.feedback = "Carefully compare the Rust translation in `prior_translation` with the C `snippet` and find where any mis-translations happen. Then use this knowledge to generate a correct Rust `translation` of the C `snippet`. You should treat the C `snippet` as correct, so if the C `snippet` has a bug, you should replicate that bug in the Rust `translation` too." + pred.feedback = ( + "The current Rust translation in `prior_translation` does not match the behavior of the C `snippet`. " + "Carefully compare `prior_translation` against the C `snippet` and regenerate the Rust `translation` to match the C behavior exactly. 
" + "Do not assume inputs are well-formed: if the tests exercise malformed, invalid, partial, or adversarial input, preserve the C behavior for those cases too, including error returns, boundary handling, or other observable effects. " + "Make minimal, targeted changes to `prior_translation`, and only modify what is necessary to match the C behavior. " + "Treat the C `snippet` as the source of truth, even if it contains a bug." + ) + return pred -def get_linked_binding(function_name: str, c_src_path: Path, *bindgen_args: str) -> str: - # Use bindgen to generate binding to C symbol - bindgen = [ - "bindgen", - "--disable-header-comment", - "--no-doc-comments", - "--no-layout-tests", - "--allowlist-function", - function_name, - str(c_src_path), - "--", - *bindgen_args, - ] - ok, binding, error, _ = run_subprocess(bindgen) - if not ok: - raise ValueError(f"`{' '.join(bindgen)}` failed!\n{binding + error}") - - # Parse binding since we need to add special link instructions - linked_binding = re.sub( - r'unsafe extern "C" {\n(.*)\n}', - r'#[link(name="library", kind="static")]\nunsafe extern "C" {\n #[unsafe(no_mangle)]\n\1\n}', - binding, - flags=re.DOTALL, - ) - if linked_binding == binding: - raise ValueError( - f"Failed to convert binding to linked binding for {function_name}!\n{binding}" - ) - return linked_binding +def extract_test_results(output: str) -> dict[str, bool]: + test_results: dict[str, bool] = {} + + for line in output.splitlines(): + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + if obj.get("type") != "test": + continue + event = obj.get("event") + if event not in {"ok", "failed", "ignored"}: + continue + name = str(obj.get("name", "")).rsplit("$", 1)[-1].strip() + if name: + # Treat ignored as non-failing for disable-list purposes + test_results[name] = event != "failed" + + return test_results diff --git a/src/ideas/tools.py b/src/ideas/tools.py index d3546e4..3a1841d 100644 --- a/src/ideas/tools.py +++ 
b/src/ideas/tools.py @@ -5,9 +5,9 @@ # import os +import re import json -from json import loads as js_loads -from textwrap import dedent as d +import shutil import tomlkit import logging @@ -58,10 +58,39 @@ def add(self, *paths: Path) -> bool: raise ValueError(f"Failed to add {path}!\n{out}") return ok + def rm(self, *paths: Path, force: bool = False) -> bool: + if self.vcs == "none": + ret = True + for path in paths: + target = path if path.is_absolute() else self.repo_dir / path + try: + if target.is_dir(): + shutil.rmtree(target) + else: + target.unlink() + except Exception: + if not force: + ret = False + return ret + + ret = True + for path in paths: + cmd = ["rm"] + if path.is_dir(): + cmd.append("-r") + if force: + cmd.append("-f") + + ok, out = self(" ".join([*cmd, str(path)])) + if not force: + ret = ret and ok + return ret + def commit(self, message: str = "") -> bool: if self.vcs == "none": return True + message = message.replace("\x00", "") ok, out = self("commit --allow-empty -F -", input=message) if not ok: raise ValueError(f"Failed to commit changes to git!\n{out}") @@ -91,12 +120,8 @@ def __init__( if not self.cargo_toml.exists(): # Create a new workspace os.makedirs(workspace_dir, exist_ok=True) - self.cargo_toml.write_text( - d(""" - [workspace] - resolver = "3" - """).strip() - ) + contents = {"workspace": {"resolver": "3"}} + self.cargo_toml.write_text(tomlkit.dumps(contents)) # Initialize repository if needed self.vcs.init(force_init=True) @@ -136,9 +161,6 @@ def __init__( f"Failed to create new crate at {crate_dir} with error:\n\n{output + error}" ) - # Add unsafe feature that allow unsafe code - self.cargo_feature(unsafe=[]) - # Initialize repository if needed self.vcs.init() @@ -213,7 +235,9 @@ def rust_src_path(self) -> Path: def c_src_path(self) -> Path: return self.rust_src_path.with_suffix(".c") - def cargo_add(self, dep: str, section: str | None = None) -> str: + def cargo_add( + self, dep: str, section: str | None = None, features: 
list[str] | None = None + ) -> str: cmd = [ "cargo", "add", @@ -222,6 +246,8 @@ def cargo_add(self, dep: str, section: str | None = None) -> str: ] if section: cmd.append(f"--{section}") + if features: + cmd.append(f"--features={','.join(features)}") cmd.append(dep) success, output, error, _ = run_subprocess(cmd) @@ -246,9 +272,20 @@ def cargo_feature(self, **features: list[str]) -> None: # Invalidate cached metadata self.invalidate_metadata() - def cargo_build( - self, allow_unsafe: bool = False, fix_E0601: bool = True - ) -> tuple[bool, str]: + def cargo_clean(self) -> None: + cmd = [ + "cargo", + "clean", + "--quiet", + f"--manifest-path={self.cargo_toml}", + ] + success, output, error, _ = run_subprocess(cmd) + if not success: + raise RuntimeError( + f"Failed to clean crate at {self.cargo_toml} with error:\n\n{output + error}" + ) + + def cargo_build(self, fix_E0601: bool = True) -> tuple[bool, str]: cmd = [ "cargo", "build", @@ -256,8 +293,6 @@ def cargo_build( "--color=never", f"--manifest-path={self.cargo_toml}", ] - if allow_unsafe: - cmd += ["--features=unsafe"] builds, output, error, _ = run_subprocess(cmd) # Work around E0601 error "No main function was found in a binary crate." 
@@ -270,16 +305,71 @@ def cargo_build( return builds, output + error - def cargo_test(self) -> tuple[bool, str, str, int | Literal["timeout"]]: + def cargo_test( + self, + name: str, + test_harness: Literal["nextest run", "test"] = "nextest run", + quiet: bool = True, + fail_fast: bool = False, + build_only: bool = False, + skip: list[str] | None = None, + message_format: str | None = None, + ) -> tuple[bool, str, str, int | Literal["timeout"]]: cmd = [ "cargo", - "test", - "--quiet", + *test_harness.split(), "--color=never", f"--manifest-path={self.cargo_toml}", - "--features=unsafe", ] - return run_subprocess(cmd) + if not fail_fast: + cmd.append("--no-fail-fast") + if quiet: + if test_harness == "nextest run": + cmd.append("--cargo-quiet") + elif test_harness == "test": + cmd.append("--quiet") + else: + raise ValueError(f"Unsupported test harness: {test_harness}") + if name: + cmd.extend(["--test", name]) + if build_only: + cmd.append("--no-run") + + env = os.environ.copy() + if message_format is not None: + cmd.extend(["--message-format", message_format]) + if message_format == "libtest-json" and test_harness == "nextest run": + # https://nexte.st/docs/machine-readable/libtest-json/ + env["NEXTEST_EXPERIMENTAL_LIBTEST_JSON"] = "1" + if skip: + if test_harness == "nextest run": + excluded_tests = [f"test(/^{re.escape(test_name)}$/)" for test_name in skip] + expr = " and ".join(f"not {test_expr}" for test_expr in excluded_tests) + cmd.extend(["-E", expr]) + else: + cmd.append("--") + cmd.append("--exact") + for test_name in skip: + cmd.extend(["--skip", test_name]) + + return run_subprocess(cmd, env=env) + + def cargo_nextest_config(self, slow: int = 30, terminate_after: int = 4) -> None: + nextest_config_path = self.workspace_root / ".config" / "nextest.toml" + nextest_config_path.parent.mkdir(exist_ok=True) + nextest_config = { + "profile": { + "default": { + "slow-timeout": {"period": f"{slow}s", "terminate-after": terminate_after}, + "final-status-level": 
"none", + "fail-fast": False, + "failure-output": "never", + "test-threads": 1, + } + } + } + nextest_config_path.write_text(tomlkit.dumps(nextest_config)) + self.invalidate_metadata() def write(self, path: Path, data, **kwargs): if path.is_absolute(): @@ -288,6 +378,38 @@ def write(self, path: Path, data, **kwargs): return path.write_text(data, **kwargs) +def nextest_json_to_libtest(stdout: str) -> str: + """Convert nextest libtest-json output to vanilla `cargo test` text format.""" + lines = [] + summary = {} + for raw in stdout.splitlines(): + obj = json.loads(raw) + + if obj.get("type") == "test": + event = obj.get("event") + if event not in {"ok", "failed", "ignored"}: + continue + + # nextest uses "$" to join binary::suite$test_name + name = obj["name"].rsplit("$", 1)[-1] + status = "FAILED" if event == "failed" else event + lines.append(f"test {name} ... {status}") + + elif obj.get("type") == "suite" and obj.get("event") != "started": + summary = obj + + # Append summary from the suite event (or zeros if missing) + p, f = summary.get("passed", 0), summary.get("failed", 0) + ig, m = summary.get("ignored", 0), summary.get("measured", 0) + fo = summary.get("filtered_out", 0) + result = "FAILED" if f else "ok" + lines.append( + f"test result: {result}. 
{p} passed; {f} failed; " + f"{ig} ignored; {m} measured; {fo} filtered out" + ) + return "\n".join(lines) + "\n" + + def run_subprocess( cmd: list[str], input: str | None = None, @@ -316,23 +438,6 @@ def run_subprocess( ) -def compile_c( - source_file: str, output_file: str, flags: list[str] | None = None -) -> tuple[bool, str]: - cmd = ["clang-21"] - - if flags: - cmd.extend(flags) - else: - cmd.append("-Wall") - - cmd.append(source_file) - cmd.extend(["-o", output_file]) - - success, output, error, _ = run_subprocess(cmd) - return success, output + error - - def check_c( code: str, *, @@ -353,28 +458,6 @@ def check_c( return success, output + error -def compile_rust( - code: str, - output_file: Path, - *, - flags: list[str] | None = None, - structured_output: bool = False, -) -> tuple[bool, str]: - cmd = ["rustc"] - - if flags: - cmd.extend(flags) - - if structured_output: - cmd.append("--error-format=json") - - cmd.append("-") - cmd.extend(["-o", str(output_file)]) - - success, output, error, _ = run_subprocess(cmd, input=code) - return success, output + error - - def check_rust( code: str, *, @@ -396,60 +479,9 @@ def check_rust( return success, output + error -def run_clippy( - source_file: str, flags: list[list[str]] | None = None, structured_output: bool = False -) -> list[tuple[bool, str]]: - base_cmd = ["clippy-driver"] - - if structured_output: - base_cmd.append("--error-format=json") - - if not flags: - flags = [ - ["-D", "correctness"], - ["-W", "suspicious"], - ["-W", "complexity"], - ["-W", "perf"], - ["-W", "style"], - ] - - res = [] - for opt in flags: - cmd = base_cmd.copy() - cmd.extend(opt) - cmd.append(source_file) - res.append(run_subprocess(cmd)) - - return res - - -def tool_output_to_js_dict(out: str | list[str]) -> list[dict[str, Any]]: - if isinstance(out, str): - out = [out] - - def map_single_str(s: str) -> list[dict[str, Any]]: - js_list = [] - # rustc outputs multiple lines, each representing a json object - for line in s.split("\n"): - 
stripped = line.strip() - if stripped: - js_list.append(js_loads(stripped)) - - return js_list - - # clippy tool call is several individual calls; we can process them together as a list - js_list = [] - for s in out: - js_list.extend(map_single_str(s)) - return js_list - - -def structured_to_rendered(js_dict: list[dict[str, Any]]) -> str: - rendered = "" - for single_msg in js_dict: - if r := single_msg["rendered"]: - rendered += r - return rendered +def rustfmt(path: Path) -> None: + cmd = ["rustfmt", str(path)] + run_subprocess(cmd) def run_test( @@ -516,5 +548,4 @@ def _in_env(var_name: str, default: bool = True) -> bool: return value.strip().lower() in {"1", "true", "yes", "on"} -HYBRID_BUILD = _in_env("HYBRID_BUILD", default=True) -STATIC_TRANSLATIONS = HYBRID_BUILD or _in_env("STATIC_TRANSLATIONS", default=True) +LARGE_PROJECT = _in_env("LARGE_PROJECT", default=False) diff --git a/src/ideas/translate.py b/src/ideas/translate.py index ce40c0b..86326ec 100644 --- a/src/ideas/translate.py +++ b/src/ideas/translate.py @@ -18,8 +18,9 @@ from ideas import adapters, model, ModelConfig, GenerateConfig from ideas import SnippetTranslator, RecurrentTranslator, WrapperGenerator, SymbolTester from ideas import create_translation_unit, extract_info_c +from ideas.ast_rust import mangle from ideas.init.consolidate import get_symbols_and_dependencies -from .tools import Crate, HYBRID_BUILD +from .tools import Crate, LARGE_PROJECT logger = logging.getLogger("ideas.translate") @@ -31,12 +32,12 @@ class TranslateConfig: generate: GenerateConfig = field(default_factory=GenerateConfig) cargo_toml: Path = MISSING + tests: str = MISSING translator: str = "ChainOfThought" translator_max_iters: int = 5 wrapper_max_iters: int = 5 - max_iters: int = 5 - readonly_cache: Path | None = None + max_iters: int = 3 vcs: str = "none" @@ -55,52 +56,47 @@ def _main(cfg: TranslateConfig) -> None: # Make sure Rust source is in known state (i.e., empty) crate.rust_src_path.write_text("") + if 
LARGE_PROJECT and (crate.cargo_toml.parent / "build.rs").exists(): + (crate.cargo_toml.parent / "build.rs").unlink() + crate.vcs.rm(crate.cargo_toml.parent / "build.rs", force=True) # Get global symbol table tu = create_translation_unit(cfg.filename) asts = [extract_info_c(tu)] symbols, dependencies = get_symbols_and_dependencies( - asts, source_priority=[], external_symbol_names=["c:@F@main"] if crate.is_bin else None + asts, external_symbol_names=["c:@F@main"] if crate.is_bin else None ) - global_functions = [ - s for s in symbols.values() if s.is_global and (s.is_function and s.is_definition) - ] - if not global_functions: - logger.info("No global functions to translate!") - return # Create translation agent model.configure(cfg.model, cfg.generate) dspy.configure(adapter=adapters.ChatAdapter()) translator = getattr(dspy, cfg.translator) - snippet_translator = SnippetTranslator( - translator, crate, cfg.translator_max_iters, readonly_cache=cfg.readonly_cache - ) - symbol_wrapper = WrapperGenerator( - crate, cfg.wrapper_max_iters, readonly_cache=cfg.readonly_cache - ) - symbol_tester = None - if HYBRID_BUILD: - symbol_tester = SymbolTester(crate, symbols=global_functions) + snippet_translator = SnippetTranslator(translator, crate, cfg.translator_max_iters) + symbol_wrapper, symbol_tester = None, None + if not LARGE_PROJECT: + symbol_wrapper = WrapperGenerator(crate, cfg.wrapper_max_iters) + symbol_tester = SymbolTester(crate, symbols=list(symbols.values()), tests=cfg.tests) agent = RecurrentTranslator( crate, snippet_translator, symbol_wrapper, symbol_tester, cfg.max_iters ) # Run translation agent and write it to disk pred = agent(symbols, dependencies) - crate.rust_src_path.write_text(pred.translation) + crate.rust_src_path.write_text(pred.translation.text) + usage = model.format_usage(pred) if pred.success: - # FIXME: Only keep wrappers for symbols we need to export - - msg = f"Translated `{crate.root_package['name']}` to Rust!" 
+ msg = f"Translated `{crate.root_package['name']}` to Rust: {usage}" logger.info(msg) else: # Restore original C code so next agent can use it crate.c_src_path.write_bytes(orig_c_src) - msg = f"Failed to translate `{crate.root_package['name']}` to Rust!" + msg = f"Failed to translate `{crate.root_package['name']}`: {usage}" logger.error(msg) + # Clean up intermediate artifacts produced during translation + _cleanup(crate, symbols) + # Commit translation if (output_subdir := HydraConfig.get().output_subdir) is not None: crate.vcs.add(output_dir / output_subdir) @@ -108,6 +104,42 @@ def _main(cfg: TranslateConfig) -> None: crate.vcs.commit(msg) +def _cleanup(crate: Crate, symbols: dict) -> None: + # Remove bindgen artifacts + crate.vcs.rm( + crate.rust_src_path.parent / "binding", + crate.rust_src_path.parent / "binding.rs", + force=True, + ) + logger.info("Removed bindgen artifacts") + + # Remove wrappers for symbols that are not globally linked + keepers = { + mangle(s.spelling) + for s in symbols.values() + if s.is_global + and not crate.is_bin + and (s.is_variable or (s.is_function and s.is_definition)) + } + wrapper_dir = crate.rust_src_path.parent / "wrapper" + wrapper_module = crate.rust_src_path.parent / "wrapper.rs" + + lines = wrapper_module.read_text().splitlines() if wrapper_module.exists() else [] + if wrapper_dir.exists(): + for wrapper_file in wrapper_dir.glob("*.rs"): + if wrapper_file.stem not in keepers: + crate.vcs.rm(wrapper_file, force=True) + logger.info(f"Removed non-global wrapper: {wrapper_file.name}") + mod_line = f"pub mod {wrapper_file.stem};" + if mod_line in lines: + lines.remove(mod_line) + if lines: + wrapper_module.write_text("\n".join(lines) + "\n") + crate.vcs.add(wrapper_module) + else: + crate.vcs.rm(wrapper_module, wrapper_dir, force=True) + + @hydra.main(version_base=None, config_name="translate") def main(cfg: TranslateConfig) -> None: try: diff --git a/src/ideas/translate_recurrent.py b/src/ideas/translate_recurrent.py index 
9537285..9c7671b 100644 --- a/src/ideas/translate_recurrent.py +++ b/src/ideas/translate_recurrent.py @@ -6,24 +6,28 @@ import logging from pathlib import Path -from difflib import unified_diff from collections.abc import Iterable import dspy import networkx as nx -from .ast import Symbol -from .tools import Crate, STATIC_TRANSLATIONS +from .ast import CodeC, Symbol +from .ast_rust import CodeRust, get_signatures +from .tools import Crate, LARGE_PROJECT +from .init.consolidate import create_symbol_lexical_key_fn logger = logging.getLogger("ideas.translate_recurrent") +SymbolName = str +SymbolGroup = tuple[SymbolName, ...] + class RecurrentTranslator(dspy.Module): def __init__( self, crate: Crate, symbol_translator: dspy.Module, - symbol_wrapper: dspy.Module, + symbol_wrapper: dspy.Module | None = None, symbol_tester: dspy.Module | None = None, max_iters: int = 1, ): @@ -33,110 +37,129 @@ def __init__( self.wrap_symbol = symbol_wrapper self.test_symbol = symbol_tester self.max_iters = max_iters + self._failed_tests: set[str] = set() def forward( self, - symbols: dict[str, Symbol], - dependencies: dict[tuple[str, ...], Iterable[tuple[str, ...]]], + symbols: dict[SymbolName, Symbol], + dependencies: dict[SymbolGroup, Iterable[SymbolGroup]], ) -> dspy.Prediction: - G = nx.from_dict_of_lists(dependencies, create_using=nx.DiGraph) - assert isinstance(G, nx.DiGraph) - - # FIXME: This is from SnippetTranslator - self.crate.rust_src_path.write_text("use std::sync::{Mutex, MutexGuard};\n\n") + # We always start with an empty crate + self.crate.rust_src_path.write_text("") + self._failed_tests = set() # Translate symbols in topological order - snippets: dict[str, tuple[str, ...]] = {} - translations: dict[tuple[str, ...], str] = {} - sorted_symbol_names = list(reversed(list(nx.topological_sort(G)))) - symbol_names_with_variable = list( - filter( - lambda symbol_names: any(symbols[n].is_variable for n in symbol_names), - sorted_symbol_names, + G = 
nx.from_dict_of_lists(dependencies, create_using=nx.DiGraph) + assert isinstance(G, nx.DiGraph) + groups = list( + nx.lexicographical_topological_sort( + G.reverse(copy=False), key=create_symbol_lexical_key_fn(symbols) ) ) - symbols_count = len(sorted_symbol_names) - for i, symbol_names in enumerate(sorted_symbol_names, start=1): - logger.info( - f"Translating symbol group `{' '.join(symbol_names)}` ({i}/{symbols_count}) ..." - ) + + snippets: dict[CodeC, SymbolGroup] = {} + translations: dict[SymbolGroup, CodeRust] = {} + count = len(groups) + for i, group in enumerate(groups, start=1): + logger.info(f"Translating symbol group `{' '.join(group)}` [{i}/{count}] ...") + # Gather code for each symbol and check if we have already translated such a snippet - snippet = "\n".join(symbols[name].code.strip() + "\n" for name in symbol_names) + snippet = CodeC.join(symbols[name].code for name in group) if snippet in snippets: logger.info( - f"Skipping translation of `{' '.join(symbol_names)}` because it was already translated by `{' '.join(snippets[snippet])}`..." + f"Skipping translation of `{' '.join(group)}` because it was already translated by `{' '.join(snippets[snippet])}`..." ) - translations[symbol_names] = translations[snippets[snippet]] + translations[group] = translations[snippets[snippet]] continue - snippets[snippet] = symbol_names + snippets[snippet] = group # FIXME: We could save context here by only including translations of descendants of the current symbol. # However, one must prompt the LLM to never generate use statements since those could conflict. 
- # Use all unique translations as reference code since many symbol names can map to the same translation - reference_code = "\n".join( + # Use all unique (dict.fromkeys) translations as reference code since many symbol names can map to the same translation + already_translated = nx.descendants(G, group) + immediate_already_translated = set(G.successors(group)) + immediate_to_be_translated = set(G.predecessors(group)) + + reference_code = CodeRust.join( + dict.fromkeys(translations[g] for g in groups if g in translations) + ) + reference_context = CodeRust.join( dict.fromkeys( - translations[name] for name in sorted_symbol_names if name in translations + translations[g] + if g in immediate_already_translated + else get_signatures(translations[g]) + for g in groups + if g in translations ) ) - # Gather dependent code in topological order - predecessors = list(G.predecessors(symbol_names)) - dependent_code = "\n".join( - symbols[name].code.strip() + "\n" - for names in sorted_symbol_names - if names in predecessors - for name in names + # Gather support code in topological order + support_code = CodeC.join( + symbols[name].code + if g in immediate_already_translated and LARGE_PROJECT + else CodeC(symbols[name].llm_context_declaration) + for g in groups + if g in already_translated + for name in g ) - # Use static translation for any symbol that a variable depends on - static_translation = "" - if STATIC_TRANSLATIONS and any( - nx.has_path(G, group_with_variable, symbol_names) - for group_with_variable in symbol_names_with_variable - ): - static_translation = "\n".join( - symbols[name].static_translation.strip() + "\n" - for name in symbol_names - if symbols[name].static_translation != "" - ).strip() - if static_translation: - logger.info(f"Using static translation for `{' '.join(symbol_names)}`") + # Gather dependent code in topological order + dependent_code = CodeC.join( + symbols[name].code + for g in groups + if g in immediate_to_be_translated + for name in g + ) 
# Translate snippet and save it if successful pred = self.translate_with_retries( reference_code=reference_code, - symbols=[symbols[name] for name in symbol_names], + reference_context=reference_context, + symbols=[symbols[name] for name in group], dependent_code=dependent_code, - translation=static_translation, + support_code=support_code, ) - if not pred.success: + + if pred.failure == "translate": + # Translate failures (as opposed to wrap/test failures) are fatal break - translations[symbol_names] = pred.translation.code.strip() + "\n" + else: + # Once a test fails, skip it for all future groups in this run + newly_failed_tests = pred.failed_tests - self._failed_tests + if newly_failed_tests: + self._failed_tests.update(newly_failed_tests) + logger.info( + "Disabled the following failing tests: %s", + ", ".join(sorted(newly_failed_tests)), + ) + translations[group] = pred.translation - # Re-assemble translation in order - translation = "use std::sync::{Mutex, MutexGuard};\n\n" - translation += "\n".join( - dict.fromkeys( - translations[name] for name in sorted_symbol_names if name in translations - ) + # Re-assemble unique (dict.fromkeys) translations in order + translation = CodeRust.join( + dict.fromkeys(translations[group] for group in groups if group in translations) ) if not self.crate.is_bin: - translation += "\npub mod wrapper;\n" + translation += CodeRust("pub mod wrapper;") pred = dspy.Prediction( - translation=translation, success=len(translations) == len(sorted_symbol_names) + translation=translation, success=len(translations) == len(groups) ) return pred def translate_with_retries( self, - reference_code: str, + reference_code: CodeRust, + reference_context: CodeRust, symbols: list[Symbol], - dependent_code: str, - translation: str = "", + dependent_code: CodeC, + support_code: CodeC, + prior_translation: CodeRust | None = None, + prior_wrappers: dict[str, CodeRust] | None = None, + feedback: str = "", ) -> dspy.Prediction: - prior_translation, 
feedback = "", "" + name = " ".join([f"`{s.name}`" for s in symbols]) pred = dspy.Prediction() - for i in range(max(self.max_iters, 1)): + num_iters = max(self.max_iters, 1) + for i in range(num_iters): # Save these in case translation fails orig_c_src = self.crate.c_src_path.read_bytes() orig_rust_src = self.crate.rust_src_path.read_bytes() @@ -145,40 +168,54 @@ def translate_with_retries( # Attempt translation and exit early on success pred = self.translate( reference_code, + reference_context, symbols, dependent_code, + support_code, prior_translation=prior_translation, + prior_wrappers=prior_wrappers, feedback=feedback, - translation=translation if i == 0 else "", ) if pred.success: break - # Restore to original state since translation failed - self.crate.c_src_path.write_bytes(orig_c_src) - self.crate.rust_src_path.write_bytes(orig_rust_src) - self._restore_wrappers(orig_wrappers_src) - - # On failure log a diff against prior translation - name = " ".join([f"`{s.name}`" for s in symbols]) - msg = f"Failed to translate symbol(s) {name} ({i + 1}/{self.max_iters})!" - if prior_translation: - diff = "\n".join( - unified_diff( - prior_translation.splitlines(), - pred.translation.code.splitlines(), - lineterm="", - fromfile="prior_translation", - tofile="current_translation", - ) + # If neither translation nor wrappers differ from previous try, then stop retrying + if ( + prior_translation is not None + and prior_translation == pred.translation + and prior_wrappers is not None + and prior_wrappers == pred.wrappers + ): + logger.error( + f"Failed to translate symbol(s) {name} due to translation loop ({i + 1}/{num_iters})!" 
) - if "reasoning" in pred: - msg += f"\n# Reason\n{pred.reasoning.strip()}\n" - msg += f"\n# Translation Diff\n{diff.strip()}\n" - logger.error(msg) + break + + # Translation differs so allow another retry but log an error + logger.error(f"Failed to translate symbol(s) {name} ({i + 1}/{num_iters})!") + + # On failure, restore state based on which stage failed + if i + 1 < num_iters: + # Full restore for next retry since we haven't exhausted retries yet + self.crate.c_src_path.write_bytes(orig_c_src) + self.crate.rust_src_path.write_bytes(orig_rust_src) + self._restore_wrappers(orig_wrappers_src) + elif pred.failure == "translate": + # Full restore since a failure at this stage is fatal + self.crate.c_src_path.write_bytes(orig_c_src) + self.crate.rust_src_path.write_bytes(orig_rust_src) + self._restore_wrappers(orig_wrappers_src) + elif pred.failure == "wrap": + # Wrapper restore since they failed but hopefully translation is good + # FIXME: What if a wrapper is being tested? Seems fatal? 
+ self._restore_wrappers(orig_wrappers_src) + elif pred.failure == "test": + # Keep wrappers and translation even though they didn't pass tests + pass # Create feedback for next iteration - prior_translation = pred.translation.code + prior_translation = pred.translation + prior_wrappers = pred.wrappers feedback = pred.feedback return pred @@ -205,34 +242,47 @@ def _restore_wrappers(self, wrappers: dict[Path, bytes]) -> None: def translate( self, - reference_code: str, + reference_code: CodeRust, + reference_context: CodeRust, symbols: list[Symbol], - dependent_code: str, - prior_translation: str = "", + dependent_code: CodeC, + support_code: CodeC, + prior_translation: CodeRust | None = None, + prior_wrappers: dict[str, CodeRust] | None = None, feedback: str = "", - translation: str = "", ) -> dspy.Prediction: + prior_wrappers = prior_wrappers or {} + # Translate symbols and save it if successful + snippet = CodeC.join(symbol.code for symbol in symbols) pred = self.translate_symbol( name=" ".join(symbol.name for symbol in symbols), reference_code=reference_code, - snippet="\n".join(symbol.code.strip() + "\n" for symbol in symbols), + reference_context=reference_context, + snippet=snippet, dependent_code=dependent_code, prior_translation=prior_translation, feedback=feedback, - translation=translation, ) + pred.failure = None + pred.wrappers = {} + pred.failed_tests = set() if not pred.success: + pred.failure = "translate" return pred # Write translation to crate - translation = pred.translation.code.strip() + "\n" + translation = pred.translation with self.crate.rust_src_path.open("a") as f: - f.write(translation + "\n") + f.write(translation.text + "\n") + + if self.wrap_symbol is None: + # If we don't want a wrapper, then we are done + return pred # Generate wrapper, that may modify the translation, for each symbol - unsafe_translation = pred.translation.code - wrappers: list[dspy.Prediction] = [] + unsafe_translation = translation + wrappers: dict[str, 
dspy.Prediction] = {} for symbol in symbols: # We can only hybrid build-test functions and variables if not (symbol.is_function and symbol.is_definition) and not symbol.is_variable: @@ -242,31 +292,47 @@ def translate( continue # Wrap function or annotate variable - wrapper = self.wrap_symbol(symbol, reference_code, unsafe_translation) + prior_wrapper = prior_wrappers.get(symbol.name, None) + wrapper = self.wrap_symbol( + symbol=symbol, + reference_code=reference_code, + translation=unsafe_translation, + support_code=support_code + snippet + dependent_code, + prior_wrapper=prior_wrapper, + ) unsafe_translation = wrapper.translation - # Only functions needs to be cached since an LLM does not operate on variables - if symbol.is_function and symbol.is_definition: - wrappers.append(wrapper) + # Save function wrappers for next retry and caching + if symbol.is_function and symbol.is_definition and "wrapper" in wrapper: + wrappers[symbol.name] = wrapper # If wrapping failed exit early if not wrapper.success: pred.success = False + pred.failure = "wrap" pred.feedback = wrapper.feedback break # Try testing symbol and exit early if it fails - if not self.test_symbol: + test_symbol = self.test_symbol + if test_symbol is None: continue - test = self.test_symbol(symbol) + test = test_symbol(symbol, skip=sorted(self._failed_tests)) + pred.failed_tests.update( + name for name, success in test.results.items() if not success + ) if not test.success: pred.success = False + pred.failure = "test" pred.feedback = test.feedback break # Cache successful translation and wrappers if pred.success: self.translate_symbol.write_cache(pred) - for wrapper in wrappers: + for wrapper in wrappers.values(): self.wrap_symbol.write_cache(wrapper) + + # Return wrappers for next retry + pred.wrappers = {name: wrapper.wrapper for name, wrapper in wrappers.items()} return pred diff --git a/src/ideas/translate_snippet.py b/src/ideas/translate_snippet.py index 142cead..1e6afe0 100644 --- 
a/src/ideas/translate_snippet.py +++ b/src/ideas/translate_snippet.py @@ -9,33 +9,114 @@ from pathlib import Path import dspy +from dspy.utils.exceptions import AdapterParseError +from dspy.utils.usage_tracker import track_usage +from dspy.dsp.utils.settings import settings -from .tools import Crate -from .adapters import Code +from .tools import Crate, LARGE_PROJECT +from .ast import CodeC +from .ast_rust import CodeRust +from .model import format_usage logger = logging.getLogger("ideas.translate_snippet") -CodeC = Code["c"] -CodeRust = Code["rust"] - class SnippetTranslatorSignature(dspy.Signature): """ - Generate an idiomatic, memory-safe Rust translation of the snippet. - The reference_code contains Rust code that should be used by the translation. - The snippet contains a single C definition to translate to idiomatic, memory-safe Rust. - The dependent_code contains C code that uses the C snippet. - Reason about the dependent_code to understand any special memory management or complex ownership requirements a safe and idiomatic translation may need to take into account. - Ensure the translation of the snippet does not use any unsafe constructs! - Do not refactor the reference_code in the translation! - Do not translate dependent_code to Rust in the translation! - Do not define any implementations (`impl`) in the translation! - Always assume all C integer arithmetic operations on the underlying value are intended to have wrapping semantics, and thus any translation should use Rust's wrapping arithmetic functions like `wrapping_add`, `wrapping_shr`, etc.. - Analyze all bitwise operations carefully, especially rotations. - For all bitwise operations, including those that may appear to swap bits for bytes, implement the behavior exactly as written in the C code, without making assumptions about intent. 
- For mutable global state, always translate to `std::sync::Mutex`-backed statics, use only the short names `Mutex` and `MutexGuard` (never `::std::sync::Mutex` nor `std::sync::Mutex` in emitted code), and require all accesses to go through `lock()`/`try_lock()` guards instead of `static mut` or other unsafe global mutation patterns. - Use the feedback about the prior_translation, if provided, when generating the Rust translation. + Generate an idiomatic, memory-safe Rust translation of a single C definition. + + # Inputs + + - `reference_code`: Existing Rust code the translation must build on. Use it as-is; do not refactor it. + - `snippet`: The single C definition to translate. + - `dependent_code`: C code that uses the snippet. Use it only to understand ownership, lifetime, and memory-management requirements; do not translate it. + - `prior_translation` and `feedback`: If provided, treat the feedback as a critique of the prior translation and address it in the new translation. + + # Hard constraints + + - The translation must contain no `unsafe` constructs. + - Do not include `#![forbid(unsafe_code)]` in the translation since it is included by default. + - Do not define any `impl` blocks. + - Do not weaken behavior with stubs, fallback defaults, relaxed assertions, or intentionally partial implementations. + + # Faithfulness to C semantics + + The overarching rule: reproduce the C code's observable behavior exactly. Do not "fix", simplify, or second-guess the C code's intent. + + ## Arithmetic and expressions + + - Treat all C integer arithmetic as wrapping. Use Rust's wrapping methods (`wrapping_add`, `wrapping_sub`, `wrapping_shr`, etc.). + - Rust postfix operators (method calls, field access, indexing) bind tighter than unary operators (`-`, `!`), infix operators (`+`, `-`, `&`, `|`, `^`), and casts (`as`). 
+ - General receiver rule for postfix chaining: whenever the receiver is anything other than a simple identifier/path, parenthesize the full receiver first, then chain as `(EXPR).method(...)`, `(EXPR).field`, `(EXPR)[idx]`. Apply this uniformly to literals, unary expressions (including unary `-`), casts, and compound expressions; never rely on implicit precedence for the receiver. + - Preserve C operator precedence and associativity exactly. + - Preserve C's implicit signed/unsigned conversion behavior in mixed expressions and comparisons. + - Implement bitwise operations (especially rotations and byte/bit shuffles) literally as written. Do not infer "intent" such as byte-swapping. + - Reproduce inequality direction in bounds and length guards exactly (`>` vs `<`, `>=` vs `<=`). An inverted guard reverses the safety behavior. + + ## Integer text parsing + + - Accept the full range of C-valid inputs, including negatives that wrap into unsigned types (`-1i32 as u8 == 255`). + - Always parse into a wide intermediate type that can represent the full C-valid input range before the final wrapping cast (at minimum `i64` for both narrow signed and narrow unsigned targets; `u64` is also acceptable where appropriate for unsigned-only flows), then apply a wrapping cast to the destination. Never parse digits directly as a narrow or unsigned destination type — that path rejects negatives and can overflow before the wrapping cast can run. + - When the C code uses `scanf`/`sscanf`-style conversion, accept a leading numeric prefix and ignore trailing non-digit characters. Do not use bare `str::parse::()` on the full trimmed string. + + ## scanf / sscanf behavior + + - Distinguish conversion failure (no match) from EOF; do not collapse them. + - On conversion failure, leave destination variables holding their prior values. Declare such variables as `mut` bindings *outside* any retry loop so they retain their last successful value. 
+ - On conversion failure, do not advance the input position; the unmatched bytes must remain available for the next read. + - Respect field widths and scansets exactly. + + ## Strings and NUL termination + + - Treat any length-sensitive operation on a C string buffer (`strlen`, `%s`-style usage, comparisons, hashing, etc.) as ending at the first NUL byte. + - For pointer+length inputs, classify semantics before decoding: if the C code treats the data as a string (`strcmp`, `strlen`, `%s`, token parsing, command dispatch, pattern matching), normalize at ingress by truncating at the first `\0`; if the C path is fixed-length or binary, preserve embedded `\0` bytes and honor the explicit length. + - When converting a NUL-terminated C string buffer into a Rust `String` for storage or downstream text processing, truncate at the first `\\0` *at the point of conversion*, not at the point of use. Stored string values that model C strings must never contain bytes at or after the NUL. + - Before any comparison, pattern match, token parse, or command dispatch on C-origin string data, normalize at ingress by truncating at the first NUL byte. + - Apply the same normalization policy to all operands in the same logical operation. Do not compare a length-decoded value that still includes trailing `\0` bytes against a C-string-decoded value already truncated at `\0`. + - When both pointer and length are present for string-style data, use length only as a safety bound for reads; derive semantic content from C string termination and stop at the first `\0`. + - Truncate once at the ingestion boundary and pass only normalized string values to downstream logic. Do not defer truncation to arbitrary leaf helpers when values are stored or reused across operations. + - Do not compare raw decoded Rust `String` values that may include trailing NUL bytes when those values represent C strings. 
+ - This rule applies only when the original C code is treating the data as a NUL-terminated string (for example `strlen`, `%s`, string comparison, or string parsing). Do not truncate fixed-length, length-delimited, or binary buffers merely because they may contain `\\0`; preserve embedded NUL bytes unless the C code's semantics require string termination. + + ## Fixed-buffer line input (`fgets`) + + - Do not replace `fgets(buf, N, stdin)` with `read_line` or a bulk `io::stdin().read()`. Both consume too much input. + - Replicate `fgets`: read at most N-1 bytes, stop after the first `'\\n'`, and leave all remaining input in stdin. Read byte-by-byte or use `BufRead::fill_buf` + `consume`. + - When storing or comparing `fgets` output as a Rust `String`, truncate at the first `'\\n'` or `'\\0'`, whichever comes first. `fgets` retains the newline before the NUL, and keeping it breaks C-style trimmed comparisons. + + ## Return values and pointer arithmetic + + - Return exactly what the C function returns. If C returns a success/failure code, do not substitute a byte count or length. + - C pointer subtraction (`end - start`) yields a count of elements, not bytes. Preserve the exact value. + + ## Pointer identity + + - When C compares pointers for identity, compare identity-equivalent Rust references. Cloning or copying changes identity and breaks the comparison. + - Translate a C function that returns a pointer into a global/static container (e.g., `return &table[i]`) as a function returning `&T` into that container, not an owned clone. + - If the container is locked: acquire the lock in the caller and borrow `&T` from the held guard. If the existing accessor locks internally and returns an owned value, the caller must bypass it — lock the container directly, borrow references from the guard, and finish all identity comparisons before releasing. + + ## Mutable global state and locking + + - Never lock the same mutex/`RwLock` more than once in a single expression. 
+ - Acquire one guard, read/compute/write through it, then release. + - Do not call helpers that acquire a lock (including stdin's implicit lock) from a scope that already holds that lock. Pick one locking model per code path. + + ## Binary parsers and slice contracts + + - Validate every length-derived slice with an explicit bounds check before slicing or copying. + - Never reinterpret payload bytes as headers, and never re-derive a chunk's length by re-parsing its payload — use `slice.len()` on the bytes the helper was given. + - When consuming bytes from a state-machine bitreader/bytestream, read directly from the reader. Do not reconstruct a backing slice and index into it; reconstructed slices may be short. + - When the C source reads sequentially from a composite buffer (e.g., a primary `&[u32]` plus a trailing partial word), iterate through every component in order. Do not read only the primary array and silently drop the tail. + + ## Auxiliary state introduced by the translation + + If the translation introduces an auxiliary data structure (thread-local, `HashMap`, `RefCell>`, etc.) to represent metadata that C tracked via struct fields or raw pointers, every function that conceptually reads or writes that metadata in C must read or write the auxiliary structure in Rust. A stub claiming the data is "not accessible in safe Rust" is never acceptable once such a mechanism exists. + + ## Observable output + + - Reproduce stdout/stderr text, spacing, punctuation, and line breaks exactly. + - When the C source contains multi-byte UTF-8 literals (e.g., box-drawing characters), count Unicode scalar values, not bytes. Reproduce the same number of code points. 
""" reference_code: CodeRust = dspy.InputField() @@ -46,88 +127,101 @@ class SnippetTranslatorSignature(dspy.Signature): translation: CodeRust = dspy.OutputField() +_crate_dependencies = """ +# Crate dependencies: +The Rust project has visibility into the following crates: +- `flate2` for DEFLATE compression and decompression +- `regex` for regular expression parsing and matching + +Use functions from these crates as needed to translate the C code to equivalent, memory-safe Rust. +""" + + class SnippetTranslator(dspy.Module): def __init__( self, translator: type[dspy.Module], crate: Crate, max_iters: int = 5, - readonly_cache: Path | None = None, ): super().__init__() - self.translate = translator(SnippetTranslatorSignature) + signature = SnippetTranslatorSignature + if LARGE_PROJECT: + signature = signature.with_instructions( + "\n\n".join([signature.instructions, _crate_dependencies]) + ) + + self._translate = translator(signature) self.crate = crate self.max_iters = max_iters - self.readonly_cache = readonly_cache self.cache = _init_cache(crate.workspace_root / "cache.db") def forward( self, name: str, - reference_code: str, - snippet: str, - dependent_code: str, - prior_translation: str = "", + reference_code: CodeRust, + reference_context: CodeRust, + snippet: CodeC, + dependent_code: CodeC, + prior_translation: CodeRust | None = None, feedback: str = "", - translation: str = "", + translation: CodeRust | None = None, ) -> dspy.Prediction: logger.info(f"Translating snippet `{name}` ...") # If the snippet is empty, use static translation - if not snippet: - translation = f"// Empty snippet `{name}`" - - # Prefer supplied translation, crate cache, then read-only cache. 
- translation = ( - translation - or _read_cache(self.cache, name, snippet) - or _read_cache(self.readonly_cache, name, snippet) - ) + if not snippet.text: + translation = CodeRust(f"// Empty snippet `{name}`") + + # Use cache when no translation nor prior translation + if translation is None and prior_translation is None: + translation = _read_cache(self.cache, name, snippet) + else: + logger.info("Ignoring snippet cache...") + orig_rust_src = self.crate.rust_src_path.read_bytes() pred = dspy.Prediction() builds = False - dspy_exception = None for i in range(max(self.max_iters, 1)): # Use the translation from the prior iteration as feedback for the next iteration if i > 0: prior_translation = translation - # Ensure any translated snippet is safe and uses std::sync::Mutex - rust_src = "#![forbid(unsafe_code)]\n" - rust_src += "use std::sync::{Mutex, MutexGuard};\n\n" - rust_src += (reference_code + "\n") if reference_code else "" + # Ensure any translated snippet is safe + rust_src = CodeRust("#![forbid(unsafe_code)]") + rust_src += reference_code # Use prior translation as the translation on first iteration only. # This allows static translations that violate safety, which will be fixed by the LLM! - if i == 0 and translation: - pred = dspy.Prediction(translation=CodeRust(code=translation)) - else: - try: - pred = self.translate( - reference_code=CodeRust(code=rust_src), - snippet=CodeC(code=snippet), - dependent_code=CodeC(code=dependent_code), - prior_translation=CodeRust(code=prior_translation), - feedback=feedback, - ) - dspy_exception = None - except Exception as e: - logger.exception( - f"DSPy exception while translating snippet `{name}` on iteration {i + 1}/{self.max_iters}!" 
- ) - dspy_exception = e - # Attempt again before any build logic - continue + try: + pred = self.translate( + rust_src if not LARGE_PROJECT else reference_context, + snippet, + dependent_code, + prior_translation, + feedback, + translation if i == 0 else None, + ) + except AdapterParseError: + logger.exception( + f"DSPy exception while translating snippet `{name}` on iteration {i + 1}/{self.max_iters}!" + ) + # If this is the last iteration, raise + if i == max(self.max_iters, 1) - 1: + raise + # Otherwise attempt again before any build logic + continue - translation = pred.translation.code + translation = pred.translation + assert isinstance(translation, CodeRust) if translation in reference_code: - translation = f"// duplicate snippet `{name}` detected" + translation = CodeRust(f"// duplicate snippet `{name}` detected") if translation == prior_translation: logger.warning("Snippet translation loop detected!") # Append translation and check if it builds - rust_src += translation.strip() + "\n" - self.crate.rust_src_path.write_text(rust_src) + rust_src += translation + self.crate.rust_src_path.write_text(rust_src.text) self.crate.vcs.add(self.crate.rust_src_path) # FIXME: Checking name for c:@F@main is brittle but we have no better way here. # The proper way to fix is to yield the translation back to the caller so it can @@ -136,34 +230,80 @@ def forward( if not builds: feedback = "Running `cargo build` fails!\n" + feedback + if CodeRust("#![forbid(unsafe_code)]") in translation: + feedback = "Do not include `#![forbid(unsafe_code)]` in the translation!" 
+ builds = False + + usage = format_usage(pred) + # Exit early if we build if builds: - msg = f"Translated snippet `{name}`" + msg = f"Translated snippet `{name}`: {usage}" logger.info(msg) msg += f"\n\n# Reasoning\n{pred.reasoning}" if "reasoning" in pred else "" self.crate.vcs.commit(msg) break - msg = f"Failed to translate snippet `{name}` ({i + 1}/{self.max_iters})" + msg = f"Failed to translate snippet `{name}` ({i + 1}/{self.max_iters}): {usage}" logger.error(msg) msg += f"\n\n# Reasoning\n{pred.reasoning}" if "reasoning" in pred else "" msg += f"\n\n# Feedback\n{feedback}" if feedback else "" self.crate.vcs.commit(msg) self.crate.rust_src_path.write_bytes(orig_rust_src) - # All iterations failed because of DSPy exceptions - if dspy_exception: - raise dspy_exception pred.name = name pred.snippet = snippet pred.reference_code = reference_code pred.dependent_code = dependent_code - pred.prior_translation = prior_translation + pred.prior_translation = prior_translation or CodeRust() pred.feedback = feedback - pred.translation = CodeRust(code=translation) + pred.translation = translation pred.success = builds return pred + def translate( + self, + rust_src: CodeRust, + snippet: CodeC, + dependent_code: CodeC, + prior_translation: CodeRust | None, + feedback: str, + translation: CodeRust | None, + ) -> dspy.Prediction: + """Get a prediction for the current iteration.""" + parent_usage_tracker = settings.usage_tracker + if translation is not None: + pred = dspy.Prediction(translation=translation) + if parent_usage_tracker is not None: + pred.set_lm_usage({}) + else: + if parent_usage_tracker is None: + pred = self._translate( + reference_code=rust_src, + snippet=snippet, + dependent_code=dependent_code, + prior_translation=prior_translation or CodeRust(), + feedback=feedback, + ) + else: + with track_usage() as local_usage_tracker: + pred = self._translate( + reference_code=rust_src, + snippet=snippet, + dependent_code=dependent_code, + 
prior_translation=prior_translation or CodeRust(), + feedback=feedback, + ) + lm_usage = local_usage_tracker.get_total_tokens() + pred.set_lm_usage(lm_usage) + for lm_name, usage_entry in lm_usage.items(): + parent_usage_tracker.add_usage(lm_name, usage_entry) + return pred + def write_cache(self, pred: dspy.Prediction) -> None: + # If prediction was not generated by an LM then don't write it to cache + if not pred.get_lm_usage(): + return + _write_cache( self.cache, pred.name, @@ -172,7 +312,7 @@ def write_cache(self, pred: dspy.Prediction) -> None: pred.dependent_code, pred.prior_translation, pred.feedback, - pred.translation.code, + pred.translation, pred.success, ) @@ -203,33 +343,39 @@ def _init_cache(cache: Path | None) -> Path | None: return cache -def _read_cache(cache: Path | None, name: str, snippet: str) -> str: - translation = "" +def _read_cache(cache: Path | None, name: str, snippet: CodeC) -> CodeRust | None: if cache is None: - return translation + return None with sqlite3.connect(cache) as conn: try: row = conn.execute( "SELECT translation FROM snippet_translations WHERE snippet=? AND success=1 ORDER BY id DESC LIMIT 1", - (snippet,), + (snippet.text,), ).fetchone() + if row is None: + row = conn.execute( + "SELECT translation FROM snippet_translations WHERE name=? 
AND success=1 ORDER BY id DESC LIMIT 1", + (name,), + ).fetchone() except Exception: row = None if row: - logger.info(f"Cache hit for `{name}`") - translation = row[0] - return translation + logger.info(f"Cache hit for snippet `{name}`") + return CodeRust(row[0]) + else: + logger.info(f"Cache miss for snippet `{name}`") + return None def _write_cache( cache: Path | None, name: str, - snippet: str, - reference_code: str, - dependent_code: str, - prior_translation: str, + snippet: CodeC, + reference_code: CodeRust, + dependent_code: CodeC, + prior_translation: CodeRust, feedback: str, - translation: str, + translation: CodeRust, success: bool, ): if cache is None: @@ -243,12 +389,12 @@ def _write_cache( """, ( name, - snippet, - reference_code, - dependent_code, - prior_translation, + snippet.text, + reference_code.text, + dependent_code.text, + prior_translation.text, feedback, - translation, + translation.text, int(success), ), ) diff --git a/src/ideas/wrapper.py b/src/ideas/wrapper.py index ce403a3..971847d 100644 --- a/src/ideas/wrapper.py +++ b/src/ideas/wrapper.py @@ -5,66 +5,96 @@ # import re -import sys import sqlite3 import logging +import textwrap from pathlib import Path from collections import OrderedDict -from dataclasses import dataclass, field import dspy -import hydra -from omegaconf import MISSING -from hydra.core.config_store import ConfigStore -from hydra.core.hydra_config import HydraConfig - -from ideas import adapters, model, ModelConfig, GenerateConfig -from ideas.tools import Crate, check_rust, run_subprocess -from ideas import create_translation_unit, extract_info_c -from ideas.adapters import Code -from ideas.init.consolidate import get_symbols_and_dependencies -from ideas.ast_rust import get_nodes, get_root, validate_changes -from ideas.ast import Symbol, clang_make_global_ +from dspy.utils.exceptions import AdapterParseError +from dspy.utils.usage_tracker import track_usage +from dspy.dsp.utils.settings import settings + +from ideas.tools 
import Crate, check_rust, run_subprocess, LARGE_PROJECT +from ideas.ast_rust import CodeRust, validate_changes, mangle +from ideas.ast import CodeC, Symbol +from ideas.ast import clang_make_global_, clang_make_extern_, clang_make_bindable_ +from ideas.model import format_usage logger = logging.getLogger("ideas.wrapper") -CodeRust = Code["rust"] -@dataclass -class WrapperConfig: - filename: Path = MISSING - model: ModelConfig = field(default_factory=ModelConfig) - generate: GenerateConfig = field(default_factory=GenerateConfig) +class Signature(dspy.Signature): + """ + Generate a C-compatible FFI wrapper for `crate::{symbol_name}`. + + # Goal - cargo_toml: Path = MISSING + Produce a `wrapper` that callers of the original C symbol can link against unchanged. The implementation of `crate::{symbol_name}` lives in the crate at "{crate_path}"; the wrapper will be written to "{wrapper_path}". - max_iters: int = 5 - readonly_cache: Path | None = None + # Template - vcs: str = "none" + - Use `example_wrapper` as the template for `wrapper`. Preserve its function signature, attributes, and module structure exactly. + - Replace only the `unimplemented!()` body with an implementation that calls `crate::{symbol_name}`. + # Type conversions -cs = ConfigStore.instance() -cs.store(name="wrapper", node=WrapperConfig) + Types in `crate::wrapper::` (bindgen-generated, C-compatible layout) are *not* layout-compatible with those in `crate::` (idiomatic Rust). The wrapper must: + 1. Copy field values from each `crate::wrapper::` argument into a fresh `crate::` value before the call. + 2. Call `crate::{symbol_name}` with the converted values. + 3. Copy result/output values back from `crate::` types into the `crate::wrapper::` types the C ABI expects. -class Signature(dspy.Signature): - """ - Output a C-compatible FFI wrapper for `crate::{symbol_name}`. - Use `example_wrapper` as a template for the `wrapper` and replace the `unimplemented!()` part with an implementation. 
- The implementation for `crate::{symbol_name}` is in a crate that was read from "{crate_path}". - Assume the types in `crate::wrapper::` do not have the same memory layout as those in `crate::`. - The wrapper should properly convert between `crate::wrapper::` and `crate::` types by copying the values from the wrapper type to the crate type before calling `crate::{symbol_name}`. - After this conversion, the wrapper should call the Rust function `crate::{symbol_name}`. - After the call to `crate::{symbol_name}`, the wrapper should convert back the `crate::` types to `crate::wrapper::` types. - The wrapper will be written to "{wrapper_path}". - You will receive feedback about a `prior_wrapper` attempt that should be fixed, if any. - Use the `build_feedback` from `cargo build` about possible build errors. - Use the `scope_feedback` about possible deviations from the templated `example_wrapper`. + Use `support_code` to recover the original C types behind opaque or erased Rust types (notably `void*`, `*mut c_void`, and untyped byte buffers) so each field is converted at its true C type. + + # Raw pointer handling + + - Null-check every raw pointer parameter before its first dereference. On null, return the same value the C function returns for null input (typically an error code, `false`, `-1`, or `null`) — never dereference and panic. Use `support_code` to determine the correct null-input sentinel. + - Treat every mutable raw pointer parameter as in-out unless `support_code` clearly proves it is read-only. + - For every out / in-out pointer parameter, write the converted result back through the raw pointer after the Rust call. Struct and array out-parameters require full field/element write-back. + - Do not route pointer-identity comparisons through detached clones/copies; identity must survive the boundary. 
+ + # Mutable `char*` / byte buffers + + Reproduce C buffer-mutation semantics exactly: + + - If the Rust function computes a normalized or truncated value, write it back into the caller's buffer. + - Classify each pointer+length input by C semantics before conversion: if the C code treats it as a string (`strcmp`, `strlen`, `%s`, token parsing, command dispatch, pattern matching), normalize at ingress by truncating at the first `\0`; if it is a fixed-length or binary buffer, preserve embedded `\0` bytes and use the explicit length. + - Apply the same normalization policy to all operands in the same logical operation. Do not compare a length-decoded string that still contains trailing `\0` against a C-string-decoded operand that was truncated at `\0`. + - When both pointer and length are present for string-style data, use length only as a safety bound for reads; derive semantic content from C string termination and stop at the first `\0`. + - Preserve NUL termination wherever C expects it; never write past the C-implied capacity. + - When C truncates a buffer by writing `'\0'` at a position found by a search (e.g., `strstr`, `strchr`, or a manual scan), the wrapper must re-derive that same offset from the raw buffer and write the NUL byte explicitly — even if the Rust function has internalized the truncation and does not expose the offset. Use `support_code` to recover the exact search function, offset, and capacity assumptions the C original relied on. + - Treat in-place buffer mutations as **primary observable effects**. Callers (and tests) assert them directly; omitting them silently fails every assertion on the buffer regardless of the parsed return values. + + # Panic safety at the ABI boundary + + Wrap every call into Rust code that could panic in `std::panic::catch_unwind`. On a caught panic, return the appropriate C error value for the return type (`0`, `false`, `-1`, `null`, ...). 
Letting a panic cross an `extern "C"` boundary is undefined behavior and aborts the process in practice. Omit `catch_unwind` only when the called Rust code provably cannot panic. + + # Calling libc / system functions + + The crate already depends on the `libc` crate. When the wrapper needs to call a C standard library or POSIX function (e.g. `fdopen`, `close`, `malloc`, `free`, `memcpy`, `strlen`, `open`, `read`, `write`, `fopen`, `fclose`, ...), call it through the `libc` crate (`unsafe {{ ::libc::fdopen(fd, mode) }}`). + + - **Do not** emit `extern "C" {{ ... }}` (or `unsafe extern "C" {{ ... }}`) blocks declaring libc / POSIX / system functions, and do not emit `#[link(name = "c")]` (or similar) link attributes for libc symbols. + - The only `extern "C"` items permitted in generated wrapper code are those already present in `example_wrapper`; do not introduce any new `extern "C"` items. + - If a needed symbol is not available in `libc`, prefer a safe Rust equivalent from `std` (e.g. `std::ptr`, `std::ffi::CStr`, `std::fs`, `std::io`). If neither is available, do not declare a new foreign function; explain the limitation in your reasoning. + + # Hard constraints + + - Do not relax behavior, skip write-backs, or use placeholder/stub logic. + - Do not declare libc / POSIX functions in `extern "C"` blocks; call them via the `libc` crate. + + # Inputs and feedback + + - `support_code`: the original C source that was translated to Rust. + - `prior_wrapper`: a previous attempt to fix, if any. + - `build_feedback`: errors from `cargo build`. Address them. + - `scope_feedback`: deviations from the `example_wrapper` template. Address them. """ # FIXME: Move crate and example_wrapper into instructions? 
crate: CodeRust = dspy.InputField() + support_code: CodeC = dspy.InputField() example_wrapper: CodeRust = dspy.InputField() prior_wrapper: CodeRust = dspy.InputField() build_feedback: str = dspy.InputField() @@ -73,52 +103,113 @@ class Signature(dspy.Signature): wrapper: CodeRust = dspy.OutputField() -def generate_unimplemented_wrapper(path: Path, symbol_name: str) -> str: - orig_src = path.read_bytes() - try: - # Make sure symbol is global, this is why we save original bytes - clang_make_global_(path, symbol_name) +class HybridSignature(Signature): + """ + Generate a C-compatible FFI wrapper for `crate::{symbol_name}` in a hybrid C/Rust build where C globals and the Rust port must stay in sync. - # unsafe extern "C" { - # pub fn helloworld() -> ::std::os::raw::c_int; - # } - ok, bindgen_wrapper, error, _ = run_subprocess( - [ - "bindgen", - "--disable-header-comment", - "--no-doc-comments", - "--no-layout-tests", - "--sort-semantically", - str(path), - "--allowlist-function", - symbol_name, - ] - ) - finally: - path.write_bytes(orig_src) - if not ok: - raise ValueError( - f"Bindgen failed to generate wrapper for `{symbol_name}`!\nError:\n{error}" - ) + # Goal + + Produce a `wrapper` that callers of the original C symbol can link against unchanged. The implementation of `crate::{symbol_name}` lives in the crate at "{crate_path}"; the wrapper will be written to "{wrapper_path}". + + # Template + + - Use `example_wrapper` as the template for `wrapper`. Preserve its function signature, attributes, and module structure exactly. + - Replace only the `unimplemented!()` body with an implementation that calls `crate::{symbol_name}`. 
- if bindgen_wrapper.strip() == "": - raise ValueError(f"Bindgen generated an empty wrapper for `{symbol_name}`!") + # Type conversions - # #[unsafe(export_name="helloworld")] - # pub extern "C" fn helloworld() -> ::std::os::raw::c_int { + Types in `crate::wrapper::` (bindgen-generated, C-compatible layout) are *not* layout-compatible with those in `crate::` (idiomatic Rust). The wrapper must: + + 1. Copy field values from each `crate::wrapper::` argument into a fresh `crate::` value before the call. + 2. Call `crate::{symbol_name}` with the converted values. + 3. Copy result/output values back from `crate::` types into the `crate::wrapper::` types the C ABI expects. + + Use `support_code` to recover the original C types behind opaque or erased Rust types (notably `void*`, `*mut c_void`, and untyped byte buffers) so each field is converted at its true C type. + + # Global synchronization + + If `crate::{symbol_name}` reads or writes globals, synchronize them in the wrapper: + + - Before the call, copy each readable global from the bindgen-generated extern `crate::wrapper::{{var_name}}::{{var_name}}` into the Rust global `crate::{{var_name}}`. + - After the call, copy each writable global from `crate::{{var_name}}` back to `crate::wrapper::{{var_name}}::{{var_name}}`. + + # Raw pointer handling + + - Null-check every raw pointer parameter before its first dereference. On null, return the same value the C function returns for null input (typically an error code, `false`, `-1`, or `null`) — never dereference and panic. Use `support_code` to determine the correct null-input sentinel. + - Treat every mutable raw pointer parameter as in-out unless `support_code` clearly proves it is read-only. + - For every out / in-out pointer parameter, write the converted result back through the raw pointer after the Rust call. Struct and array out-parameters require full field/element write-back. 
+ - Do not route pointer-identity comparisons through detached clones/copies; identity must survive the boundary. + + # Mutable `char*` / byte buffers + + Reproduce C buffer-mutation semantics exactly: + + - If the Rust function computes a normalized or truncated value, write it back into the caller's buffer. + - Classify each pointer+length input by C semantics before conversion: if the C code treats it as a string (`strcmp`, `strlen`, `%s`, token parsing, command dispatch, pattern matching), normalize at ingress by truncating at the first `\0`; if it is a fixed-length or binary buffer, preserve embedded `\0` bytes and use the explicit length. + - Apply the same normalization policy to all operands in the same logical operation. Do not compare a length-decoded string that still contains trailing `\0` against a C-string-decoded operand that was truncated at `\0`. + - When both pointer and length are present for string-style data, use length only as a safety bound for reads; derive semantic content from C string termination and stop at the first `\0`. + - Preserve NUL termination wherever C expects it; never write past the C-implied capacity. + - When C truncates a buffer by writing `'\0'` at a position found by a search (e.g., `strstr`, `strchr`, or a manual scan), the wrapper must re-derive that same offset from the raw buffer and write the NUL byte explicitly — even if the Rust function has internalized the truncation and does not expose the offset. Use `support_code` to recover the exact search function, offset, and capacity assumptions the C original relied on. + - Treat in-place buffer mutations as **primary observable effects**. Callers (and tests) assert them directly; omitting them silently fails every assertion on the buffer regardless of the parsed return values. + + # Panic safety at the ABI boundary + + Wrap every call into Rust code that could panic in `std::panic::catch_unwind`. 
On a caught panic, return the appropriate C error value for the return type (`0`, `false`, `-1`, `null`, ...). Letting a panic cross an `extern "C"` boundary is undefined behavior and aborts the process in practice. Omit `catch_unwind` only when the called Rust code provably cannot panic. + + # Calling libc / system functions + + The crate already depends on the `libc` crate. When the wrapper needs to call a C standard library or POSIX function (e.g. `fdopen`, `close`, `malloc`, `free`, `memcpy`, `strlen`, `open`, `read`, `write`, `fopen`, `fclose`, ...), call it through the `libc` crate (`unsafe {{ ::libc::fdopen(fd, mode) }}`). + + - **Do not** emit `extern "C" {{ ... }}` (or `unsafe extern "C" {{ ... }}`) blocks declaring libc / POSIX / system functions, and do not emit `#[link(name = "c")]` (or similar) link attributes for libc symbols. + - The only `extern "C"` items permitted in generated wrapper code are those already present in `example_wrapper`; do not introduce any new `extern "C"` items. + - If a needed symbol is not available in `libc`, prefer a safe Rust equivalent from `std` (e.g. `std::ptr`, `std::ffi::CStr`, `std::fs`, `std::io`). If neither is available, do not declare a new foreign function; explain the limitation in your reasoning. + + # Hard constraints + + - Do not relax behavior, skip write-backs, or use placeholder/stub logic. + - Do not declare libc / POSIX functions in `extern "C"` blocks; call them via the `libc` crate. + + # Inputs and feedback + + - `support_code`: the original C source that was translated to Rust. + - `prior_wrapper`: a previous attempt to fix, if any. + - `build_feedback`: errors from `cargo build`. Address them. + - `scope_feedback`: deviations from the `example_wrapper` template. Address them. 
+ """ + + +def generate_unimplemented_wrapper(path: Path, symbol_name: str) -> CodeRust: + # unsafe extern "C" { + # #[link_name = "\u{1}match"] + # pub fn match_( + # threshold: f64, + # ) -> ::std::os::raw::c_int; + # } + bindgen_wrapper = bindgen(path, symbol_name) + + # #[unsafe(export_name="match")] + # pub extern "C" fn match_( + # threshold: f64, + # ) -> ::std::os::raw::c_int { # unimplemented!(); # } unimplemented_wrapper = re.sub( - r'unsafe extern "C" {\s+pub fn (.*);\s+}', + r'unsafe extern "C" {\s*.*\s+pub fn (.*);\s+}', rf'#[unsafe(export_name="{symbol_name}")]\npub extern "C" fn \1 {{\n unimplemented!();\n}}', - bindgen_wrapper, + bindgen_wrapper.text, flags=re.DOTALL, ) - if unimplemented_wrapper == bindgen_wrapper: + if unimplemented_wrapper == bindgen_wrapper.text: raise ValueError( f"Failed to convert bindgen output to function for `{symbol_name}`!\nWrapper:\n{unimplemented_wrapper}" ) - unimplemented_wrapper = unimplemented_wrapper.rstrip() + + # Format unimplemented wrapper using rustfmt + ok, unimplemented_wrapper, error, _ = run_subprocess( + ["rustfmt"], input=unimplemented_wrapper + ) + if not ok: + raise ValueError(f"rustfmt failed!\n{error}") # Validate the template success, output = check_rust( @@ -128,7 +219,7 @@ def generate_unimplemented_wrapper(path: Path, symbol_name: str) -> str: raise ValueError( f"Failed to validate wrapper template for `{symbol_name}`!\nWrapper:\n{unimplemented_wrapper}\nError:\n{output}" ) - return unimplemented_wrapper.strip() + "\n" + return CodeRust(unimplemented_wrapper) class WrapperGenerator(dspy.Module): @@ -136,76 +227,83 @@ def __init__( self, crate: Crate, max_iters: int, - readonly_cache: Path | None = None, ) -> None: super().__init__() self.crate = crate self.max_iters = max_iters - self.readonly_cache = readonly_cache self.cache = _init_cache(crate.workspace_root / "cache.db") - # Add sync module to crate - sync_path = crate.rust_src_path.parent / "sync.rs" - 
sync_path.write_text((Path(__file__).parent / "sync.rs").read_text()) - self.crate.vcs.add(sync_path) - # Make sure wrapper module is in known state (i.e., empty) self.wrapper_path = crate.rust_src_path.parent / "wrapper.rs" self.wrapper_path.write_text("") - def forward(self, symbol: Symbol, reference_code: str, translation: str) -> dspy.Prediction: + def forward( + self, + symbol: Symbol, + reference_code: CodeRust, + translation: CodeRust, + prior_wrapper: CodeRust | None = None, + wrapper: CodeRust | None = None, + support_code: CodeC | None = None, + ) -> dspy.Prediction: if symbol.is_function and symbol.is_definition: - return self.wrap_function(symbol, reference_code, translation) + return self.wrap_function( + symbol, + reference_code, + translation, + prior_wrapper=prior_wrapper, + wrapper=wrapper, + support_code=support_code, + ) elif symbol.is_variable: - return self.annotate_variable(symbol, reference_code, translation) + self.wrap_variable_(symbol) + return dspy.Prediction(success=True, translation=translation) else: raise NotImplementedError - def annotate_variable( - self, symbol: Symbol, reference_code: str, translation: str - ) -> dspy.Prediction: - logger.info(f"Adding export_name attribute to variable `{symbol.name}` ...") - rust_src = self.crate.rust_src_path.read_text() - if translation not in rust_src: - raise RuntimeError("Translation must be on disk!") - - # Add export_name attribute to symbol translation - new_translation = export_first_unannotated_variable(translation, symbol.spelling) - if new_translation is None: - logger.error(f"Failed to add export_name attribute to variable `{symbol.name}`") - return dspy.Prediction( - success=False, - translation=translation, - feedback=f"Could not find a Rust variable named `{symbol.name}` in the translation!", - ) + def wrap_variable_(self, symbol: Symbol): + logger.info(f"Generating wrapper for variable `{symbol.name}` ...") - # Update Rust source with export_name attribute - rust_src = 
rust_src.replace(translation, new_translation) - self.crate.rust_src_path.write_text(rust_src) - self.crate.vcs.add(self.crate.rust_src_path) + # Variable wrappers are just bindings to C symbols + rust_spelling = mangle(symbol.spelling) + wrapper = bindgen(self.crate.c_src_path, symbol.spelling) + symbol_wrapper_path = self.wrapper_path.parent / "wrapper" / f"{rust_spelling}.rs" + symbol_wrapper_path.parent.mkdir(exist_ok=True, parents=True) + symbol_wrapper_path.write_text(wrapper.text) + self.crate.vcs.add(symbol_wrapper_path) - # Replace Rust Mutex with C ABI-compatible Mutex - RUST_MUTEX = "use std::sync::{Mutex, MutexGuard};" - C_ABI_MUTEX = "mod sync;\nuse crate::sync::{Mutex, MutexGuard};" - if RUST_MUTEX in rust_src: - rust_src = rust_src.replace(RUST_MUTEX, C_ABI_MUTEX) - self.crate.rust_src_path.write_text(rust_src) - self.crate.vcs.add(self.crate.rust_src_path) + success, output = self._build(symbol) + if not success: + raise RuntimeError(f"Failed to build crate!\n{output}") - self.crate.vcs.commit(f"Added export_name attribute to variable `{symbol.name}` ...") + # Permanently make variable global + clang_make_global_(self.crate.c_src_path, symbol.spelling) + self.crate.vcs.add(self.crate.c_src_path) + + # Reference symbol wrapper in wrapper module. 
+ with self.wrapper_path.open("a") as f: + f.write(f"pub mod {rust_spelling};\n") + self.crate.vcs.add(self.wrapper_path) - return dspy.Prediction(success=True, translation=new_translation) + msg = f"Wrapped variable `{symbol.name}`" + logger.info(msg) + self.crate.vcs.commit(msg) def wrap_function( self, symbol: Symbol, - reference_code: str, - translation: str, - wrapper: str = "", - prior_wrapper: str = "", + reference_code: CodeRust, + translation: CodeRust, + prior_wrapper: CodeRust | None = None, + wrapper: CodeRust | None = None, + support_code: CodeC | None = None, ) -> dspy.Prediction: # Don't bother wrapping main in binary crates if symbol.spelling == "main" and self.crate.is_bin: + # Permanently make main function extern + clang_make_extern_(self.crate.c_src_path, symbol.spelling) + self.crate.vcs.add(self.crate.c_src_path) + self.crate.vcs.commit(f"Made function `{symbol.name}` extern") return dspy.Prediction(success=True, translation=translation) logger.info(f"Generating wrapper for function `{symbol.name}` ...") @@ -214,23 +312,24 @@ def wrap_function( unimplemented_wrapper = generate_unimplemented_wrapper( self.crate.c_src_path, symbol.spelling ) - symbol_wrapper_path = self.wrapper_path.parent / "wrapper" / f"{symbol.spelling}.rs" + rust_spelling = mangle(symbol.spelling) + symbol_wrapper_path = self.wrapper_path.parent / "wrapper" / f"{rust_spelling}.rs" symbol_wrapper_path.parent.mkdir(exist_ok=True, parents=True) - symbol_wrapper_path.write_text(unimplemented_wrapper) - success, build_feedback = self._build(symbol.spelling) + symbol_wrapper_path.write_text(unimplemented_wrapper.text) + success, build_feedback = self._build(symbol) if not success: raise RuntimeError(f"The crate does not build!\n\n{build_feedback}") - # Prefer supplied wrapper, crate cache, then read-only cache. 
- wrapper = ( - wrapper - or _read_cache(self.cache, symbol.spelling, unimplemented_wrapper) - or _read_cache(self.readonly_cache, symbol.spelling, unimplemented_wrapper) - ) + # Use cache when no wrapper nor prior wrapper + if wrapper is None and prior_wrapper is None: + wrapper = _read_cache(self.cache, symbol.spelling, unimplemented_wrapper) + else: + logger.info("Ignoring wrapper cache...") # Generate dynamic signature and module for symbol - signature = Signature.with_instructions( - Signature.instructions.format( + signature_class = HybridSignature if not LARGE_PROJECT else Signature + signature = signature_class.with_instructions( + signature_class.instructions.format( symbol_name=symbol.spelling, crate_path=self.crate.rust_src_path.relative_to(self.crate.cargo_toml.parent), wrapper_path=symbol_wrapper_path.relative_to(self.crate.cargo_toml.parent), @@ -238,10 +337,14 @@ def wrap_function( ) generate_wrapper = dspy.ChainOfThought(signature) + # Construct crate context for generate_wrapper and format it + crate = ( + reference_code + translation + self.gather_wrappers(exclude_wrapper=rust_spelling) + ) + # Try generating wrapper up to max_iter times msg = "" success, build_feedback = False, "" - dspy_exception = None scope_feedback: OrderedDict[str, str] = OrderedDict() pred = dspy.Prediction() for i in range(max(self.max_iters, 1)): @@ -250,54 +353,61 @@ def wrap_function( prior_wrapper = wrapper try: - if i == 0 and wrapper: - pred = dspy.Prediction(wrapper=CodeRust(code=wrapper)) - else: - pred = generate_wrapper( - crate=CodeRust(code=reference_code + "\n" + translation), - example_wrapper=CodeRust(code=unimplemented_wrapper), - prior_wrapper=CodeRust(code=prior_wrapper), - build_feedback=build_feedback, - scope_feedback="\n\n".join(scope_feedback.values()), - ) - dspy_exception = None - except Exception as e: + pred = self.generate( + generate_wrapper, + crate, + support_code, + unimplemented_wrapper, + prior_wrapper, + build_feedback, + 
"\n\n".join(scope_feedback.values()), + wrapper if i == 0 else None, + ) + except AdapterParseError: logger.exception( f"DSPy exception while generating wrapper for `{symbol.name}` on iteration {i + 1}/{self.max_iters}!" ) - dspy_exception = e - # Attempt again before any build logic + # If this is the last iteration, raise + if i == max(self.max_iters, 1) - 1: + raise + # Otherwise attempt again before any build logic continue # Reset scope feedback scope_feedback.clear() - if pred.wrapper is None: + if "wrapper" not in pred or not isinstance(pred.wrapper, CodeRust): scope_feedback["no_wrapper"] = ( "No wrapper was generated. You must respect the template and instructions **exactly**!" ) wrapper = unimplemented_wrapper else: - wrapper = pred.wrapper.code.strip() + "\n" + wrapper = pred.wrapper # Validate that changes are in scope scope_feedback.update(validate_changes(wrapper, unimplemented_wrapper)) # TODO: Check for a single crate function call in scope # Write wrapper to disk and check if we build with unsafe code since wrappers can use unsafe code - symbol_wrapper_path.write_text(wrapper) + symbol_wrapper_path.write_text(wrapper.text) self.crate.vcs.add(symbol_wrapper_path) - success, build_feedback = self._build(symbol.spelling) + success, build_feedback = self._build(symbol) success = success and not build_feedback and not scope_feedback + usage = format_usage(pred) + if success: + # Permanently make function extern + clang_make_extern_(self.crate.c_src_path, symbol.spelling) + self.crate.vcs.add(self.crate.c_src_path) + # Reference successful symbol wrapper in wrapper module with self.wrapper_path.open("a") as f: - f.write(f"pub mod {symbol.spelling};\n") + f.write(f"pub mod {rust_spelling};\n") self.crate.vcs.add(self.wrapper_path) # Log and commit success - msg = f"Wrapped function `{symbol.name}`" + msg = f"Wrapped function `{symbol.name}`: {usage}" logger.info(msg) if "reasoning" in pred: msg += f"\n\n# Reasoning\n{pred.reasoning}" @@ -305,23 +415,19 @@ 
def wrap_function( break # Log and commit failure - msg = f"Failed to wrap function `{symbol.name}` ({i + 1}/{self.max_iters})" + msg = f"Failed to wrap function `{symbol.name}` ({i + 1}/{self.max_iters}): {usage}" logger.error(msg) msg += f"\n\n# Reasoning\n{pred.reasoning}" if "reasoning" in pred else "" msg += f"\n\n# Build feedback\n{build_feedback}" msg += f"\n\n# Scope Feedback\n{scope_feedback}" self.crate.vcs.commit(msg) - # All iterations failed because of DSPy exceptions - if dspy_exception: - raise dspy_exception - pred.success = success pred.name = symbol.spelling pred.translation = translation pred.wrapper = wrapper pred.bindgen_template = unimplemented_wrapper - pred.prior_wrapper = prior_wrapper + pred.prior_wrapper = prior_wrapper or CodeRust() pred.build_feedback = build_feedback pred.scope_feedback = "\n\n".join(scope_feedback.values()) if not success: @@ -329,30 +435,122 @@ def wrap_function( pred.feedback = "It was difficult to generate a C-compatible FFI wrapper for the translation. Regenerate the translation with clear, explicit, wrapper-friendly Rust function boundaries and straightforward ownership, while keeping the translation fully memory-safe and free of unsafe constructs." 
return pred - def _build(self, symbol_spelling: str) -> tuple[bool, str]: + def generate( + self, + generate_wrapper: dspy.ChainOfThought, + crate: CodeRust, + support_code: CodeC | None, + example_wrapper: CodeRust, + prior_wrapper: CodeRust | None, + build_feedback: str, + scope_feedback: str, + wrapper: CodeRust | None, + ) -> dspy.Prediction: + """Generate a wrapper prediction, using cached wrapper or calling the LLM.""" + parent_usage_tracker = settings.usage_tracker + if wrapper is not None: + pred = dspy.Prediction(wrapper=wrapper) + if parent_usage_tracker is not None: + pred.set_lm_usage({}) + else: + if parent_usage_tracker is None: + pred = generate_wrapper( + crate=crate, + support_code=support_code or CodeC(), + example_wrapper=example_wrapper, + prior_wrapper=prior_wrapper or CodeRust(), + build_feedback=build_feedback, + scope_feedback=scope_feedback, + ) + else: + with track_usage() as local_usage_tracker: + pred = generate_wrapper( + crate=crate, + support_code=support_code or CodeC(), + example_wrapper=example_wrapper, + prior_wrapper=prior_wrapper or CodeRust(), + build_feedback=build_feedback, + scope_feedback=scope_feedback, + ) + lm_usage = local_usage_tracker.get_total_tokens() + pred.set_lm_usage(lm_usage) + for lm_name, usage_entry in lm_usage.items(): + parent_usage_tracker.add_usage(lm_name, usage_entry) + return pred + + def gather_wrappers(self, exclude_wrapper: str = "") -> CodeRust: + wrapper_dir = self.wrapper_path.parent / "wrapper" + if not wrapper_dir.is_dir(): + return CodeRust() + + modules: OrderedDict[str, str] = OrderedDict() + for symbol_wrapper_path in sorted(wrapper_dir.glob("*.rs")): + rust_spelling = symbol_wrapper_path.stem + if exclude_wrapper and rust_spelling == exclude_wrapper: + continue + if rust_spelling in modules: + continue + + wrapper_src = symbol_wrapper_path.read_text().strip() + if not wrapper_src: + continue + + modules[rust_spelling] = ( + f"pub mod {rust_spelling} {{\n" + textwrap.indent(wrapper_src, " 
") + "\n}" + ) + + if not modules: + return CodeRust() + + return CodeRust( + "pub mod wrapper {\n" + + textwrap.indent("\n\n".join(modules.values()), " ") + + "\n}\n" + ) + + def _build(self, symbol: Symbol) -> tuple[bool, str]: + orig_c_src = self.crate.c_src_path.read_bytes() orig_rust_src = self.crate.rust_src_path.read_bytes() orig_wrapper_src = self.wrapper_path.read_bytes() + if symbol.is_function: + # Make C function extern so that we use the Rust function definition + clang_make_extern_(self.crate.c_src_path, symbol.spelling) + elif symbol.is_variable: + # Make C variable global so we can reference it in the Rust wrapper + clang_make_global_(self.crate.c_src_path, symbol.spelling) + else: + raise NotImplementedError + self.crate.vcs.add(self.crate.c_src_path) + + # Remove forbid unsafe from Rust source + rust_src = orig_rust_src.decode().replace("#![forbid(unsafe_code)]", "") + # Reference wrapper module in Rust source - with self.crate.rust_src_path.open("a") as f: - f.write("pub mod wrapper;\n") + rust_src += "pub mod wrapper;\n" + self.crate.rust_src_path.write_text(rust_src) self.crate.vcs.add(self.crate.rust_src_path) # Reference symbol wrapper module to wrapper module with self.wrapper_path.open("a") as f: - f.write(f"pub mod {symbol_spelling};\n") + f.write(f"pub mod {mangle(symbol.spelling)};\n") self.crate.vcs.add(self.wrapper_path) # Check whether all of the changes compile and commit them - success, feedback = self.crate.cargo_build(allow_unsafe=True) + success, feedback = self.crate.cargo_build() # Restore original source + self.crate.c_src_path.write_bytes(orig_c_src) self.crate.rust_src_path.write_bytes(orig_rust_src) self.wrapper_path.write_bytes(orig_wrapper_src) return success, feedback def write_cache(self, pred: dspy.Prediction) -> None: + # If prediction was not generated by an LM then don't write it to cache + if not pred.get_lm_usage(): + return + required_fields = ( "name", "bindgen_template", @@ -377,34 +575,44 @@ def 
write_cache(self, pred: dspy.Prediction) -> None: ) -def export_first_unannotated_variable(rust_src: str, export_name: str) -> str | None: - # Loop through nodes trying to find a static item - attrs = [] - rust_bytes = rust_src.encode() - for node in get_nodes(get_root(rust_bytes)): - # Keep track of attributes - if node.type == "attribute_item": - attrs.append(node) - continue - - # Reset list of attributes when we encounter non-static/non-attribute item - elif node.type != "static_item": - attrs = [] - continue - - # If export name already in attrs, skip this static item - if any(b"export_name" in attr.text for attr in attrs if attr.text is not None): - continue - - # FIXME: Warn if name of variable does not correspond to export_name - - # Insert attribute at location - return ( - rust_bytes[: node.start_byte].decode() - + f'#[unsafe(export_name="{export_name}")]\n' - + rust_bytes[node.start_byte :].decode() +def bindgen(path: Path, symbol_name: str) -> CodeRust: + orig_src = path.read_bytes() + try: + # We want bindgen to run against an in-place extern'd declaration so it emits + # a linkable item (`pub fn` / `pub static mut`) instead of value-style + # constants for initialized globals, which we can't link against from Rust. 
+ clang_make_bindable_(path, symbol_name) + + # unsafe extern "C" { + # pub static mut foo: ::std::os::raw::c_int; + # } + ok, binding, error, _ = run_subprocess( + [ + "bindgen", + "--disable-header-comment", + "--no-doc-comments", + "--no-layout-tests", + "--sort-semantically", + str(path), + "--allowlist-item", + mangle(symbol_name), + ] + ) + finally: + path.write_bytes(orig_src) + if not ok: + raise ValueError(f"Bindgen failed for `{symbol_name}` in '{path}'!\nError:\n{error}") + + binding = binding.strip() + if binding == "": + raise ValueError(f"Bindgen generated an empty binding for `{symbol_name}` in '{path}'!") + + success, output = check_rust(binding, flags=["--crate-type", "lib", "--emit", "metadata"]) + if not success: + raise ValueError( + f"Failed to validate binding for `{symbol_name}` in '{path}'!\nWrapper:\n{binding}\nError:\n{output}" ) - return None + return CodeRust(binding) def _init_cache(cache: Path | None) -> Path | None: @@ -432,32 +640,38 @@ def _init_cache(cache: Path | None) -> Path | None: return cache -def _read_cache(cache: Path | None, name: str, bindgen_template: str) -> str: - wrapper = "" +def _read_cache(cache: Path | None, name: str, bindgen_template: CodeRust) -> CodeRust | None: if cache is None: - return wrapper + return None with sqlite3.connect(cache) as conn: try: row = conn.execute( - "SELECT wrapper FROM wrapper_translations WHERE name=? AND bindgen_template=? AND success=1 ORDER BY id DESC LIMIT 1", - (name, bindgen_template), + "SELECT wrapper FROM wrapper_translations WHERE bindgen_template=? AND success=1 ORDER BY id DESC LIMIT 1", + (bindgen_template.text,), ).fetchone() + if row is None: + row = conn.execute( + "SELECT wrapper FROM wrapper_translations WHERE name=? 
AND success=1 ORDER BY id DESC LIMIT 1", + (name,), + ).fetchone() except Exception: row = None if row: logger.info(f"Cache hit for wrapper `{name}`") - wrapper = row[0] - return wrapper + return CodeRust(row[0]) + else: + logger.info(f"Cache miss for wrapper `{name}`") + return None def _write_cache( cache: Path | None, name: str, - bindgen_template: str, - prior_wrapper: str, + bindgen_template: CodeRust, + prior_wrapper: CodeRust, build_feedback: str, scope_feedback: str, - wrapper: str, + wrapper: CodeRust, success: bool, ) -> None: if cache is None: @@ -471,68 +685,11 @@ def _write_cache( """, ( name, - bindgen_template, - prior_wrapper, + bindgen_template.text, + prior_wrapper.text, build_feedback, scope_feedback, - wrapper, + wrapper.text, int(success), ), ) - - -def _main(cfg: WrapperConfig) -> None: - output_dir = Path(HydraConfig.get().runtime.output_dir) - logger.info(f"Saving results to {output_dir}") - crate = Crate(cargo_toml=cfg.cargo_toml.resolve(), vcs=cfg.vcs) # type: ignore[reportArgumentType] - - model.configure(cfg.model, cfg.generate) - dspy.configure(adapter=adapters.ChatAdapter()) - agent = WrapperGenerator(crate, max_iters=cfg.max_iters, readonly_cache=cfg.readonly_cache) - - # Remove forbid unsafe from Rust source - rust_src = re.sub(re.escape("#![forbid(unsafe_code)]"), "", crate.rust_src_path.read_text()) - crate.rust_src_path.write_text(rust_src) - - # Get global symbol table - tu = create_translation_unit(cfg.filename) - asts = [extract_info_c(tu)] - symbols, _ = get_symbols_and_dependencies(asts, source_priority=[]) - - # Generate wrappers for each global function definition - for symbol in symbols.values(): - if symbol.is_global and symbol.is_function and symbol.is_definition: - agent(symbol, "", rust_src) - - # Reference wrapper in Rust source - with crate.rust_src_path.open("a") as f: - f.write("pub mod wrapper;\n") - crate.vcs.add(crate.rust_src_path) - - success, feedback = crate.cargo_build(allow_unsafe=True) - - # Commit unsafe 
Rust code and wrappers - if (output_subdir := HydraConfig.get().output_subdir) is not None: - crate.vcs.add(output_dir / output_subdir) - name = f"`{crate.root_package['name']}`" - msg = f"Successfully wrapped all symbols in {name}!" - if not success: - msg = f"Failed to wrap all symbols in {name}!" - logger.error(msg) - msg += f"\n\n{feedback}" - else: - logger.info(msg) - crate.vcs.commit(msg) - - -@hydra.main(version_base=None, config_name="wrapper") -def main(cfg: WrapperConfig) -> None: - try: - _main(cfg) - except Exception as e: - logger.exception(e) - sys.exit(-1) - - -if __name__ == "__main__": - main() diff --git a/test/fixtures/code_preprocessing/expected.c.i b/test/fixtures/code_preprocessing/expected.c.i deleted file mode 100644 index 4679f29..0000000 --- a/test/fixtures/code_preprocessing/expected.c.i +++ /dev/null @@ -1,17 +0,0 @@ -int x; - - -int main() { - char text[128]; - printf("Hello World!\n"); - - while (fgets(text, 128, - stdin - )) { - fputs(text, - stdout - ); - } - - return 0; -} diff --git a/test/fixtures/code_preprocessing/input.c.i b/test/fixtures/code_preprocessing/input.c.i deleted file mode 100644 index c8030f5..0000000 --- a/test/fixtures/code_preprocessing/input.c.i +++ /dev/null @@ -1,42 +0,0 @@ -# 1 "/usr/include/stdc-predef.h" 1 3 4 -# 0 "" 2 -# 1 "/some/path/to/keep.c" -# 24 "/other/path/to/keep.c" -# 1 "/usr/include/stdio.h" 1 3 4 -# 885 "/usr/include/stdio.h" 3 4 -extern int __uflow (FILE *); -extern int __overflow (FILE *, int); -# 902 "/usr/include/stdio.h" 3 4 - -# 29 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4 - - -typedef unsigned char __u_char; -typedef unsigned short int __u_short; -typedef unsigned int __u_int; -typedef unsigned long int __u_long; - - -# 25 "/home/path/to/keep.c" 2 -int x; - - -# 26 "/home/path/to/main.c" -int main() { - char text[128]; - printf("Hello World!\n"); - - while (fgets(text, 128, -# 30 "/home/path/to/main.c" 3 4 - stdin -# 30 "/home/path/to/main.c" - )) { - fputs(text, -# 31 
"/home/path/to/main.c" 3 4 - stdout -# 31 "/home/path/to/main.c" - ); - } - - return 0; -} diff --git a/test/fixtures/compile/clippy.rs b/test/fixtures/compile/clippy.rs deleted file mode 100644 index 05e234e..0000000 --- a/test/fixtures/compile/clippy.rs +++ /dev/null @@ -1,195 +0,0 @@ -use std::collections::HashMap; - -// Correctness violations -fn correctness_invalid_regex() { - let _ = regex::Regex::new("[").unwrap(); // invalid regex -} - -fn correctness_out_of_bounds_indexing() { - let v = vec![1, 2, 3]; - let _ = v[10]; // out of bounds -} - -fn correctness_float_cmp() -> bool { - let x = 1.0; - let y = 2.0; - x == y // float comparison -} - -fn correctness_clone_on_copy() { - let x = 42i32; - let _ = x.clone(); // cloning Copy type -} - -fn correctness_redundant_clone() { - let s = String::from("hello"); - let _ = s.clone().len(); // redundant clone -} - -// Suspicious violations -fn suspicious_empty_loop() { - loop {} // empty infinite loop -} - -fn suspicious_suspicious_else_formatting() -> i32 { - let x = 5; - if x > 0 { 1 } - else - { 0 } // suspicious else formatting -} - -fn suspicious_assign_op_pattern() { - let mut x = 5; - x = x + 1; // should use += -} - -fn suspicious_inefficient_to_string() { - let s = "hello"; - let _ = s.to_string(); // should use to_owned() for &str -} - -fn suspicious_single_char_pattern() { - let s = "hello world"; - let _ = s.split("l"); // single char should use char literal -} - -// Complexity violations -fn complexity_too_many_arguments(a: i32, b: i32, c: i32, d: i32, e: i32, f: i32, g: i32, h: i32) { - println!("{} {} {} {} {} {} {} {}", a, b, c, d, e, f, g, h); -} - -fn complexity_cognitive_complexity() -> i32 { - let mut result = 0; - for i in 0..10 { - if i % 2 == 0 { - for j in 0..5 { - if j > 2 { - if i > 5 { - result += 1; - if result > 10 { - break; - } - } - } - } - } - } - result -} - -fn complexity_type_complexity() -> Result>>>, Box> { - Ok(HashMap::new()) -} - -fn complexity_cyclomatic_complexity(x: i32) -> 
i32 { - if x > 0 { - if x > 10 { - if x > 20 { - if x > 30 { - if x > 40 { - 50 - } else { 40 } - } else { 30 } - } else { 20 } - } else { 10 } - } else { 0 } -} - -// Performance violations -fn perf_unnecessary_to_owned() { - let s = String::from("hello"); - let _ = s.as_str().to_owned(); // unnecessary -} - -fn perf_string_add_assign() { - let mut s = String::new(); - s = s + "hello"; // inefficient, should use push_str -} - -fn perf_vec_init_then_push() { - let mut v = Vec::new(); - v.push(1); - v.push(2); - v.push(3); // should use vec! macro -} - -fn perf_iter_nth_zero() { - let v = vec![1, 2, 3, 4, 5]; - let _ = v.iter().nth(0); // should use .first() -} - -fn perf_large_stack_arrays() { - let _large_array = [0u8; 512 * 1024]; // large stack allocation -} - -// Style violations -fn style_needless_return() -> i32 { - return 42; // needless return -} - -fn style_single_match() -> String { - let x = Some(42); - match x { - Some(n) => n.to_string(), - None => "none".to_string(), - } // should use if let -} - -fn style_redundant_field_names() { - let name = "test".to_string(); - let value = 42; - let _s = MyStruct { - name: name, // redundant field name - value: value // redundant field name - }; -} - -struct MyStruct { - name: String, - value: i32, -} - -fn style_unnecessary_mut() { - let mut x = 5; // unnecessary mut - println!("{}", x); -} - -fn style_collapsible_if() { - let x = 5; - if x > 0 { - if x < 10 { // collapsible - println!("between 0 and 10"); - } - } -} - -fn style_len_zero() { - let v = vec![1, 2, 3]; - if v.len() == 0 { // should use is_empty() - println!("empty"); - } -} - -fn style_redundant_closure() { - let v = vec![1, 2, 3]; - let _: Vec = v.iter().map(|x| x.to_string()).collect(); // redundant closure -} - -fn style_manual_map() -> Option { - let x = Some(5); - match x { - Some(val) => Some(val * 2), - None => None, // manual map - } -} - -// Additional violations -fn style_unnecessary_wraps() -> Option { - Some(42) // always returns Some 
-} - -fn perf_useless_vec() { - for item in vec![1, 2, 3].iter() { // useless vec - println!("{}", item); - } -} diff --git a/test/fixtures/isystem_inline_dep/test_case/CMakeLists.txt b/test/fixtures/isystem_inline_dep/test_case/CMakeLists.txt new file mode 100644 index 0000000..4a0d24e --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.10) +project(isystem_inline_dep C) + +# util shared library - includes util/ with regular -I +add_library(util SHARED util/user.c) +target_include_directories(util PRIVATE util) + +# ext shared library - includes util/ with SYSTEM (generates -isystem) +# This is the pattern that causes the bug: same header directory included +# as SYSTEM in one target but regular in another. +add_library(ext SHARED ext/caller.c) +target_include_directories(ext SYSTEM PRIVATE util) +target_include_directories(ext PRIVATE ext) + +# Main executable links both +add_executable(isystem_inline_dep main.c) +target_include_directories(isystem_inline_dep PRIVATE util) +target_link_libraries(isystem_inline_dep util ext) diff --git a/test/fixtures/isystem_inline_dep/test_case/ext/bridge.h b/test/fixtures/isystem_inline_dep/test_case/ext/bridge.h new file mode 100644 index 0000000..1f0772a --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/ext/bridge.h @@ -0,0 +1,8 @@ +#ifndef BRIDGE_H +#define BRIDGE_H + +#include "alloc.h" + +#define ext_malloc(x) my_alloc(x) + +#endif diff --git a/test/fixtures/isystem_inline_dep/test_case/ext/caller.c b/test/fixtures/isystem_inline_dep/test_case/ext/caller.c new file mode 100644 index 0000000..ebb1865 --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/ext/caller.c @@ -0,0 +1,19 @@ +#include "bridge.h" + +typedef struct { + int val; +} item_t; + +static item_t *make_item(int val) { + item_t *p; + if (!(p = (item_t *)ext_malloc(sizeof(item_t)))) + return (void *)0; + p->val = val; + return p; +} + +int do_work(int x) { + 
item_t *item = make_item(x); + if (item) return item->val; + return -1; +} diff --git a/test/fixtures/isystem_inline_dep/test_case/main.c b/test/fixtures/isystem_inline_dep/test_case/main.c new file mode 100644 index 0000000..2d3f860 --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/main.c @@ -0,0 +1,10 @@ +#include "alloc.h" + +int do_work(int x); +void *my_calloc(size_t n, size_t sz); + +int main(void) { + void *p = my_calloc(4, sizeof(int)); + if (p) free(p); + return do_work(42); +} diff --git a/test/fixtures/isystem_inline_dep/test_case/util/alloc.h b/test/fixtures/isystem_inline_dep/test_case/util/alloc.h new file mode 100644 index 0000000..dc8fa4d --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/util/alloc.h @@ -0,0 +1,10 @@ +#ifndef ALLOC_H +#define ALLOC_H + +#include + +static inline void *my_alloc(size_t len) { + return malloc(len); +} + +#endif diff --git a/test/fixtures/isystem_inline_dep/test_case/util/user.c b/test/fixtures/isystem_inline_dep/test_case/util/user.c new file mode 100644 index 0000000..a727564 --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/util/user.c @@ -0,0 +1,6 @@ +#include "alloc.h" + +void *my_calloc(size_t n, size_t sz) { + void *p = my_alloc(n * sz); + return p; +} diff --git a/test/fixtures/isystem_inline_dep/test_case/xdiff/bridge.h b/test/fixtures/isystem_inline_dep/test_case/xdiff/bridge.h new file mode 100644 index 0000000..a884876 --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/xdiff/bridge.h @@ -0,0 +1,8 @@ +#ifndef BRIDGE_H +#define BRIDGE_H + +#include "alloc.h" + +#define xdl_malloc(x) my_alloc(x) + +#endif diff --git a/test/fixtures/isystem_inline_dep/test_case/xdiff/caller.c b/test/fixtures/isystem_inline_dep/test_case/xdiff/caller.c new file mode 100644 index 0000000..16d4caf --- /dev/null +++ b/test/fixtures/isystem_inline_dep/test_case/xdiff/caller.c @@ -0,0 +1,19 @@ +#include "bridge.h" + +typedef struct { + int val; +} item_t; + +static item_t 
*make_item(int val) { + item_t *p; + if (!(p = (item_t *)xdl_malloc(sizeof(item_t)))) + return (void *)0; + p->val = val; + return p; +} + +int do_work(int x) { + item_t *item = make_item(x); + if (item) return item->val; + return -1; +} diff --git a/test/fixtures/scc_ordering_bug/test_case/CMakeLists.txt b/test/fixtures/scc_ordering_bug/test_case/CMakeLists.txt new file mode 100644 index 0000000..4b9c9a5 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.10) +project(scc_ordering_bug) + +# Reproduces SCC ordering bug in consolidation: +# caller.c (rank 1) defines compute() which calls helper(). +# state.c (rank 2) defines helper() (inline) and vtable = { .fn = compute }. +# Cycle: compute -> helper -> vtable -> compute. +# merge_symbols picks helper from state.c (definition beats declaration). +# Within the SCC, compute (rank 1) is emitted before helper (rank 2). +# helper has declaration=None (inline) so no forward decl is emitted. +# Result: "call to undeclared function 'helper'" +add_executable(scc_ordering_bug + src/main.c + src/caller.c + src/state.c +) +target_include_directories(scc_ordering_bug PRIVATE include) diff --git a/test/fixtures/scc_ordering_bug/test_case/include/fwd.h b/test/fixtures/scc_ordering_bug/test_case/include/fwd.h new file mode 100644 index 0000000..4d8dd85 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/include/fwd.h @@ -0,0 +1,13 @@ +#ifndef FWD_H +#define FWD_H + +struct vtable_t { + int (*fn)(int); +}; + +extern struct vtable_t vtable; + +/* Forward declaration only — no definition of helper here. 
*/ +int helper(int x); + +#endif diff --git a/test/fixtures/scc_ordering_bug/test_case/include/header.h b/test/fixtures/scc_ordering_bug/test_case/include/header.h new file mode 100644 index 0000000..51b2257 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/include/header.h @@ -0,0 +1,15 @@ +#ifndef HEADER_H +#define HEADER_H + +struct vtable_t { + int (*fn)(int); +}; + +extern struct vtable_t vtable; + +/* Full definition of helper — only included by state.c */ +inline int helper(int x) { + return vtable.fn(x); +} + +#endif diff --git a/test/fixtures/scc_ordering_bug/test_case/src/caller.c b/test/fixtures/scc_ordering_bug/test_case/src/caller.c new file mode 100644 index 0000000..42f15e8 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/src/caller.c @@ -0,0 +1,5 @@ +#include "fwd.h" + +int compute(int x) { + return helper(x) + 1; +} diff --git a/test/fixtures/scc_ordering_bug/test_case/src/main.c b/test/fixtures/scc_ordering_bug/test_case/src/main.c new file mode 100644 index 0000000..8585358 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/src/main.c @@ -0,0 +1,9 @@ +#include + +int helper(int x); +int compute(int x); + +int main(void) { + printf("%d\n", compute(42)); + return 0; +} diff --git a/test/fixtures/scc_ordering_bug/test_case/src/state.c b/test/fixtures/scc_ordering_bug/test_case/src/state.c new file mode 100644 index 0000000..d9afdb2 --- /dev/null +++ b/test/fixtures/scc_ordering_bug/test_case/src/state.c @@ -0,0 +1,8 @@ +#include "header.h" + +int compute(int x); + +/* Provide external definition of helper for callers that only see the declaration */ +extern inline int helper(int x); + +struct vtable_t vtable = { .fn = compute }; diff --git a/test/fixtures/typedef_cross_tu/test_case/CMakeLists.txt b/test/fixtures/typedef_cross_tu/test_case/CMakeLists.txt new file mode 100644 index 0000000..d0769f5 --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_case/CMakeLists.txt @@ -0,0 +1,16 @@ 
+cmake_minimum_required(VERSION 3.10) +project(typedef_cross_tu) + +# Library with b.c and c.c — these get parsed from compile_commands.json +# before a.c (because library objects are built first). merge_symbols +# processes b.c first → retains typedef X cursor from b.c. +add_library(xlib SHARED src/b.c src/c.c) +target_include_directories(xlib PRIVATE include) + +# Executable links against xlib; its .sources lists a.c first (from +# add_executable sources) then lib sources appended via LINK_LIBRARIES. +# So source_priority = [a.c, b.c, c.c] → ast_order rank: a.c=0, b.c=1, c.c=2 +# This gives struct X (from a.c) rank 0 < typedef X (from b.c) rank 1. +add_executable(typedef_cross_tu src/a.c) +target_include_directories(typedef_cross_tu PRIVATE include) +target_link_libraries(typedef_cross_tu xlib) diff --git a/test/fixtures/typedef_cross_tu/test_case/include/types.h b/test/fixtures/typedef_cross_tu/test_case/include/types.h new file mode 100644 index 0000000..d1659f4 --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_case/include/types.h @@ -0,0 +1,4 @@ +#ifndef TYPES_H +#define TYPES_H +typedef struct X X; +#endif diff --git a/test/fixtures/typedef_cross_tu/test_case/src/a.c b/test/fixtures/typedef_cross_tu/test_case/src/a.c new file mode 100644 index 0000000..3f493a9 --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_case/src/a.c @@ -0,0 +1,17 @@ +#include "types.h" + +struct X { + X *self; + int val; +}; + +X *create_x(int v) { + (void)v; + return (X *)0; +} + +int main(void) { + X *x = create_x(42); + (void)x; + return 0; +} diff --git a/test/fixtures/typedef_cross_tu/test_case/src/b.c b/test/fixtures/typedef_cross_tu/test_case/src/b.c new file mode 100644 index 0000000..af2d9c2 --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_case/src/b.c @@ -0,0 +1,5 @@ +#include "types.h" + +void consume_x(X *p) { + (void)p; +} diff --git a/test/fixtures/typedef_cross_tu/test_case/src/c.c b/test/fixtures/typedef_cross_tu/test_case/src/c.c new file mode 100644 
index 0000000..2ef9bea --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_case/src/c.c @@ -0,0 +1,10 @@ +#include "types.h" + +struct Y { + X *member; + int id; +}; + +struct Y *alloc_y(void) { + return (struct Y *)0; +} diff --git a/test/fixtures/typedef_cross_tu/test_vectors/test.json b/test/fixtures/typedef_cross_tu/test_vectors/test.json new file mode 100644 index 0000000..2748f42 --- /dev/null +++ b/test/fixtures/typedef_cross_tu/test_vectors/test.json @@ -0,0 +1,8 @@ +{ + "test_cases": [ + { + "input": "", + "expected_output": "" + } + ] +} diff --git a/test/test_cargo_test.py b/test/test_cargo_test.py new file mode 100644 index 0000000..dd66c44 --- /dev/null +++ b/test/test_cargo_test.py @@ -0,0 +1,138 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +from pathlib import Path + +import pytest + +from ideas import tools + + +@pytest.fixture +def tmp_crate(tmp_path: Path): + """Create a minimal lib crate with passing and failing integration tests.""" + crate_dir = tmp_path / "test_crate" + crate = tools.Crate(cargo_toml=crate_dir / "Cargo.toml", vcs="none", type="lib") + + (crate_dir / "src" / "lib.rs").write_text("pub fn add(a: i32, b: i32) -> i32 { a + b }\n") + + tests_dir = crate_dir / "tests" + tests_dir.mkdir(exist_ok=True) + (tests_dir / "test_pass.rs").write_text( + "use test_crate::add;\n" + "#[test] fn pass_one() { assert_eq!(add(1, 2), 3); }\n" + "#[test] fn pass_two() { assert_eq!(add(0, 0), 0); }\n" + ) + (tests_dir / "test_mixed.rs").write_text( + "use test_crate::add;\n" + "#[test] fn mixed_pass() { assert_eq!(add(1, 1), 2); }\n" + "#[test] fn mixed_fail() { assert_eq!(add(1, 1), 99); }\n" + ) + (tests_dir / "test_ignored.rs").write_text( + "#[test] fn runs() { assert!(true); }\n" + '#[test] #[ignore] fn skipped() { panic!("should not run"); }\n' + ) + (tests_dir / "test_stdout.rs").write_text( + '#[test] fn noisy() { println!("hello from test"); assert!(true); }\n' + ) + + return crate + + +def 
_sorted_output(output: str) -> str: + """Sort test result lines for deterministic comparison (nextest runs in parallel).""" + lines = output.splitlines() + test_lines = sorted(line for line in lines if line.startswith("test ") and "..." in line) + rest = [line for line in lines if not (line.startswith("test ") and "..." in line)] + return "\n".join(test_lines + rest) + "\n" + + +# --- cargo nextest run harness --- + + +def test_passing_json(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_pass", message_format="libtest-json" + ) + assert success is True + assert rc == 0 + assert _sorted_output(tools.nextest_json_to_libtest(stdout)) == ( + "test pass_one ... ok\n" + "test pass_two ... ok\n" + "test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out\n" + ) + + +def test_failing_json(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_mixed", message_format="libtest-json" + ) + assert success is False + assert rc == 100 + assert _sorted_output(tools.nextest_json_to_libtest(stdout)) == ( + "test mixed_fail ... FAILED\n" + "test mixed_pass ... ok\n" + "test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out\n" + ) + + +def test_ignored_json(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_ignored", message_format="libtest-json" + ) + assert success is True + assert rc == 0 + assert _sorted_output(tools.nextest_json_to_libtest(stdout)) == ( + "test runs ... ok\n" + "test result: ok. 1 passed; 0 failed; 1 ignored; 0 measured; 0 filtered out\n" + ) + + +def test_stdout_not_in_output(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_stdout", message_format="libtest-json" + ) + assert success is True + assert rc == 0 + assert _sorted_output(tools.nextest_json_to_libtest(stdout)) == ( + "test noisy ... ok\n" + "test result: ok. 
1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out\n" + ) + + +# --- cargo test harness --- + + +def test_cargo_test_passing(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_pass", test_harness="test", quiet=False + ) + assert success is True + assert rc == 0 + assert "test pass_one ... ok" in stdout + assert "test pass_two ... ok" in stdout + assert "2 passed; 0 failed" in stdout + + +def test_cargo_test_failing(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_mixed", test_harness="test", quiet=False + ) + assert success is False + assert rc == 101 + assert "test mixed_pass ... ok" in stdout + assert "test mixed_fail ... FAILED" in stdout + assert "1 passed; 1 failed" in stdout + + +def test_cargo_test_ignored(tmp_crate): + success, stdout, _, rc = tmp_crate.cargo_test( + name="test_ignored", test_harness="test", quiet=False + ) + assert success is True + assert rc == 0 + assert "test runs ... ok" in stdout + assert "1 passed; 0 failed; 1 ignored" in stdout diff --git a/test/test_clang.py b/test/test_clang.py index 4baec4a..a12f64e 100644 --- a/test/test_clang.py +++ b/test/test_clang.py @@ -10,6 +10,10 @@ from clang.cindex import TranslationUnit, CursorKind +def parse_c(code: str) -> TranslationUnit: + return ast.create_translation_unit(ast.CodeC(code)) + + def test_basic_fns(): code = dedent( """ @@ -27,7 +31,7 @@ def test_basic_fns(): """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 4 @@ -56,7 +60,7 @@ def test_detailed_complete_graph(): """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 3 @@ -104,7 +108,7 @@ def test_basic_types(): """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) # 3 data structures + 3 enumerators + main @@ -127,17 +131,17 @@ def test_basic_types(): def test_forward_declaration(): code = dedent( """ 
- void print_message(const char* msg); - void print_message(const char* msg) { - printf("%s\\n", msg); + int return_stuff(int input); + int return_stuff(int input) { + return input + 1; } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 1 - assert "c:@F@print_message" in tr.symbols + assert "c:@F@return_stuff" in tr.symbols def test_fake_quotes_unicode(): @@ -154,11 +158,11 @@ def test_fake_quotes_unicode(): } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert ( - tr.symbols["c:@F@main"].code + tr.symbols["c:@F@main"].code.code == dedent( r""" int main() { @@ -167,6 +171,7 @@ def test_fake_quotes_unicode(): } """ ).strip() + + "\n" ) @@ -177,7 +182,7 @@ def test_declaration_after_definition(): static const int a[10]; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 1 @@ -186,7 +191,7 @@ def test_declaration_after_definition(): def test_empty_statement(): code = ";" - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 0 assert len(tr.complete_graph) == 0 @@ -215,7 +220,7 @@ def test_nested_structs(): } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 4 @@ -250,13 +255,13 @@ def test_forward_typedef_struct(): }; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@S@s" in tr.symbols assert "c:@S@s" in tr.complete_graph assert ( - tr.symbols["c:@S@s"].code + tr.symbols["c:@S@s"].code.code == dedent( """ struct s { @@ -264,17 +269,19 @@ def test_forward_typedef_struct(): }; """ ).strip() + + "\n" ) assert "c:file.c@T@s_t" in tr.symbols assert "c:file.c@T@s_t" in tr.complete_graph assert ( - tr.symbols["c:file.c@T@s_t"].code + tr.symbols["c:file.c@T@s_t"].code.code == dedent( """ typedef 
struct s s_t; """ ).strip() + + "\n" ) assert len(tr.complete_graph["c:@S@s"]) == 1 @@ -292,13 +299,13 @@ def test_backward_typedef_struct(): typedef struct s s_t; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@S@s" in tr.symbols assert "c:@S@s" in tr.complete_graph assert ( - tr.symbols["c:@S@s"].code + tr.symbols["c:@S@s"].code.code == dedent( """ struct s { @@ -306,17 +313,19 @@ def test_backward_typedef_struct(): }; """ ).strip() + + "\n" ) assert "c:file.c@T@s_t" in tr.symbols assert "c:file.c@T@s_t" in tr.complete_graph assert ( - tr.symbols["c:file.c@T@s_t"].code + tr.symbols["c:file.c@T@s_t"].code.code == dedent( """ typedef struct s s_t; """ ).strip() + + "\n" ) assert len(tr.complete_graph["c:@S@s"]) == 1 @@ -333,13 +342,13 @@ def test_tag_typedef_struct(): } s_t; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@S@s" in tr.symbols assert "c:@S@s" in tr.complete_graph assert ( - tr.symbols["c:@S@s"].code + tr.symbols["c:@S@s"].code.code == dedent( """ typedef struct s { @@ -347,12 +356,13 @@ def test_tag_typedef_struct(): } s_t; """ ).strip() + + "\n" ) assert "c:file.c@T@s_t" in tr.symbols assert "c:file.c@T@s_t" in tr.complete_graph assert ( - tr.symbols["c:file.c@T@s_t"].code + tr.symbols["c:file.c@T@s_t"].code.code == dedent( """ typedef struct s { @@ -360,6 +370,7 @@ def test_tag_typedef_struct(): } s_t; """ ).strip() + + "\n" ) assert len(tr.complete_graph["c:@S@s"]) == 1 @@ -401,7 +412,7 @@ def test_local_struct(): } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 4 @@ -429,7 +440,7 @@ def test_complex_typedef(): }; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert len(tr.symbols) == 3 @@ -487,12 +498,12 @@ def test_struct_var(): }; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = 
ast.extract_info_c(tu) assert "c:@var" in tr.symbols assert ( - tr.symbols["c:@var"].code + tr.symbols["c:@var"].code.code == dedent( """ struct S { @@ -500,6 +511,7 @@ def test_struct_var(): } var[] = {{0}}; """ ).strip() + + "\n" ) @@ -513,12 +525,12 @@ def test_anonymous_struct_var(): }; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@var" in tr.symbols assert ( - tr.symbols["c:@var"].code + tr.symbols["c:@var"].code.code == dedent( """ struct { @@ -526,6 +538,7 @@ def test_anonymous_struct_var(): } var[] = {{0}}; """ ).strip() + + "\n" ) @@ -543,13 +556,13 @@ def test_anonymous_struct_function_pointer_var(): } var[] = {{fn}}; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@var" in tr.complete_graph assert "c:@S@S" in tr.complete_graph["c:@var"] assert "c:@F@fn" in tr.complete_graph["c:@var"] - assert tr.symbols["c:@S@S2"].code == tr.symbols["c:@var"].code + assert tr.symbols["c:@S@S2"].code.code == tr.symbols["c:@var"].code.code assert "c:@S@S" in tr.complete_graph["c:@S@S2"] assert "c:@F@fn" in tr.complete_graph["c:@S@S2"] @@ -566,12 +579,12 @@ def test_struct_in_param(): } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@F@test" in tr.symbols assert ( - tr.symbols["c:@F@test"].code + tr.symbols["c:@F@test"].code.code == dedent( """ void test(struct S s) { @@ -579,6 +592,7 @@ def test_struct_in_param(): } """ ).strip() + + "\n" ) @@ -593,14 +607,14 @@ def test_enum_constant(): } """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert "c:@E@E" in tr.symbols assert "c:@E@E" in tr.complete_graph assert len(tr.complete_graph["c:@E@E"]) == 0 assert ( - tr.symbols["c:@E@E"].code + tr.symbols["c:@E@E"].code.code == dedent( """ enum E { @@ -608,12 +622,13 @@ def test_enum_constant(): }; """ ).strip() + + "\n" ) assert "c:@E@E@EC" in tr.symbols 
assert "c:@E@E@EC" in tr.complete_graph assert len(tr.complete_graph["c:@E@E@EC"]) == 0 - assert tr.symbols["c:@E@E@EC"].code == tr.symbols["c:@E@E"].code + assert tr.symbols["c:@E@E@EC"].code.code == tr.symbols["c:@E@E"].code.code assert "c:@F@main" in tr.symbols assert "c:@F@main" in tr.complete_graph @@ -630,7 +645,7 @@ def test_anonymous_enum(): int var[] = { EC }; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) # Find anonymous enum constant EC @@ -652,11 +667,11 @@ def test_enum_in_struct(): int i = EC; """ ) - tu = ast.create_translation_unit(code) + tu = parse_c(code) tr = ast.extract_info_c(tu) assert ( - tr.symbols["c:@S@S"].code + tr.symbols["c:@S@S"].code.code == dedent( """ struct S { @@ -667,6 +682,7 @@ def test_enum_in_struct(): }; """ ).strip() + + "\n" ) @@ -711,3 +727,75 @@ def test_clang_make_global_multiple_declarations(tmp_path): assert "int v = 42;" in transformed assert "extern int v;" in transformed assert transformed.count("int v;") == 2 + + +def test_clang_make_bindable_function_multiple_declarations(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text( + dedent( + """ + static int f(int x); + int f(int x); + static int f(int x) { + return x + 1; + } + """ + ) + ) + + ast.clang_make_bindable_(c_path, "f") + transformed = c_path.read_text() + + assert transformed.count("extern int f(int x);") == 3 + assert "static int f" not in transformed + assert "{" not in transformed + + +def test_clang_make_bindable_variable_with_initializer(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text("static int j = 0;\n") + + ast.clang_make_bindable_(c_path, "j") + transformed = c_path.read_text() + + assert transformed == "extern int j;\nint j = 0;\n" + + +def test_clang_make_bindable_variable_without_initializer(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text("static int j;\n") + + ast.clang_make_bindable_(c_path, "j") + transformed = c_path.read_text() + + assert 
transformed == "extern int j;\nint j;\n" + + +def test_clang_make_bindable_variable_already_extern(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text("extern int j;\n") + + ast.clang_make_bindable_(c_path, "j") + transformed = c_path.read_text() + + assert transformed == "extern int j;\n" + + +def test_clang_make_bindable_variable_array_with_initializer(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text("static int arr[3] = {1, 2, 3};\n") + + ast.clang_make_bindable_(c_path, "arr") + transformed = c_path.read_text() + + assert transformed == "extern int arr[3];\nint arr[3] = {1, 2, 3};\n" + + +def test_clang_make_bindable_variable_array_without_initializer(tmp_path): + c_path = tmp_path / "input.c" + c_path.write_text("int array[3];\n") + + ast.clang_make_bindable_(c_path, "array") + transformed = c_path.read_text() + + assert transformed == "extern int array[3];\nint array[3];\n" diff --git a/test/test_cmake.py b/test/test_cmake.py new file mode 100644 index 0000000..348e0ba --- /dev/null +++ b/test/test_cmake.py @@ -0,0 +1,113 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +import json +from pathlib import Path + +from ideas.cmake import _normalize_isystem + + +def test_normalize_isystem_command_space_separated(tmp_path: Path): + """Replaces '-isystem /path' with '-I /path' in command strings.""" + db = [ + {"directory": "/build", "command": "cc -isystem /usr/include -c foo.c", "file": "foo.c"} + ] + p = tmp_path / "compile_commands.json" + p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = json.loads(p.read_text()) + assert result[0]["command"] == "cc -I /usr/include -c foo.c" + + +def test_normalize_isystem_command_no_space(tmp_path: Path): + """Replaces '-isystem/path' with '-I/path' in command strings.""" + db = [ + {"directory": "/build", "command": "cc -isystem/usr/include -c foo.c", "file": "foo.c"} + ] + p = tmp_path / "compile_commands.json" + 
p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = json.loads(p.read_text()) + assert result[0]["command"] == "cc -I/usr/include -c foo.c" + + +def test_normalize_isystem_arguments_space_separated(tmp_path: Path): + """Replaces '-isystem' followed by path in arguments array.""" + db = [ + { + "directory": "/build", + "arguments": ["cc", "-isystem", "/usr/include", "-c", "foo.c"], + "file": "foo.c", + } + ] + p = tmp_path / "compile_commands.json" + p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = json.loads(p.read_text()) + assert result[0]["arguments"] == ["cc", "-I", "/usr/include", "-c", "foo.c"] + + +def test_normalize_isystem_arguments_joined(tmp_path: Path): + """Replaces '-isystem/path' in arguments array.""" + db = [ + { + "directory": "/build", + "arguments": ["cc", "-isystem/usr/include", "-c", "foo.c"], + "file": "foo.c", + } + ] + p = tmp_path / "compile_commands.json" + p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = json.loads(p.read_text()) + assert result[0]["arguments"] == ["cc", "-I/usr/include", "-c", "foo.c"] + + +def test_normalize_isystem_multiple_entries(tmp_path: Path): + """Handles multiple entries and multiple -isystem flags per entry.""" + db = [ + { + "directory": "/build", + "command": "cc -isystem /a -isystem /b -c foo.c", + "file": "foo.c", + }, + {"directory": "/build", "command": "cc -I/c -c bar.c", "file": "bar.c"}, + ] + p = tmp_path / "compile_commands.json" + p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = json.loads(p.read_text()) + assert result[0]["command"] == "cc -I /a -I /b -c foo.c" + assert result[1]["command"] == "cc -I/c -c bar.c" + + +def test_normalize_isystem_no_isystem(tmp_path: Path): + """No-op when there are no -isystem flags.""" + db = [{"directory": "/build", "command": "cc -I/usr/include -c foo.c", "file": "foo.c"}] + p = tmp_path / "compile_commands.json" + p.write_text(json.dumps(db)) + + _normalize_isystem(p) + + result = 
json.loads(p.read_text()) + assert result[0]["command"] == "cc -I/usr/include -c foo.c" + + +def test_normalize_isystem_missing_file(tmp_path: Path): + """No-op when compile_commands.json does not exist.""" + p = tmp_path / "compile_commands.json" + _normalize_isystem(p) # should not raise + assert not p.exists() diff --git a/test/test_consolidate.py b/test/test_consolidate.py new file mode 100644 index 0000000..613ea36 --- /dev/null +++ b/test/test_consolidate.py @@ -0,0 +1,1121 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +from pathlib import Path +from textwrap import dedent + +import networkx as nx +import pytest +import json + +from ideas import ast +from ideas.init.consolidate import ( + create_ast_order, + create_symbol_lexical_key_fn, + get_includes, + get_symbols_and_dependencies, + init as consolidate_init, +) +from ideas.tools import check_c + + +def _usr_by_spelling(symbols: dict[str, ast.Symbol], spelling: str) -> str: + for name, symbol in symbols.items(): + if symbol.spelling == spelling: + return name + raise AssertionError(f"Unable to find symbol with spelling: {spelling}") + + +def _write_compile_commands(tmp_path: Path, c_files: list[Path], extra_flags: str = "") -> Path: + compile_commands = tmp_path / "compile_commands.json" + flags_part = f"{extra_flags} " if extra_flags else "" + compile_commands.write_text( + json.dumps( + [ + { + "directory": str(tmp_path), + "file": str(f), + "command": f"cc {flags_part}-c {f}", + } + for f in c_files + ] + ) + ) + return compile_commands + + +def _ast_order_from_symbols( + symbols: dict[str, ast.Symbol], source_priority: list[Path] | None = None +) -> dict[Path, ast.TreeResult]: + source_priority = source_priority or [] + tu_representatives: dict[Path, ast.Symbol] = {} + for symbol in symbols.values(): + tu_path = Path(symbol.cursor.translation_unit.spelling).resolve() + tu_representatives.setdefault(tu_path, symbol) + fallback_asts = [ + 
ast.TreeResult(symbols={symbol.name: symbol}) for symbol in tu_representatives.values() + ] + return create_ast_order(source_priority, fallback_asts) + + +def test_symbol_key_is_callable(): + tu = ast.create_translation_unit(ast.CodeC("int alpha(void) { return 1; }")) + symbols = ast.extract_info_c(tu).symbols + + usr = _usr_by_spelling(symbols, "alpha") + key_fn = create_symbol_lexical_key_fn(symbols, _ast_order_from_symbols(symbols)) + + # Comparing a symbol with itself should return equal keys + assert key_fn(usr) == key_fn(usr) + + +def test_symbol_group_key_uses_first_symbol(): + tu = ast.create_translation_unit( + ast.CodeC("int alpha(void) { return 1; } int beta(void) { return 2; }") + ) + symbols = ast.extract_info_c(tu).symbols + alpha_usr = _usr_by_spelling(symbols, "alpha") + beta_usr = _usr_by_spelling(symbols, "beta") + + key_fn = create_symbol_lexical_key_fn(symbols, _ast_order_from_symbols(symbols)) + # Group key uses first element + assert key_fn((alpha_usr, beta_usr)) == key_fn(alpha_usr) + + +def test_symbol_key_uses_tu_ast_order(tmp_path: Path): + a_path = tmp_path / "a.c" + b_path = tmp_path / "b.c" + a_path.write_text("int alpha(void) { return 1; }") + b_path.write_text("int beta(void) { return 2; }") + + a_tree = ast.extract_info_c(ast.create_translation_unit(a_path)) + b_tree = ast.extract_info_c(ast.create_translation_unit(b_path)) + a_symbols = a_tree.symbols + b_symbols = b_tree.symbols + symbols = {**a_symbols, **b_symbols} + + alpha_usr = _usr_by_spelling(symbols, "alpha") + beta_usr = _usr_by_spelling(symbols, "beta") + + ast_order = create_ast_order([b_path, a_path], [a_tree, b_tree]) + key_fn = create_symbol_lexical_key_fn(symbols, ast_order=ast_order) + + # beta (from b.c) should sort before alpha (from a.c) because b.c has higher priority + assert key_fn(beta_usr) < key_fn(alpha_usr) + + +def test_symbol_key_uses_include_order(tmp_path: Path): + a_header = tmp_path / "a.h" + b_header = tmp_path / "b.h" + main_c = tmp_path / "main.c" 
+ + a_header.write_text( + dedent( + """ + #ifndef A_H + #define A_H + static inline int from_a(void) { return 1; } + #endif + """ + ) + ) + b_header.write_text( + dedent( + """ + #ifndef B_H + #define B_H + static inline int from_b(void) { return 2; } + #endif + """ + ) + ) + main_c.write_text( + dedent( + """ + #include "b.h" + #include "a.h" + + int main(void) { + return from_a() + from_b(); + } + """ + ) + ) + + symbols = ast.extract_info_c(ast.create_translation_unit(main_c)).symbols + from_a_usr = _usr_by_spelling(symbols, "from_a") + from_b_usr = _usr_by_spelling(symbols, "from_b") + + key_fn = create_symbol_lexical_key_fn(symbols, _ast_order_from_symbols(symbols)) + + # from_b is included first, so it should sort before from_a + assert key_fn(from_b_usr) < key_fn(from_a_usr) + + +def test_nested_include_symbols_do_not_tie(tmp_path: Path): + parent_header = tmp_path / "parent.h" + nested_header = tmp_path / "nested.h" + main_c = tmp_path / "main.c" + + nested_header.write_text( + dedent( + """ + #ifndef NESTED_H + #define NESTED_H + static inline int nested_sym(void) { return 7; } + #endif + """ + ) + ) + parent_header.write_text( + dedent( + """ + #ifndef PARENT_H + #define PARENT_H + #include "nested.h" + static inline int parent_sym(void) { return 11; } + #endif + """ + ) + ) + main_c.write_text( + dedent( + """ + #include "parent.h" + + int main(void) { + return parent_sym() + nested_sym(); + } + """ + ) + ) + + symbols = ast.extract_info_c(ast.create_translation_unit(main_c)).symbols + parent_usr = _usr_by_spelling(symbols, "parent_sym") + nested_usr = _usr_by_spelling(symbols, "nested_sym") + + key_fn = create_symbol_lexical_key_fn(symbols, _ast_order_from_symbols(symbols)) + + # Desired behavior: nested_sym and parent_sym should not compare as equal + assert key_fn(nested_usr) != key_fn(parent_usr) + + +def test_consolidation_places_typedef_before_struct_definition(tmp_path: Path): + """ + - types.h: typedef struct X X; + - thing.h: includes types.h, 
defines struct X { fields }; + - thing.c: includes thing.h, uses X in function signatures + + Consolidation must place the typedef before the struct definition so + that uses of 'X' as a bare type name compile correctly. + """ + types_h = tmp_path / "types.h" + thing_h = tmp_path / "thing.h" + thing_c = tmp_path / "thing.c" + + types_h.write_text( + dedent( + """\ + #ifndef TYPES_H + #define TYPES_H + typedef struct git_callbacks git_callbacks; + #endif + """ + ) + ) + thing_h.write_text( + dedent( + """\ + #ifndef THING_H + #define THING_H + #include "types.h" + + struct git_callbacks { + int (*notify)(git_callbacks *self, int status); + void *payload; + }; + + int git_callbacks_init(git_callbacks *out); + #endif + """ + ) + ) + thing_c.write_text( + dedent( + """\ + #include "thing.h" + + int git_callbacks_init(git_callbacks *out) { + out->notify = 0; + out->payload = 0; + return 0; + } + """ + ) + ) + + # Parse the C file (which transitively includes types.h via thing.h) + compile_commands = _write_compile_commands(tmp_path, [thing_c]) + consolidated = consolidate_init(compile_commands, source_priority=[]) + + # The consolidated code must compile — the typedef must appear before + # the struct definition and function that use 'git_callbacks' as a type name. + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile:\n{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_consolidation_typedef_before_struct_cross_tu(tmp_path: Path): + """ + Cross-TU corner case: when the struct does NOT use the typedef name internally, + the typedef and struct can end up in the same SCC with cursors from different TUs. + clang_isBeforeInTranslationUnit returns 0 for both directions (undefined cross-TU), + so order depends on sort stability. + + In valid C, if a struct body uses the typedef name, the typedef must be included + before it — meaning both symbols always appear in the same TU. 
So cross-TU + comparison can only happen when the struct does NOT reference the typedef, + in which case ordering doesn't affect compilability. + + - types.h: typedef struct Node Node; + - node.h: struct Node { int val; struct Node *next; }; (struct tag only) + - api.c: includes types.h + node.h, uses Node * in function + - internal.c: includes node.h only, uses struct Node * + """ + types_h = tmp_path / "types.h" + node_h = tmp_path / "node.h" + api_c = tmp_path / "api.c" + internal_c = tmp_path / "internal.c" + + types_h.write_text( + dedent( + """\ + #ifndef TYPES_H + #define TYPES_H + typedef struct Node Node; + #endif + """ + ) + ) + node_h.write_text( + dedent( + """\ + #ifndef NODE_H + #define NODE_H + struct Node { + int val; + struct Node *next; + }; + #endif + """ + ) + ) + api_c.write_text( + dedent( + """\ + #include "types.h" + #include "node.h" + + Node *node_create(int val) { + (void)val; + return (Node *)0; + } + """ + ) + ) + internal_c.write_text( + dedent( + """\ + #include "node.h" + + int node_get_val(struct Node *n) { + return n->val; + } + """ + ) + ) + + # Parse both TUs — after merge_symbols, the struct may retain its cursor from + # one TU and the typedef from another, making cross-TU location comparison undefined. + compile_commands = _write_compile_commands(tmp_path, [internal_c, api_c]) + consolidated = consolidate_init( + compile_commands, source_priority=[internal_c.resolve(), api_c.resolve()] + ) + + # The typedef must appear before usages of 'Node' as a bare type name. 
+ success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile:\n{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_consolidation_mutual_cross_tu_typedefs(tmp_path: Path): + """ + Mutual cross-references create a cycle that merges symbols from different TUs + into one SCC: + - a_types.h: typedef struct A A; + - b_types.h: typedef struct B B; + - a.c: includes both, defines struct A { B *ref; }; + function using A + - b.c: includes both, defines struct B { A *ref; }; + function using B + + After merge: struct A (from a.c) → typedef B → struct B (from b.c) → typedef A → struct A + All 4 in one SCC with cross-TU cursors. clang_isBeforeInTranslationUnit is + undefined across TUs, so the comparator must still produce compilable output. + """ + a_types_h = tmp_path / "a_types.h" + b_types_h = tmp_path / "b_types.h" + a_c = tmp_path / "a.c" + b_c = tmp_path / "b.c" + + a_types_h.write_text( + dedent( + """\ + #ifndef A_TYPES_H + #define A_TYPES_H + typedef struct A A; + #endif + """ + ) + ) + b_types_h.write_text( + dedent( + """\ + #ifndef B_TYPES_H + #define B_TYPES_H + typedef struct B B; + #endif + """ + ) + ) + a_c.write_text( + dedent( + """\ + #include "a_types.h" + #include "b_types.h" + + struct A { + B *ref; + int val; + }; + + A *create_a(void) { + return (A *)0; + } + """ + ) + ) + b_c.write_text( + dedent( + """\ + #include "a_types.h" + #include "b_types.h" + + struct B { + A *ref; + int val; + }; + + B *create_b(void) { + return (B *)0; + } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [a_c, b_c]) + consolidated = consolidate_init( + compile_commands, source_priority=[a_c.resolve(), b_c.resolve()] + ) + + # Both typedefs must appear before the struct definitions that reference them. 
+ success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile:\n{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_macro_wrapped_declaration(tmp_path: Path): + api_h = tmp_path / "api.h" + impl_c = tmp_path / "impl.c" + + api_h.write_text( + dedent( + """\ + #define LIB_EXPORT(type) extern type + + typedef struct my_object my_object; + + LIB_EXPORT(void) my_free(my_object *obj); + LIB_EXPORT(int) my_get_value(my_object *obj); + """ + ) + ) + + # my_free calls my_get_value and vice versa => mutual recursion => SCC + impl_c.write_text( + dedent( + """\ + #include "api.h" + + struct my_object { + int value; + int refcount; + }; + + void my_free(my_object *obj) + { + if (obj && my_get_value(obj) < 0) { + /* free */ + } + } + + int my_get_value(my_object *obj) + { + my_free(obj); + return obj->value; + } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [impl_c]) + consolidated = consolidate_init(compile_commands, source_priority=[]) + + # The consolidated output must not contain the unexpanded macro + assert "LIB_EXPORT" not in consolidated, ( + f"Consolidated output contains unexpanded macro 'LIB_EXPORT':\n{consolidated}" + ) + + # It must still compile + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile:\n{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_typedef_after_struct_cross_tu_three_tus(tmp_path: Path): + """ + Three-TU corner case exposing invalid ordering when typedef and struct + definition form a cycle (same SCC) but their cursors come from different TUs + after merge_symbols. 
+ + Setup: + - types.h: typedef struct X X; (forward-declares struct X via typedef) + - TU1 (a.c): #include "types.h", defines struct X { X *self; int val; }; + The struct body uses the typedef name 'X' → creates cycle: + struct X → typedef X → struct X + - TU2 (b.c): #include "types.h" only, uses X* in a function signature + - TU3 (c.c): #include "types.h", defines struct Y { X *member; }; and a + function returning Y* + + After merge_symbols with asts=[TU2, TU3, TU1]: + - typedef X cursor retained from TU2 (first encounter, definition) + - struct X cursor from TU1 (only TU with full definition) + + With ast_order=[a.c, b.c, c.c]: + - struct X from a.c → rank 0 + - typedef X from b.c → rank 1 + + They share an SCC (mutual dependency via X *self in struct body), so + _merge_pure_type_declaration_sccs sorts them by symbol_lexical_key which + uses ast_order ranks. struct X (rank 0) sorts before typedef X (rank 1). + + Result: consolidated output places struct X { X *self; ... } BEFORE + typedef struct X X; → 'X' is unknown at that point → compilation failure. + """ + types_h = tmp_path / "types.h" + a_c = tmp_path / "a.c" + b_c = tmp_path / "b.c" + c_c = tmp_path / "c.c" + + types_h.write_text( + dedent( + """\ + #ifndef TYPES_H + #define TYPES_H + typedef struct X X; + #endif + """ + ) + ) + a_c.write_text( + dedent( + """\ + #include "types.h" + + struct X { + X *self; + int val; + }; + + X *create_x(int v) { + (void)v; + return (X *)0; + } + """ + ) + ) + b_c.write_text( + dedent( + """\ + #include "types.h" + + void consume_x(X *p) { + (void)p; + } + """ + ) + ) + c_c.write_text( + dedent( + """\ + #include "types.h" + + struct Y { + X *member; + int id; + }; + + struct Y *alloc_y(void) { + return (struct Y *)0; + } + """ + ) + ) + + # Parse TUs — process b first so merge_symbols retains typedef X cursor from b.c + # In init(), get_asts processes in compile_commands order, so list b first. 
+ # ast_order is derived from source_priority: a.c first so struct X gets rank 0. + compile_commands = _write_compile_commands(tmp_path, [b_c, c_c, a_c]) + consolidated = consolidate_init( + compile_commands, source_priority=[a_c.resolve(), b_c.resolve(), c_c.resolve()] + ) + + # The typedef MUST appear before the struct definition that uses 'X' as a + # bare type name in its body. If the cross-TU lexical key comparison + # incorrectly places struct X before typedef X, this will fail. + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile:\n{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_static_function_and_static_variable_same_name_renamed(tmp_path: Path): + a_c = tmp_path / "a.c" + b_c = tmp_path / "b.c" + + a_c.write_text( + dedent( + """\ + static int some(int x) { return x; } + + int use_a(void) { return some(42); } + """ + ) + ) + b_c.write_text( + dedent( + """\ + static int some; + + int use_b(void) { return some; } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [a_c, b_c]) + consolidated = consolidate_init( + compile_commands, source_priority=[a_c.resolve(), b_c.resolve()] + ) + + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile (missing rename for static name collision):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_static_variable_tentative_defs_same_name_renamed(tmp_path: Path): + a_c = tmp_path / "a.c" + b_c = tmp_path / "b.c" + + a_c.write_text( + dedent( + """\ + static int count; + + int get_a(void) { count += 2; return count; } + """ + ) + ) + b_c.write_text( + dedent( + """\ + static char count; + + int get_b(void) { return (int)count + 1; } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [a_c, b_c]) + consolidated = consolidate_init( + compile_commands, source_priority=[a_c.resolve(), 
b_c.resolve()] + ) + + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile (missing rename for static variable collision):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +@pytest.mark.xfail( + reason="USR mismatch from -isystem; fixed at cmake level in ideas.cmake._normalize_isystem" +) +def test_isystem_inline_function_dependency_not_lost(tmp_path: Path): + """ + When a header is included via -isystem in one TU but via -I in another, + clang generates different USRs for the same static inline function + (e.g. "c:@F@fn" vs "c:file.h@F@fn"). This causes the dependency edge + from a caller in the -isystem TU to be silently dropped during the + .subgraph(project_symbols.keys()) step, because the system-style USR + doesn't match the non-system USR retained in project_symbols. + + This manifests as the inline function definition being placed AFTER + its caller in the consolidated output, causing: + error: call to undeclared function 'my_alloc'; ISO C99 and later do not + support implicit function declarations + """ + from clang.cindex import TranslationUnit as TU + + # alloc.h in util/ with a static inline function + util_dir = tmp_path / "util" + util_dir.mkdir() + alloc_h = util_dir / "alloc.h" + alloc_h.write_text( + dedent( + """\ + #include + static inline void *my_alloc(size_t len) { + return malloc(len); + } + """ + ) + ) + + # bridge.h wraps my_alloc in a macro + ext_dir = tmp_path / "ext" + ext_dir.mkdir() + bridge_h = ext_dir / "bridge.h" + bridge_h.write_text( + dedent( + """\ + #include "alloc.h" + #define ext_malloc(x) my_alloc(x) + """ + ) + ) + + # caller.c in ext/ - calls my_alloc via ext_malloc macro + caller_c = ext_dir / "caller.c" + caller_c.write_text( + dedent( + """\ + #include "bridge.h" + + typedef struct { int val; } item_t; + + static item_t *make_item(int val) { + item_t *p; + if (!(p = (item_t *)ext_malloc(sizeof(item_t)))) + return (void *)0; + 
p->val = val; + return p; + } + + int do_work(int x) { + item_t *item = make_item(x); + if (item) return item->val; + return -1; + } + """ + ) + ) + + # user.c in util/ - calls my_alloc directly + user_c = util_dir / "user.c" + user_c.write_text( + dedent( + """\ + #include "alloc.h" + + void *my_calloc(size_t n, size_t sz) { + void *p = my_alloc(n * sz); + return p; + } + """ + ) + ) + + # Parse caller.c with -isystem for util/ (ext target uses SYSTEM includes) + caller_tu = TU.from_source( + None, + args=["-c", str(caller_c), "-isystem", str(util_dir), f"-I{ext_dir}"], + ) + assert not any(d.severity >= 3 for d in caller_tu.diagnostics) + + # Parse user.c with regular -I for util/ + user_tu = TU.from_source(None, args=["-c", str(user_c), f"-I{util_dir}"]) + assert not any(d.severity >= 3 for d in user_tu.diagnostics) + + caller_tree = ast.extract_info_c(caller_tu) + user_tree = ast.extract_info_c(user_tu) + + # Verify the USR mismatch exists + caller_alloc_usr = next( + n for n, s in caller_tree.symbols.items() if s.spelling == "my_alloc" + ) + user_alloc_usr = next(n for n, s in user_tree.symbols.items() if s.spelling == "my_alloc") + assert caller_alloc_usr != user_alloc_usr, ( + "Expected USR mismatch between -isystem and -I includes" + ) + + # Put caller.c FIRST in ast_order so its symbols have higher priority + # in the lexicographic sort. This ensures my_alloc (from user.c, rank 1) + # sorts AFTER make_item (from caller.c, rank 0) when the dependency + # edge is missing. 
+ asts = [caller_tree, user_tree] + ast_order = create_ast_order([caller_c, user_c], asts) + + symbols, dependencies = get_symbols_and_dependencies(asts, ast_order=ast_order) + + symbol_lexical_key = create_symbol_lexical_key_fn(symbols, ast_order) + sorted_symbol_groups = list( + nx.lexicographical_topological_sort( + nx.from_dict_of_lists(dependencies, create_using=nx.DiGraph).reverse(copy=False), # type: ignore[reportArgumentType] + key=symbol_lexical_key, + ) + ) + + # Build consolidated output + sources: list[str] = get_includes(symbols) + [""] + for group in sorted_symbol_groups: + if len(group) > 1: + for name in group: + declaration = symbols[name].declaration + if declaration and declaration.text not in sources: + sources.append(declaration.text) + for name in group: + definition = symbols[name].code.text + if definition not in sources: + sources.append(definition) + + consolidated = "\n".join(sources) + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code does not compile (isystem USR mismatch lost dependency):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_static_inline_in_scc_emitted_before_caller(tmp_path: Path): + """ + When a static inline function from a header participates in a dependency + cycle (via a global variable whose initializer references its caller), + all participants collapse into one SCC. The lexical sort within that SCC + uses TU rank. If the caller's TU has a LOWER rank than the inline's TU, + the caller is emitted first — before the inline is defined — causing: + "call to undeclared function" + + The static inline has declaration=None (the definition IS the declaration), + so the SCC emission logic cannot emit a forward declaration for it. 
+ + Setup: + header.h: struct vtable_t, extern vtable, static inline helper() + caller.c: #include "header.h", defines compute() which calls helper() + state.c: #include "header.h", defines vtable = { .fn = compute } + + Cycle: compute -> helper -> vtable -> compute + merge_symbols picks helper from state.c (processed first in asts). + ast_order = [caller.c, state.c] => caller.c rank 0, state.c rank 1. + SCC sort: compute(rank 0) before helper(rank 1) => BUG. + """ + from clang.cindex import TranslationUnit as TU + + # header.h: static inline helper reads extern vtable + header_h = tmp_path / "header.h" + header_h.write_text( + dedent( + """\ + struct vtable_t { int (*fn)(int); }; + extern struct vtable_t vtable; + static inline int helper(int x) { + return vtable.fn(x); + } + """ + ) + ) + + # caller.c (rank 0): defines compute() which calls helper() + caller_c = tmp_path / "caller.c" + caller_c.write_text( + dedent( + """\ + #include "header.h" + int compute(int x) { + return helper(x) + 1; + } + """ + ) + ) + + # state.c (rank 1): includes header.h, defines vtable referencing compute + state_c = tmp_path / "state.c" + state_c.write_text( + dedent( + """\ + #include "header.h" + int compute(int x); + struct vtable_t vtable = { .fn = compute }; + """ + ) + ) + + caller_tu = TU.from_source(None, args=["-c", str(caller_c), f"-I{tmp_path}"]) + state_tu = TU.from_source(None, args=["-c", str(state_c), f"-I{tmp_path}"]) + assert not any(d.severity >= 3 for d in caller_tu.diagnostics) + assert not any(d.severity >= 3 for d in state_tu.diagnostics) + + caller_tree = ast.extract_info_c(caller_tu) + state_tree = ast.extract_info_c(state_tu) + + # state_tree FIRST in asts so merge_symbols picks helper from state.c + # (both have identical code; first encountered wins => state.c). + # ast_order = [caller.c, state.c]: rank 0, rank 1. + # Result: compute(rank 0) emitted before helper(rank 1) in SCC. 
+ # helper has declaration=None (static inline), so no forward decl is emitted. + asts = [state_tree, caller_tree] + ast_order = create_ast_order([caller_c, state_c], asts) + + symbols, dependencies = get_symbols_and_dependencies(asts, ast_order=ast_order) + + # Verify cycle exists + scc_groups = [group for group in dependencies if len(group) > 1] + assert scc_groups, "Expected at least one multi-member SCC" + + # Build consolidated output (mirrors compose_all logic) + symbol_lexical_key = create_symbol_lexical_key_fn(symbols, ast_order) + sorted_symbol_groups = list( + nx.lexicographical_topological_sort( + nx.from_dict_of_lists(dependencies, create_using=nx.DiGraph).reverse(copy=False), # type: ignore[reportArgumentType] + key=symbol_lexical_key, + ) + ) + + sources: list[str] = get_includes(symbols) + [""] + for group in sorted_symbol_groups: + if len(group) > 1: + for name in group: + declaration = symbols[name].declaration + if declaration and declaration.text not in sources: + sources.append(declaration.text) + for name in group: + definition = symbols[name].code.text + if definition not in sources: + sources.append(definition) + + consolidated = "\n".join(sources) + success, error = check_c(consolidated, flags=["-fsyntax-only", "-Wall"]) + assert success, ( + f"Consolidated code fails (static inline in SCC emitted after caller due to TU rank):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_system_macro_double_expansion(tmp_path: Path): + main_c = tmp_path / "main.c" + main_c.write_text( + dedent( + """\ + #include + + void setup_signal(void) { + struct sigaction ign_handler; + ign_handler.sa_handler = SIG_IGN; + } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [main_c]) + consolidated = consolidate_init(compile_commands, source_priority=[]) + + # The consolidated code must compile without double macro expansion + success, error = check_c(consolidated, flags=["-fsyntax-only"]) + assert success, ( + 
f"Consolidated code does not compile (system macro double expansion):\n{error}\n\n" + f"Consolidated output:\n{consolidated}" + ) + + +def test_gnu_source_preserved_in_consolidation(tmp_path: Path): + main_c = tmp_path / "main.c" + main_c.write_text( + dedent( + """\ + #include + #include + #include + #include + + int count_env(void) { + int count = 0; + char **kv; + for (kv = environ; *kv; kv++) + count++; + return count; + } + + int check_access(const char *path) { + return euidaccess(path, R_OK); + } + + int make_pipe(int fd[2]) { + return pipe2(fd, O_CLOEXEC); + } + + static int cmp_with_ctx(const void *a, const void *b, void *ctx) { + int offset = *(int *)ctx; + return (*(const int *)a + offset) - (*(const int *)b + offset); + } + + void sort_with_context(int *arr, size_t n, int offset) { + qsort_r(arr, n, sizeof(int), cmp_with_ctx, &offset); + } + + const char *get_safe_env(const char *name) { + return secure_getenv(name); + } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [main_c], extra_flags="-D_GNU_SOURCE") + consolidated = consolidate_init(compile_commands, source_priority=[]) + + # All _GNU_SOURCE-gated symbols must appear in the consolidated output + for sym in ("environ", "euidaccess", "pipe2", "qsort_r", "secure_getenv"): + assert sym in consolidated, ( + f"Consolidated output is missing '{sym}' usage:\n{consolidated}" + ) + + success, error = check_c(consolidated, flags=["-fsyntax-only"]) + assert success, ( + f"Consolidated code does not compile without -D_GNU_SOURCE " + f"(feature-test macro lost during consolidation):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_posix_c_source_preserved_in_consolidation(tmp_path: Path): + main_c = tmp_path / "main.c" + main_c.write_text( + dedent( + """\ + #include + #include + #include + + long get_monotonic_ns(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000L + ts.tv_nsec; + } + + char *duplicate(const char 
*s) { + return strdup(s); + } + + char *next_token(char *str, char **saveptr) { + return strtok_r(str, ":", saveptr); + } + """ + ) + ) + + compile_commands = _write_compile_commands( + tmp_path, [main_c], extra_flags="-std=c11 -D_POSIX_C_SOURCE=200809L" + ) + consolidated = consolidate_init(compile_commands, source_priority=[]) + + # All _POSIX_C_SOURCE-gated symbols must appear in the consolidated output + for sym in ("clock_gettime", "strdup", "strtok_r"): + assert sym in consolidated, ( + f"Consolidated output is missing '{sym}' usage:\n{consolidated}" + ) + + success, error = check_c(consolidated, flags=["-std=c11", "-fsyntax-only"]) + assert success, ( + f"Consolidated code does not compile with -std=c11 without _POSIX_C_SOURCE " + f"(clock_gettime/strdup/strtok_r undeclared — feature-test macro lost):\n" + f"{error}\n\nConsolidated output:\n{consolidated}" + ) + + +def test_system_macro_undefs_preserve_benign_macros(tmp_path: Path): + main_c = tmp_path / "main.c" + main_c.write_text( + dedent( + """\ + #include + #include + #include + #include + #include + #include + #include + + int run(void) { + char *p = NULL; + if (p == NULL) + return EXIT_FAILURE; + int c = fgetc(stdin); + if (c == EOF) + return EXIT_FAILURE; + if (fseek(stdin, 0, SEEK_SET) != 0) + return EXIT_FAILURE; + return EXIT_SUCCESS; + } + + double compute(double x) { + if (x > DBL_MAX) + return HUGE_VAL; + if (x != x) + return NAN; + return x; + } + + bool check(int x) { + if (x > INT_MAX / 2) + return false; + if (x < INT_MIN / 2) + return false; + return true; + } + """ + ) + ) + + compile_commands = _write_compile_commands(tmp_path, [main_c]) + consolidated = consolidate_init(compile_commands, source_priority=[]) + + success, error = check_c(consolidated, flags=["-fsyntax-only"]) + assert success, ( + f"Consolidated code does not compile (benign macros broken):\n{error}\n\n" + f"Consolidated output:\n{consolidated}" + ) diff --git a/test/test_extract_code_from_tu.py 
b/test/test_extract_code_from_tu.py index 4f5a300..cd03781 100644 --- a/test/test_extract_code_from_tu.py +++ b/test/test_extract_code_from_tu.py @@ -22,14 +22,20 @@ def i_code(fixtures_dir: Path) -> str: return (fixtures_dir / "formatting.c.i").read_text() +def parse_c(code: str): + return ast.create_translation_unit(ast.CodeC(code=code)) + + def test_all_code_from_tu(i_code: str): # Parse the code using clang - tu = ast.create_translation_unit(i_code) + tu = parse_c(i_code) result = ast.extract_info_c(tu) + assert isinstance(result.symbols["c:@F@foo"].code, ast.CodeC) + # Check for exact formatting assert ( - result.symbols["c:@F@foo"].code + result.symbols["c:@F@foo"].code.code == d( """ void foo() { @@ -44,16 +50,17 @@ def test_all_code_from_tu(i_code: str): } """ ).strip() + + "\n" ) def test_newline(): code = "int main(int argc, char **argv) { return 0;\r\n}" - tu = ast.create_translation_unit(code) + tu = parse_c(code) result = ast.extract_info_c(tu) assert ( - result.symbols["c:@F@main"].code + result.symbols["c:@F@main"].code.code == d( """ int main(int argc, char **argv) { @@ -61,4 +68,5 @@ def test_newline(): } """ ).strip() + + "\n" ) diff --git a/test/test_templating.py b/test/test_templating.py index 6221cdf..cdaf820 100644 --- a/test/test_templating.py +++ b/test/test_templating.py @@ -8,7 +8,7 @@ import pytest from pathlib import Path -from ideas.ast_rust import validate_changes +from ideas.ast_rust import CodeRust, validate_changes @pytest.fixture @@ -32,11 +32,11 @@ def modified_invalid(fixtures_dir: Path) -> str: def test_modified_valid(template: str, modified_valid: str): - feedback = validate_changes(modified_valid, template) + feedback = validate_changes(CodeRust(code=modified_valid), CodeRust(code=template)) assert not feedback def test_modified_invalid(template: str, modified_invalid: str): - feedback = validate_changes(modified_invalid, template) + feedback = validate_changes(CodeRust(code=modified_invalid), CodeRust(code=template)) assert 
feedback assert list(feedback.keys()) == ["top_level_changes", "signature_changes"] diff --git a/test/test_tools.py b/test/test_tools.py index 22f6195..6022073 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: Apache-2.0 # -from contextlib import nullcontext as does_not_raise from pathlib import Path import pytest @@ -22,38 +21,11 @@ def c_paths(fixtures_dir: Path) -> tuple[Path, ...]: return fixtures_dir / "hello_world_good.c", fixtures_dir / "hello_world_bad.c" -@pytest.fixture -def c_files(c_paths: tuple[Path, Path]) -> tuple[str, ...]: - return str(c_paths[0]), str(c_paths[1]) - - @pytest.fixture def rust_paths(fixtures_dir: Path) -> tuple[Path, Path]: return fixtures_dir / "hello_world_good.rs", fixtures_dir / "hello_world_bad.rs" -@pytest.fixture -def rust_files(rust_paths: tuple[Path, Path]) -> tuple[str, str]: - return str(rust_paths[0]), str(rust_paths[1]) - - -@pytest.fixture -def clippy_files(fixtures_dir: Path) -> str: - return str(fixtures_dir / "clippy.rs") - - -def test_compile_c(c_files: tuple[str, str], tmpdir: Path): - # Compilation should succeed - success1, out1 = tools.compile_c(c_files[0], str(tmpdir / "out")) - assert success1 - assert out1 == "" - - # Compilation should fail - success2, out2 = tools.compile_c(c_files[1], str(tmpdir / "out")) - assert not success2 - assert out2 != "" - - def test_check_c(c_paths: tuple[Path, ...]): # Compilation should succeed success1, out1 = tools.check_c(c_paths[0].read_text()) @@ -66,68 +38,6 @@ def test_check_c(c_paths: tuple[Path, ...]): assert out2 != "" -def test_compile_rust(rust_files: tuple[str, str], tmpdir: Path): - # Compilation should succeed - success1, out1 = tools.compile_rust(Path(rust_files[0]).read_text(), tmpdir / "out") - assert success1 - assert out1 == "" - - # Compilation should fail - success2, out2 = tools.compile_rust(Path(rust_files[1]).read_text(), tmpdir / "out") - assert not success2 - assert out2 != "" - - -def 
test_clippy(clippy_files: str): - # All clippy calls should trigger - all_out = tools.run_clippy(clippy_files) - successes, stdouts, stderrs, _ = zip(*all_out) - outputs = [o + e for o, e in zip(stdouts, stderrs)] - assert not any(successes) - assert not any(map(lambda out: out == "", outputs)) - - -def test_structured(rust_files: tuple[str, str], clippy_files: str, tmpdir: Path): - # JSON dict construction should succeed - _, structured_output = tools.compile_rust( - Path(rust_files[1]).read_text(), tmpdir / "out", structured_output=True - ) - - with does_not_raise(): - as_json = tools.tool_output_to_js_dict(structured_output) - - # Message rendered from the JSON dict should be identical to the original render - _, rendered_og_all = tools.compile_rust(Path(rust_files[1]).read_text(), tmpdir / "out") - rendered_reconstructed = tools.structured_to_rendered(as_json) - # FIXME: strip at the call sites vs somewhere in tools.py - assert rendered_reconstructed.rstrip() == rendered_og_all.rstrip() - - # JSON dict construction should succeed - all_out = tools.run_clippy(clippy_files, structured_output=True) - _, _, structured_outputs, _ = zip(*all_out) - structured_outputs = list(structured_outputs) - - with does_not_raise(): - as_json_all = tools.tool_output_to_js_dict(structured_outputs) - as_json_individual = [tools.tool_output_to_js_dict(so) for so in structured_outputs] - - # Messages rendered from the JSON dict should be identical to the original render - all_out = tools.run_clippy(clippy_files) - _, stdouts, stderrs, _ = zip(*all_out) - rendered_og_all = [o + e for o, e in zip(stdouts, stderrs)] - - rendered_reconstructed_all = tools.structured_to_rendered(as_json_all) - assert rendered_reconstructed_all == "".join(rendered_og_all) - - rendered_reconstructed_individual = [ - tools.structured_to_rendered(so) for so in as_json_individual - ] - the_same = map( - lambda og, recon: og == recon, rendered_og_all, rendered_reconstructed_individual - ) - assert all(the_same) 
- - def test_check_rust(rust_paths: tuple[Path, Path]): # Compilation should succeed success1, out1 = tools.check_rust(rust_paths[0].read_text()) diff --git a/test/test_wrapper.py b/test/test_wrapper.py new file mode 100644 index 0000000..e713f67 --- /dev/null +++ b/test/test_wrapper.py @@ -0,0 +1,213 @@ +# +# Copyright (C) 2026 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +from pathlib import Path +from textwrap import dedent + +import pytest + +from ideas import wrapper as wrapper_mod + + +@pytest.mark.parametrize( + ("source", "symbol", "expected"), + [ + ( + "int foo = 7;\n", + "foo", + dedent( + """ + unsafe extern "C" { + pub static mut foo: ::std::os::raw::c_int; + } + """ + ).strip(), + ), + ( + "static int sfoo = 7;\n", + "sfoo", + dedent( + """ + unsafe extern "C" { + pub static mut sfoo: ::std::os::raw::c_int; + } + """ + ).strip(), + ), + ( + "int arr[3] = {1,2,3};\n", + "arr", + dedent( + """ + unsafe extern "C" { + pub static mut arr: [::std::os::raw::c_int; 3usize]; + } + """ + ).strip(), + ), + ( + "int arr_unsized[] = {1,2,3};\n", + "arr_unsized", + dedent( + """ + unsafe extern "C" { + pub static mut arr_unsized: [::std::os::raw::c_int; 0usize]; + } + """ + ).strip(), + ), + ( + "static int sarr[2] = {4,5};\n", + "sarr", + dedent( + """ + unsafe extern "C" { + pub static mut sarr: [::std::os::raw::c_int; 2usize]; + } + """ + ).strip(), + ), + ( + "struct Point { int x; int y; };\nstruct Point pt = {1,2};\n", + "pt", + dedent( + """ + #[repr(C)] + #[derive(Debug, Copy, Clone)] + pub struct Point { + pub x: ::std::os::raw::c_int, + pub y: ::std::os::raw::c_int, + } + unsafe extern "C" { + pub static mut pt: Point; + } + """ + ).strip(), + ), + ( + "struct Point { int x; int y; };\nstatic struct Point spt = {3,4};\n", + "spt", + dedent( + """ + #[repr(C)] + #[derive(Debug, Copy, Clone)] + pub struct Point { + pub x: ::std::os::raw::c_int, + pub y: ::std::os::raw::c_int, + } + unsafe extern "C" { + pub static mut spt: Point; + } + 
""" + ).strip(), + ), + ( + "const int c = 9;\n", + "c", + dedent( + """ + unsafe extern "C" { + pub static c: ::std::os::raw::c_int; + } + """ + ).strip(), + ), + ( + "int x = 0; int *px = &x;\n", + "px", + dedent( + """ + unsafe extern "C" { + pub static mut px: *mut ::std::os::raw::c_int; + } + """ + ).strip(), + ), + ( + "int match = 1;\n", + "match", + dedent( + """ + unsafe extern "C" { + #[link_name = "\\u{1}match"] + pub static mut match_: ::std::os::raw::c_int; + } + """ + ).strip(), + ), + ], +) +def test_bindgen_emits_expected_text_for_global_shapes( + tmp_path: Path, source: str, symbol: str, expected: str +): + c_path = tmp_path / "input.c" + c_path.write_text(source) + + binding = wrapper_mod.bindgen(c_path, symbol) + + assert binding.text.strip() == expected + assert c_path.read_text() == source + + +def test_bindgen_restores_source_when_bindgen_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + c_path = tmp_path / "input.c" + original_src = "static int foo = 7;\n" + c_path.write_text(original_src) + + monkeypatch.setattr( + wrapper_mod, "run_subprocess", lambda *_args, **_kwargs: (False, "", "boom", 1) + ) + + with pytest.raises(ValueError, match="Bindgen failed"): + wrapper_mod.bindgen(c_path, "foo") + + assert c_path.read_text() == original_src + + +def test_bindgen_raises_for_empty_binding(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + c_path = tmp_path / "input.c" + c_path.write_text("int foo(void) { return 1; }\n") + + monkeypatch.setattr( + wrapper_mod, "run_subprocess", lambda *_args, **_kwargs: (True, " \n", "", 0) + ) + + with pytest.raises(ValueError, match="empty binding"): + wrapper_mod.bindgen(c_path, "foo") + + +def test_bindgen_handles_dependent_declarations_for_target_global(tmp_path: Path): + c_path = tmp_path / "input.c" + array_decl = "int arr[] = {1,2,3};\n" + dependent_decl = "static const int num_arr = sizeof(arr) / sizeof(arr[0]);\n" + + c_path.write_text(array_decl) + baseline_binding = 
wrapper_mod.bindgen(c_path, "arr") + assert c_path.read_text() == array_decl + + c_path.write_text(array_decl + dependent_decl) + dependent_binding = wrapper_mod.bindgen(c_path, "arr") + assert c_path.read_text() == array_decl + dependent_decl + + assert dependent_binding.text.strip() == baseline_binding.text.strip() + + +def test_bindgen_handles_dependent_declarations_for_target_function(tmp_path: Path): + c_path = tmp_path / "input.c" + baseline_source = "int f(int x) { return x + 1; }\n" + dependent_source = baseline_source + "int (*pf)(int) = f;\n" + + c_path.write_text(baseline_source) + baseline_binding = wrapper_mod.bindgen(c_path, "f") + assert c_path.read_text() == baseline_source + + c_path.write_text(dependent_source) + dependent_binding = wrapper_mod.bindgen(c_path, "f") + assert c_path.read_text() == dependent_source + + assert dependent_binding.text.strip() == baseline_binding.text.strip() diff --git a/tools/crateify/Cargo.lock b/tools/crateify/Cargo.lock deleted file mode 100644 index 8f3ac7f..0000000 --- a/tools/crateify/Cargo.lock +++ /dev/null @@ -1,7 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 4 - -[[package]] -name = "ideas_crateify" -version = "0.1.0" diff --git a/tools/crateify/Cargo.toml b/tools/crateify/Cargo.toml deleted file mode 100644 index 9ea937f..0000000 --- a/tools/crateify/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "ideas_crateify" -description = "A little tool for creating the base structure for a Rust crate from a C source project" -version = "0.1.0" -edition = "2024" -license = "Apache-2.0" -authors = ["Marcela Melara "] - -[[bin]] -name = "crateify" -path = "src/main.rs" diff --git a/tools/crateify/README.md b/tools/crateify/README.md deleted file mode 100644 index 1ff49ea..0000000 --- a/tools/crateify/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# IDEAS Crateify - -This is a little Rust tool that converts a directory structure with Rust source -files into a valid Rust crate. - -## Pre-requisites - -Install a Rust toolchain (1.85 or later) (see https://www.rust-lang.org/tools/install) -```bash -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -``` - -## Usage - -``` bash -crateify -``` diff --git a/tools/crateify/src/main.rs b/tools/crateify/src/main.rs deleted file mode 100644 index 3fdfc44..0000000 --- a/tools/crateify/src/main.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::env; -use std::fs; -use std::io; -use std::io::{Error, ErrorKind, Write}; -use std::path::Path; - -// inspired by https://github.com/stepancheg/rust-protobuf/blob/7131fb244fb1246d2835f5ad7426e607ee7c4a1f/protobuf-codegen/src/gen/mod_rs.rs -fn gen_interm_mod_rs(path: &Path, mods: Vec) -> io::Result<()> { - // skip if we have no mods - if mods.is_empty() { - return Ok(()); - } - - let mod_path = path.join("mod.rs"); - let mut f = fs::File::create(mod_path)?; - - let mut sorted: Vec = mods.into_iter().collect(); - sorted.sort(); - for m in sorted { - f.write_fmt(format_args!("pub mod {m};\n"))?; - } - - Ok(()) -} - -/// Recurses through the pre-generated Rust translation directory an generates the required mod.rs files at each 
directory layer -fn crateify(input_path: &Path) -> io::Result<()> { - if input_path.is_dir() { - let mut mods = Vec::::new(); - - for entry in fs::read_dir(&input_path)? { - let entry = entry?; - let path = entry.path(); - if path.is_dir() { - // save the sub mod name so we can include it in the mod.rs - let submod_dir_str = path.to_str().unwrap(); - let mod_name = Path::new(&submod_dir_str).file_name().unwrap(); - - if let Some(m) = mod_name.to_str() { - mods.push(m.to_string()); - } - - crateify(&path)?; - } else { - // we've reached the deepest directory, so we treat each .rs - // source file as its own module - let ext = path.extension().unwrap(); - if let Some(e) = ext.to_str() { - if e == "rs" { - let mod_name = path.file_stem().unwrap(); - if let Some(m) = mod_name.to_str() { - mods.push(m.to_string()); - } - } - } - } - } - gen_interm_mod_rs(&input_path, mods)?; - } - Ok(()) -} - -fn main() -> io::Result<()> { - let args: Vec = env::args().collect(); - - // throw an error if we dont' receive any args - if args.len() < 2 { - return Err(Error::new( - ErrorKind::InvalidInput, - "crateify expects one input argument for the crate directory", - )); - } - - // ignore any other args besides the top-level translation dir - let translation_dir = &args[1]; - - crateify(Path::new(translation_dir)) -} diff --git a/tools/rust_tests/lib_testing.rs b/tools/rust_tests/lib_testing.rs new file mode 100644 index 0000000..6d38a7e --- /dev/null +++ b/tools/rust_tests/lib_testing.rs @@ -0,0 +1,49 @@ +use std::process::Command; + +use once_cell::sync::Lazy; +use std::path::PathBuf; +static ARTIFACT_DIR: Lazy = Lazy::new(|| { + test_cdylib::build_current_project() + .parent() + .expect("Failed to get parent directory of built library") + .to_path_buf() +}); + +fn parse_test_output(output: &str, vector_path: &str) -> bool { + // Check if the output contains ": true" anywhere + output.contains(&format!("{}: true", vector_path)) +} + +fn run_test_vector(vector_path: &str, 
runner_manifest_path: &str) { + // Run the test vector + let output = Command::new("cargo") + .args(&["run", "--release", "--manifest-path", runner_manifest_path]) + .arg("--") + .arg("-b") + .arg(&*ARTIFACT_DIR) + .arg("lib") + .args(&["-c", vector_path]) + .arg("-d") + .output() + .expect("Failed to execute runner"); + + let stdout = String::from_utf8_lossy(&output.stdout); + + // Parse and assert + let success = parse_test_output(&stdout, &vector_path); + assert!(success, "Test failed or crashed. Full output:\n{}", stdout); +} + +// Macro to generate tests from test vector files +macro_rules! generate_tests { + ($runner_manifest:expr; $($name:ident => $path:expr),* $(,)?) => { + $( + #[test] + fn $name() { + run_test_vector($path, $runner_manifest); + } + )* + }; +} + +// Auto-generated from test vectors diff --git a/uv.lock b/uv.lock index 9c1c059..ac8e71e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,11 @@ version = 1 revision = 3 requires-python = "==3.13.*" +resolution-markers = [ + "sys_platform == 'win32'", + "sys_platform == 'emscripten'", + "sys_platform != 'emscripten' and sys_platform != 'win32'", +] [manifest] build-constraints = [{ name = "setuptools", specifier = "==81.0.0" }] @@ -74,6 +79,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/88/6237e97e3385b57b5f1528647addea5cc03d4d65d5979ab24327d41fb00d/alembic-1.17.2-py3-none-any.whl", hash = "sha256:f483dd1fe93f6c5d49217055e4d15b905b425b6af906746abb35b69c1996c4e6", size = 248554, upload-time = "2025-11-14T20:35:05.699Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -246,16 +260,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/a4/608c542925949b300a295baa422b568f835044c5e3ad20820676b840228a/clang-21.1.7-py3-none-any.whl", hash = "sha256:23ee8f7b62af648009aee5139516b2a2a9320680dbce6e42a53e48bd5e8983ea", size = 40240, upload-time = "2025-12-18T22:04:50.636Z" }, ] +[[package]] +name = "claude-agent-sdk" +version = "0.1.80" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "mcp" }, + { name = "sniffio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/06/0984d8bc2f0f7b05aca2005461d587f9d04d8009fc4a2d333dec1c2f3164/claude_agent_sdk-0.1.80.tar.gz", hash = "sha256:1938d376cd6db273583266b184fc9caf53779841f131bf3fe308014707536019", size = 250299, upload-time = "2026-05-09T06:44:58.202Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/4d/fc78dae356a43126d0142921a73254f371b359b0508d5af046c43bc680bf/claude_agent_sdk-0.1.80-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0a26cfea92029f1e3bcc468657e9bbb464a7bf04519b528ec0182ede3415a311", size = 60909658, upload-time = "2026-05-09T06:45:01.651Z" }, + { url = "https://files.pythonhosted.org/packages/aa/08/586c98a59d30bea43d83a9db7f8468a24affd2f7d3721a0dd010bf4784c8/claude_agent_sdk-0.1.80-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:09f025305524e909c8ee190e73ad319b0d14f2c5d2d1ec567995c1eb833f4de7", size = 62949213, upload-time = "2026-05-09T06:45:04.848Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a8/e7825005610e711fdebcc5c82c5de2214bb967f1cf5a14edd50ef16e0bc0/claude_agent_sdk-0.1.80-py3-none-manylinux_2_17_aarch64.whl", hash 
= "sha256:be269e118cce52b638b17232f2a52ce4d0877218672261d987cc20b4e5d9c83a", size = 70625763, upload-time = "2026-05-09T06:45:07.899Z" }, + { url = "https://files.pythonhosted.org/packages/fb/dd/a754eed2ab4f8437aac52d4d321e28c4d8bfd6ca126b5179b441aa7aeadf/claude_agent_sdk-0.1.80-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:653fb53600777c253885f9536c17da19d12f9d7fedd5e419c522854f1089449a", size = 70806172, upload-time = "2026-05-09T06:45:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a8/3b27d7aa434b471a3100d158c7e709d09e7be0179a6c34be27def3ffaa1f/claude_agent_sdk-0.1.80-py3-none-win_amd64.whl", hash = "sha256:51ecfc32257201fc2cb6c061ba4d78e27b789a736fd5ed1e6ec0af60fd5d81aa", size = 71422151, upload-time = "2026-05-09T06:45:15.043Z" }, +] + [[package]] name = "click" -version = "8.3.1" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = 
"sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, ] [[package]] @@ -327,6 +359,64 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/f1/00ce3bde3ca542d1acd8f8cfa38e446840945aa6363f9b74746394b14127/cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3", size = 3472985, upload-time = "2026-04-08T01:57:36.714Z" }, ] +[[package]] +name = "datasets" +version = "4.8.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"] }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/34/14cd8e76f907f7d4dca2334cfeec9f81d30fd15c25a015f99aaea694eaed/datasets-4.8.5.tar.gz", hash = "sha256:0f0c1c3d56ffff2c93b2f4c63c95bac94f3d7e8621aea2a2a576275233bba772", size = 605649, upload-time = "2026-04-27T15:43:57.384Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/99/00f3196036501b53032c4b1ab8337a0b978dee832ed276dae3815df4e8b5/datasets-4.8.5-py3-none-any.whl", hash = "sha256:5079900781719c0e063a8efdd2cd95a31ad0c63209178669cd23cf1b926149ff", size = 528973, upload-time = "2026-04-27T15:43:53.702Z" }, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788, upload-time = 
"2020-04-20T14:23:38.738Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, +] + +[[package]] +name = "dill" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" }, +] + +[[package]] +name = "dirhash" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "scantree" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/70/49f93897f3a4f7ab5f20a854ebc91aad47854e9fb2cd169e3a4452fa3f5e/dirhash-0.5.0.tar.gz", hash = "sha256:e60760f0ab2e935d8cb088923ea2c6492398dca42cec785df778985fd4cd5386", size = 21377, upload-time = "2024-08-03T22:14:13.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/1f/c8bf92552b7f0a13b9f12b85e3de8df6d9814240e0f8ce8f37433df028b3/dirhash-0.5.0-py3-none-any.whl", hash = "sha256:523dfd6b058c64f45b31604376926c6e2bd2ea301d0df23095d4055674e38b09", size = 13119, upload-time = "2024-08-03T22:14:11.688Z" }, +] + [[package]] name = "diskcache" version = "5.6.3" @@ -406,6 +496,22 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/91/18/1c93641f25f4e76772b4ffb52a4e289f1706d6bda4f3b59bb6f7c339df46/dspy-3.1.2-py3-none-any.whl", hash = "sha256:23b98bf5abeda260722c445d397d07ea27488c204b8c0ccd6d3e607c4b41bc6b", size = 312290, upload-time = "2026-01-19T14:21:45.776Z" }, ] +[[package]] +name = "fastapi" +version = "0.136.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" }, +] + [[package]] name = "fastuuid" version = "0.14.0" @@ -501,6 +607,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + [[package]] name = "gepa" version = "0.0.24" @@ -510,6 +621,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b1/33b035ff1aaf22d4e104c5b15ba48fe5050639764457048e967c20d6317a/gepa-0.0.24-py3-none-any.whl", hash = "sha256:6d8b16699e7b24ed01435dea7bbbc89156a88cbb4b877b14d90e7455db2b0032", size = 137539, upload-time = "2026-01-05T16:45:29.244Z" }, ] +[[package]] +name = 
"google-api-core" +version = "2.30.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/502a57fb0ec752026d24df1280b162294b22a0afb98a326084f9a979138b/google_api_core-2.30.3.tar.gz", hash = "sha256:e601a37f148585319b26db36e219df68c5d07b6382cff2d580e83404e44d641b", size = 177001, upload-time = "2026-04-10T00:41:28.035Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/15/e56f351cf6ef1cfea58e6ac226a7318ed1deb2218c4b3cc9bd9e4b786c5a/google_api_core-2.30.3-py3-none-any.whl", hash = "sha256:a85761ba72c444dad5d611c2220633480b2b6be2521eca69cca2dbb3ffd6bfe8", size = 173274, upload-time = "2026-04-09T22:57:16.198Z" }, +] + +[[package]] +name = "google-api-python-client" +version = "2.196.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-httplib2" }, + { name = "httplib2" }, + { name = "uritemplate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/f3/34ef8aca7909675fe327f96c1ed927f0520e7acf68af19157e96acc05e76/google_api_python_client-2.196.0.tar.gz", hash = "sha256:9f335d38f6caaa2747bcf64335ed1a9a19047d53e86538eda6a1b17d37f1743d", size = 14628129, upload-time = "2026-05-06T23:47:35.655Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/c7/1817b4edf966d5afcac1c0781ca36d621bc0cb58104c4e7c2a475ab185f7/google_api_python_client-2.196.0-py3-none-any.whl", hash = "sha256:2591e9b47dcb17e4e62a09370aaee3bcf323af8f28ccecdabcd0a42a23ca4db5", size = 15206663, upload-time = "2026-05-06T23:47:32.886Z" }, +] + [[package]] name = "google-auth" version = "2.49.1" @@ -528,6 +671,32 @@ requests = [ { name = "requests" }, ] +[[package]] +name = "google-auth-httplib2" +version = "0.4.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "httplib2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/b3/f192c8bc7e41e0ebdbd95afcae4783417a34b6a6af62d22daf22c3fd38fc/google_auth_httplib2-0.4.0.tar.gz", hash = "sha256:d5b030a204b7a4b4d553ba9ca701b62481ee2b74419325580be70f7d85ffed35", size = 11161, upload-time = "2026-05-07T08:03:46.878Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/be/954c35a62b9e31de66b0a43c225c9b6bb9e0f98d6b1dc110a2308e3644f5/google_auth_httplib2-0.4.0-py3-none-any.whl", hash = "sha256:8e55cfafa3358cba85f6cad4a886138e88e158d71e7e5c9ee5936a5c1507fb91", size = 9529, upload-time = "2026-05-07T08:02:12.375Z" }, +] + +[[package]] +name = "google-auth-oauthlib" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "requests-oauthlib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/18/90c7fac516e63cf2058166fce0c88c353647c677b51cc036c09c49bb5cbb/google_auth_oauthlib-1.4.0.tar.gz", hash = "sha256:18b5e28880eb8eba9065c436becdc0ee8e4b59117a73a510679c82f70cd363d2", size = 21675, upload-time = "2026-05-07T08:03:47.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/d3/d7dff0d58a9e9244b48044bfb6a898bfcc8ecc42e0031d1bebc695344725/google_auth_oauthlib-1.4.0-py3-none-any.whl", hash = "sha256:251314f213a9ee46a5ae73988e84fd7cca8bb68e7ecf4bfd45940f9e7f51d070", size = 19261, upload-time = "2026-05-07T08:02:13.798Z" }, +] + [[package]] name = "google-genai" version = "1.70.0" @@ -549,6 +718,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/a3/d4564c8a9beaf6a3cef8d70fa6354318572cebfee65db4f01af0d41f45ba/google_genai-1.70.0-py3-none-any.whl", hash = "sha256:b74c24549d8b4208f4c736fd11857374788e1ffffc725de45d706e35c97fceee", size = 760584, upload-time = "2026-04-01T10:52:44.349Z" }, ] +[[package]] +name = "googleapis-common-protos" +version = "1.75.0" +source 
= { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/c8/f439cffde755cffa462bfbb156278fa6f9d09119719af9814b858fd4f81f/googleapis_common_protos-1.75.0.tar.gz", hash = "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", size = 151035, upload-time = "2026-05-07T08:04:49.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c8/e2645aa8ed02fd4c7a2f59d68783b65b1f3cbdfe39a6308e156509d1fee8/googleapis_common_protos-1.75.0-py3-none-any.whl", hash = "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed", size = 300631, upload-time = "2026-05-07T08:03:30.345Z" }, +] + [[package]] name = "greenlet" version = "3.3.0" @@ -574,6 +755,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "harbor" +version = "0.6.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "claude-agent-sdk" }, + { 
name = "datasets" }, + { name = "dirhash" }, + { name = "fastapi" }, + { name = "httpx" }, + { name = "jinja2" }, + { name = "litellm" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "rich" }, + { name = "ruff" }, + { name = "shortuuid" }, + { name = "supabase" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "typer" }, + { name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/05/3290ec39da674c3512a289b0f374dda7d016825f204e4815d6f6b7482e33/harbor-0.6.6.tar.gz", hash = "sha256:5653feb22ff4364fd87447d062cc4f9ab99c9b91d02c0c518cb330fafc2abf03", size = 986277, upload-time = "2026-05-07T19:23:39.099Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/69/4ea696ff5cd29de03d784ed4348fefb843b19d5a2d67ba269a2c90cbedea/harbor-0.6.6-py3-none-any.whl", hash = "sha256:30477bf698d6853d6c4bb76d85aec85e1e95190ad3a6e4bf4730435926007652", size = 1120578, upload-time = "2026-05-07T19:23:40.757Z" }, +] + [[package]] name = "hf-xet" version = "1.2.0" @@ -596,6 +822,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", 
hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -609,6 +844,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httplib2" +version = "0.31.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c1/1f/e86365613582c027dda5ddb64e1010e57a3d53e99ab8a72093fa13d565ec/httplib2-0.31.2.tar.gz", hash = "sha256:385e0869d7397484f4eab426197a4c020b606edd43372492337c0b4010ae5d24", size = 250800, upload-time = "2026-01-23T11:04:44.165Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/90/fd509079dfcab01102c0fdd87f3a9506894bc70afcf9e9785ef6b2b3aff6/httplib2-0.31.2-py3-none-any.whl", hash = "sha256:dbf0c2fa3862acf3c55c078ea9c0bc4481d7dc5117cae71be9514912cf9f8349", size = 91099, upload-time = "2026-01-23T11:04:42.78Z" }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -624,6 +871,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = 
"sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + [[package]] name = "huggingface-hub" version = "1.2.4" @@ -659,6 +920,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547, upload-time = "2023-02-23T18:33:40.801Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "ideas" version = "2026.3" @@ -687,7 +957,7 @@ requires-dist = [ { name = "clang", specifier = "==21.1.7" }, { name = "dspy", specifier = "==3.1.2" }, { name = "hydra-core", specifier = "==1.3.2" }, - { name = "kiss-agent-framework", specifier = "==0.2.27" }, + { name = "kiss-agent-framework", specifier = "==2026.5.22" }, { name = "networkx", specifier = "==3.6.1" }, { name = "tomlkit", specifier = ">=0.14.0" }, { name = "tree-sitter", specifier = 
"==0.25.2" }, @@ -699,7 +969,7 @@ dev = [ { name = "basedpyright", specifier = "==1.29.4" }, { name = "pre-commit", specifier = "==4.2.0" }, { name = "pytest", specifier = "==9.0.3" }, - { name = "ruff", specifier = "==0.11.13" }, + { name = "ruff", specifier = "==0.13.0" }, ] [[package]] @@ -722,14 +992,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.7.1" +version = "8.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304, upload-time = "2024-09-11T14:56:08.937Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514, upload-time = "2024-09-11T14:56:07.019Z" }, ] [[package]] @@ -799,7 +1069,7 @@ wheels = [ [[package]] name = "jsonschema" -version = "4.25.1" +version = "4.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -807,9 +1077,9 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist 
= { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/2e/03362ee4034a4c917f697890ccd4aec0800ccf9ded7f511971c75451deec/jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", size = 325778, upload-time = "2024-07-08T18:40:05.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/69/4a/4f9dbeb84e8850557c02365a0eee0649abe5eb1d84af92a25731c6c0f922/jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566", size = 88462, upload-time = "2024-07-08T18:40:00.165Z" }, ] [[package]] @@ -826,32 +1096,35 @@ wheels = [ [[package]] name = "kiss-agent-framework" -version = "0.2.27" +version = "2026.5.22" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anthropic" }, + { name = "cryptography" }, { name = "docker" }, { name = "flask" }, + { name = "google-api-python-client" }, + { name = "google-auth-oauthlib" }, { name = "google-genai" }, - { name = "numpy" }, + { name = "harbor" }, { name = "openai" }, { name = "playwright" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyyaml" }, + { name = "requests" }, { name = "rich" }, { name = "slack-sdk" }, - { name = "starlette" }, - { name = "uvicorn" }, + { name = "websockets" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/75/e5/2eb6de46d3e061d376f0deb4dd4b36eb956d251a178b8d78582e594d4f71/kiss_agent_framework-0.2.27.tar.gz", hash = "sha256:bd25b4a83e9c35c703af709c335a916ccd3a612f3cf6474a840ecaabb837dd49", size = 1829934, upload-time = "2026-03-11T19:50:24.245Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/f2/7d61c2d1f849f2638b7cb14b0cf5cc8e9c134d598f925fae62a7c1e6616d/kiss_agent_framework-2026.5.22.tar.gz", hash = "sha256:ac179c21fb1bbe8c020ddfe95ffcb32684ffd86396605d0b4435aa066584a5c7", size = 97834964, upload-time = "2026-05-07T22:42:02.092Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/f1/4ab52070ec6cfa6da9728fcdb84399e8ca33057e1d686c777040449df2cb/kiss_agent_framework-0.2.27-py3-none-any.whl", hash = "sha256:8711004527fe1a2e59635b0c0c9abb776779f6abb02fcabb14040da4ef3191f2", size = 452809, upload-time = "2026-03-11T19:50:13.902Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d4/756db46fc33ff2c2dab1b58c939e8e5a835297d6447018f36f7ca8edf82d/kiss_agent_framework-2026.5.22-py3-none-any.whl", hash = "sha256:3a86b1738a8ad7f86fd86057cf6da61cceccaba21d1363ccf23609c7a96caba0", size = 3231386, upload-time = "2026-05-07T22:41:52.836Z" }, ] [[package]] name = "litellm" -version = "1.83.0" +version = "1.83.14" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -867,21 +1140,21 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/92/6ce9737554994ca8e536e5f4f6a87cc7c4774b656c9eb9add071caf7d54b/litellm-1.83.0.tar.gz", hash = "sha256:860bebc76c4bb27b4cf90b4a77acd66dba25aced37e3db98750de8a1766bfb7a", size = 17333062, upload-time = "2026-03-31T05:08:25.331Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/7c/c095649380adc96c8630273c1768c2ad1e74aa2ee1dd8dd05d218a60569f/litellm-1.83.14.tar.gz", hash = "sha256:24aef9b47cdc424c833e32f3727f411741c690832cd1fe4405e0077144fe09c9", size 
= 14836599, upload-time = "2026-04-26T03:16:10.176Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/2c/a670cc050fcd6f45c6199eb99e259c73aea92edba8d5c2fc1b3686d36217/litellm-1.83.0-py3-none-any.whl", hash = "sha256:88c536d339248f3987571493015784671ba3f193a328e1ea6780dbebaa2094a8", size = 15610306, upload-time = "2026-03-31T05:08:21.987Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5c/1b5691575420135e90578543b2bf219497caa33cfd0af64cb38f30288450/litellm-1.83.14-py3-none-any.whl", hash = "sha256:92b11ba2a32cf80707ddf388d18526696c7999a21b418c5e3b6eda1243d2cfdb", size = 16457054, upload-time = "2026-04-26T03:16:05.72Z" }, ] [[package]] name = "mako" -version = "1.3.11" +version = "1.3.12" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markupsafe" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/59/8a/805404d0c0b9f3d7a326475ca008db57aea9c5c9f2e1e39ed0faa335571c/mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069", size = 399811, upload-time = "2026-04-14T20:19:51.493Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/62/791b31e69ae182791ec67f04850f2f062716bbd205483d63a215f3e062d3/mako-1.3.12.tar.gz", hash = "sha256:9f778e93289bd410bb35daadeb4fc66d95a746f0b75777b942088b7fd7af550a", size = 400219, upload-time = "2026-04-28T19:01:08.512Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/a5/19d7aaa7e433713ffe881df33705925a196afb9532efc8475d26593921a6/mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77", size = 78503, upload-time = "2026-04-14T20:19:53.233Z" }, + { url = "https://files.pythonhosted.org/packages/bc/b1/a0ec7a5a9db730a08daef1fdfb8090435b82465abbf758a596f0ea88727e/mako-1.3.12-py3-none-any.whl", hash = "sha256:8f61569480282dbf557145ce441e4ba888be453c30989f879f0d652e39f53ea9", size = 78521, upload-time = "2026-04-28T19:01:10.393Z" }, ] [[package]] @@ 
-926,6 +1199,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, ] +[[package]] +name = "mcp" +version = "1.27.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/83/d1efe7c2980d8a3afa476f4e3d42d53dd54c0ab94c27bee5d755b45c8b73/mcp-1.27.1.tar.gz", hash = "sha256:0f47e1820f8f8f941466b39749eb1d1839a04caddca2bc60e9d46e8a99914924", size = 608458, upload-time = "2026-05-08T16:50:12.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/73/42d9596facebdb533b7f0b86c1b0364ef350d1f8ba78b1052e8a58b48b65/mcp-1.27.1-py3-none-any.whl", hash = "sha256:1af3c4203b329430fde7a87b4fcb6392a041f5cb851fd68fc674016ab4e7c06f", size = 216260, upload-time = "2026-05-08T16:50:10.547Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -935,6 +1233,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mmh3" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/91/1a/edb23803a168f070ded7a3014c6d706f63b90c84ccc024f89d794a3b7a6d/mmh3-5.2.1.tar.gz", hash = "sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad", size = 33775, upload-time = "2026-03-05T15:55:57.716Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/a5/9daa0508a1569a54130f6198d5462a92deda870043624aa3ea72721aa765/mmh3-5.2.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:723b2681ed4cc07d3401bbea9c201ad4f2a4ca6ba8cddaff6789f715dd2b391e", size = 40832, upload-time = "2026-03-05T15:54:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6b/3230c6d80c1f4b766dedf280a92c2241e99f87c1504ff74205ec8cebe451/mmh3-5.2.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:3619473a0e0d329fd4aec8075628f8f616be2da41605300696206d6f36920c3d", size = 41964, upload-time = "2026-03-05T15:54:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/62/fb/648bfddb74a872004b6ee751551bfdda783fe6d70d2e9723bad84dbe5311/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6", size = 39114, upload-time = "2026-03-05T15:54:45.205Z" }, + { url = "https://files.pythonhosted.org/packages/95/c2/ab7901f87af438468b496728d11264cb397b3574d41506e71b92128e0373/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f", size = 39819, upload-time = "2026-03-05T15:54:46.509Z" }, + { url = "https://files.pythonhosted.org/packages/2f/ed/6f88dda0df67de1612f2e130ffea34cf84aaee5bff5b0aff4dbff2babe34/mmh3-5.2.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8", size = 40330, upload-time = "2026-03-05T15:54:47.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/66/7516d23f53cdf90f43fce24ab80c28f45e6851d78b46bef8c02084edf583/mmh3-5.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6", size = 56078, upload-time = "2026-03-05T15:54:48.9Z" }, + { url = "https://files.pythonhosted.org/packages/bc/34/4d152fdf4a91a132cb226b671f11c6b796eada9ab78080fb5ce1e95adaab/mmh3-5.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:19bbd3b841174ae6ed588536ab5e1b1fe83d046e668602c20266547298d939a9", size = 40498, upload-time = "2026-03-05T15:54:49.942Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4c/8e3af1b6d85a299767ec97bd923f12b06267089c1472c27c1696870d1175/mmh3-5.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03", size = 40033, upload-time = "2026-03-05T15:54:50.994Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/966ea560e32578d453c9e9db53d602cbb1d0da27317e232afa7c38ceba11/mmh3-5.2.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fd96476f04db5ceba1cfa0f21228f67c1f7402296f0e73fee3513aa680ad237b", size = 97320, upload-time = "2026-03-05T15:54:52.072Z" }, + { url = "https://files.pythonhosted.org/packages/bb/0d/2c5f9893b38aeb6b034d1a44ecd55a010148054f6a516abe53b5e4057297/mmh3-5.2.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5", size = 103299, upload-time = "2026-03-05T15:54:53.569Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/2ebaef4a4d4376f89761274dc274035ffd96006ab496b4ee5af9b08f21a9/mmh3-5.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593", size = 106222, upload-time = "2026-03-05T15:54:55.092Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/09/ea7ffe126d0ba0406622602a2d05e1e1a6841cc92fc322eb576c95b27fad/mmh3-5.2.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4", size = 113048, upload-time = "2026-03-05T15:54:56.305Z" }, + { url = "https://files.pythonhosted.org/packages/85/57/9447032edf93a64aa9bef4d9aa596400b1756f40411890f77a284f6293ca/mmh3-5.2.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1", size = 120742, upload-time = "2026-03-05T15:54:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/53/82/a86cc87cc88c92e9e1a598fee509f0409435b57879a6129bf3b3e40513c7/mmh3-5.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:169e0d178cb59314456ab30772429a802b25d13227088085b0d49b9fe1533104", size = 99132, upload-time = "2026-03-05T15:54:58.583Z" }, + { url = "https://files.pythonhosted.org/packages/54/f7/6b16eb1b40ee89bb740698735574536bc20d6cdafc65ae702ea235578e05/mmh3-5.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e4e1f580033335c6f76d1e0d6b56baf009d1a64d6a4816347e4271ba951f46d", size = 98686, upload-time = "2026-03-05T15:55:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/e8/88/a601e9f32ad1410f438a6d0544298ea621f989bd34a0731a7190f7dec799/mmh3-5.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f", size = 106479, upload-time = "2026-03-05T15:55:01.532Z" }, + { url = "https://files.pythonhosted.org/packages/d6/5c/ce29ae3dfc4feec4007a437a1b7435fb9507532a25147602cd5b52be86db/mmh3-5.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2", size = 110030, upload-time = "2026-03-05T15:55:02.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/30/ae444ef2ff87c805d525da4fa63d27cda4fe8a48e77003a036b8461cfd5c/mmh3-5.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a", size = 97536, upload-time = "2026-03-05T15:55:04.135Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f9/dc3787ee5c813cc27fe79f45ad4500d9b5437f23a7402435cc34e07c7718/mmh3-5.2.1-cp313-cp313-win32.whl", hash = "sha256:54b64fb2433bc71488e7a449603bf8bd31fbcf9cb56fbe1eb6d459e90b86c37b", size = 40769, upload-time = "2026-03-05T15:55:05.277Z" }, + { url = "https://files.pythonhosted.org/packages/43/67/850e0b5a1e97799822ebfc4ca0e8c6ece3ed8baf7dcdf64de817dfdda2ca/mmh3-5.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:cae6383181f1e345317742d2ddd88f9e7d2682fa4c9432e3a74e47d92dce0229", size = 41563, upload-time = "2026-03-05T15:55:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/98c90b28e1da5458e19fbfaf4adb5289208d3bfccd45dd14eab216a2f0bb/mmh3-5.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:022aa1a528604e6c83d0a7705fdef0b5355d897a9e0fa3a8d26709ceaa06965d", size = 39310, upload-time = "2026-03-05T15:55:07.323Z" }, +] + [[package]] name = "multidict" version = "6.7.0" @@ -980,6 +1307,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, ] +[[package]] +name = "multiprocess" +version = "0.70.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = 
"2026-01-19T06:47:39.744Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, + { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" }, + { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" }, + { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" }, + { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, +] + [[package]] name = "networkx" version = "3.6.1" @@ -1043,6 +1386,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981, upload-time = "2026-01-10T06:43:52.575Z" }, ] +[[package]] +name = "oauthlib" +version = "3.3.1" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + [[package]] name = "omegaconf" version = "2.3.0" @@ -1058,7 +1410,7 @@ wheels = [ [[package]] name = "openai" -version = "2.14.0" +version = "2.24.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1070,9 +1422,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/b1/12fe1c196bea326261718eb037307c1c1fe1dedc2d2d4de777df822e6238/openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952", size = 626938, upload-time = "2025-12-19T03:28:45.742Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/13/17e87641b89b74552ed408a92b231283786523edddc95f3545809fab673c/openai-2.24.0.tar.gz", hash = "sha256:1e5769f540dbd01cb33bc4716a23e67b9d695161a734aff9c5f925e2bf99a673", size = 658717, upload-time = "2026-02-24T20:02:07.958Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/4b/7c1a00c2c3fbd004253937f7520f692a9650767aa73894d7a34f0d65d3f4/openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183", size = 1067558, upload-time = "2025-12-19T03:28:43.727Z" }, + { url = "https://files.pythonhosted.org/packages/c9/30/844dc675ee6902579b8eef01ed23917cc9319a1c9c0c14ec6e39340c96d0/openai-2.24.0-py3-none-any.whl", 
hash = "sha256:fed30480d7d6c884303287bde864980a4b137b60553ffbcf9ab4a233b7a73d94", size = 1120122, upload-time = "2026-02-24T20:02:05.669Z" }, ] [[package]] @@ -1125,6 +1477,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandas" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/99/b342345300f13440fe9fe385c3c481e2d9a595ee3bab4d3219247ac94e9a/pandas-3.0.2.tar.gz", hash = "sha256:f4753e73e34c8d83221ba58f232433fca2748be8b18dbca02d242ed153945043", size = 4645855, upload-time = "2026-03-31T06:48:30.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/ca/3e639a1ea6fcd0617ca4e8ca45f62a74de33a56ae6cd552735470b22c8d3/pandas-3.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5918ba197c951dec132b0c5929a00c0bf05d5942f590d3c10a807f6e15a57d3", size = 10321105, upload-time = "2026-03-31T06:46:57.327Z" }, + { url = "https://files.pythonhosted.org/packages/0b/77/dbc82ff2fb0e63c6564356682bf201edff0ba16c98630d21a1fb312a8182/pandas-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d606a041c89c0a474a4702d532ab7e73a14fe35c8d427b972a625c8e46373668", size = 9864088, upload-time = "2026-03-31T06:46:59.935Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2b/341f1b04bbca2e17e13cd3f08c215b70ef2c60c5356ef1e8c6857449edc7/pandas-3.0.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:710246ba0616e86891b58ab95f2495143bb2bc83ab6b06747c74216f583a6ac9", size = 10369066, upload-time = 
"2026-03-31T06:47:02.792Z" }, + { url = "https://files.pythonhosted.org/packages/12/c5/cbb1ffefb20a93d3f0e1fdcda699fb84976210d411b008f97f48bf6ce27e/pandas-3.0.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d3cfe227c725b1f3dff4278b43d8c784656a42a9325b63af6b1492a8232209e", size = 10876780, upload-time = "2026-03-31T06:47:06.205Z" }, + { url = "https://files.pythonhosted.org/packages/98/fe/2249ae5e0a69bd0ddf17353d0a5d26611d70970111f5b3600cdc8be883e7/pandas-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c3b723df9087a9a9a840e263ebd9f88b64a12075d1bf2ea401a5a42f254f084d", size = 11375181, upload-time = "2026-03-31T06:47:09.383Z" }, + { url = "https://files.pythonhosted.org/packages/de/64/77a38b09e70b6464883b8d7584ab543e748e42c1b5d337a2ee088e0df741/pandas-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3096110bf9eac0070b7208465f2740e2d8a670d5cb6530b5bb884eca495fd39", size = 11928899, upload-time = "2026-03-31T06:47:12.686Z" }, + { url = "https://files.pythonhosted.org/packages/5e/52/42855bf626868413f761addd574acc6195880ae247a5346477a4361c3acb/pandas-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:07a10f5c36512eead51bc578eb3354ad17578b22c013d89a796ab5eee90cd991", size = 9746574, upload-time = "2026-03-31T06:47:15.64Z" }, + { url = "https://files.pythonhosted.org/packages/88/39/21304ae06a25e8bf9fc820d69b29b2c495b2ae580d1e143146c309941760/pandas-3.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:5fdbfa05931071aba28b408e59226186b01eb5e92bea2ab78b65863ca3228d84", size = 9047156, upload-time = "2026-03-31T06:47:18.595Z" }, + { url = "https://files.pythonhosted.org/packages/72/20/7defa8b27d4f330a903bb68eea33be07d839c5ea6bdda54174efcec0e1d2/pandas-3.0.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:dbc20dea3b9e27d0e66d74c42b2d0c1bed9c2ffe92adea33633e3bedeb5ac235", size = 10756238, upload-time = "2026-03-31T06:47:22.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/95/49433c14862c636afc0e9b2db83ff16b3ad92959364e52b2955e44c8e94c/pandas-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b75c347eff42497452116ce05ef461822d97ce5b9ff8df6edacb8076092c855d", size = 10408520, upload-time = "2026-03-31T06:47:25.197Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f8/462ad2b5881d6b8ec8e5f7ed2ea1893faa02290d13870a1600fe72ad8efc/pandas-3.0.2-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1478075142e83a5571782ad007fb201ed074bdeac7ebcc8890c71442e96adf7", size = 10324154, upload-time = "2026-03-31T06:47:28.097Z" }, + { url = "https://files.pythonhosted.org/packages/0a/65/d1e69b649cbcddda23ad6e4c40ef935340f6f652a006e5cbc3555ac8adb3/pandas-3.0.2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5880314e69e763d4c8b27937090de570f1fb8d027059a7ada3f7f8e98bdcb677", size = 10714449, upload-time = "2026-03-31T06:47:30.85Z" }, + { url = "https://files.pythonhosted.org/packages/47/a4/85b59bc65b8190ea3689882db6cdf32a5003c0ccd5a586c30fdcc3ffc4fc/pandas-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b5329e26898896f06035241a626d7c335daa479b9bbc82be7c2742d048e41172", size = 11338475, upload-time = "2026-03-31T06:47:34.026Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c4/bc6966c6e38e5d9478b935272d124d80a589511ed1612a5d21d36f664c68/pandas-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:81526c4afd31971f8b62671442a4b2b51e0aa9acc3819c9f0f12a28b6fcf85f1", size = 11786568, upload-time = "2026-03-31T06:47:36.941Z" }, + { url = "https://files.pythonhosted.org/packages/e8/74/09298ca9740beed1d3504e073d67e128aa07e5ca5ca2824b0c674c0b8676/pandas-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:7cadd7e9a44ec13b621aec60f9150e744cfc7a3dd32924a7e2f45edff31823b0", size = 10488652, upload-time = "2026-03-31T06:47:40.612Z" }, +] + +[[package]] +name = "pathspec" +version = "1.1.1" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/82/42f767fc1c1143d6fd36efb827202a2d997a375e160a71eb2888a925aac1/pathspec-1.1.1.tar.gz", hash = "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a", size = 135180, upload-time = "2026-04-27T01:46:08.907Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, +] + [[package]] name = "platformdirs" version = "4.5.1" @@ -1162,6 +1551,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "postgrest" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecation" }, + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/7c/54e7be05adc9fd6fd98dc572ddfc8982d45bec314a55711e37277d440698/postgrest-2.30.0.tar.gz", hash = "sha256:4f89eec56ce605ab6fbddd9b96d526a9bb44962796d44a5d85cb77640eb766c3", size = 14430, upload-time = "2026-05-06T17:35:21.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/aa/ff2e09f99f95ea96fddeb373646bf907dd89a24fc00b5d38e5674ca7c9ca/postgrest-2.30.0-py3-none-any.whl", hash = "sha256:30631e7993da542419f4217cf3b60aa641084731ea15e66a18526a3a52e40a7d", size = 23108, upload-time = "2026-05-06T17:35:20.531Z" }, +] + [[package]] name = "pre-commit" version = "4.2.0" @@ -1217,6 +1621,55 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] +[[package]] +name = "proto-plus" +version = "1.28.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/56/e647b0c675392d2da368da7b6f158f7368b18542fd6f7d7400a2f39de000/proto_plus-1.28.0.tar.gz", hash = "sha256:38e5696342835b08fc116f30a25665b29531cda9d5d5643e9b81fc312385abd9", size = 57221, upload-time = "2026-05-07T08:04:50.811Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/20/b122d4626976acb81132036d2ad1bb35a1a8775fceb837ec30964622516a/proto_plus-1.28.0-py3-none-any.whl", hash = "sha256:a630604310899e73c59ec302e5765c058d412b2f090b9c79c8822589f14955b8", size = 50410, upload-time = "2026-05-07T08:03:31.962Z" }, +] + +[[package]] +name = "protobuf" +version = "7.34.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" }, + { url = "https://files.pythonhosted.org/packages/eb/9d/aa69df2724ff63efa6f72307b483ce0827f4347cc6d6df24b59e26659fef/protobuf-7.34.1-cp310-abi3-manylinux2014_aarch64.whl", hash = 
"sha256:5185e0e948d07abe94bb76ec9b8416b604cfe5da6f871d67aad30cbf24c3110b", size = 325753, upload-time = "2026-03-20T17:34:38.751Z" }, + { url = "https://files.pythonhosted.org/packages/92/e8/d174c91fd48e50101943f042b09af9029064810b734e4160bbe282fa1caa/protobuf-7.34.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:403b093a6e28a960372b44e5eb081775c9b056e816a8029c61231743d63f881a", size = 340198, upload-time = "2026-03-20T17:34:39.871Z" }, + { url = "https://files.pythonhosted.org/packages/53/1b/3b431694a4dc6d37b9f653f0c64b0a0d9ec074ee810710c0c3da21d67ba7/protobuf-7.34.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:8ff40ce8cd688f7265326b38d5a1bed9bfdf5e6723d49961432f83e21d5713e4", size = 324267, upload-time = "2026-03-20T17:34:41.1Z" }, + { url = "https://files.pythonhosted.org/packages/85/29/64de04a0ac142fb685fd09999bc3d337943fb386f3a0ec57f92fd8203f97/protobuf-7.34.1-cp310-abi3-win32.whl", hash = "sha256:34b84ce27680df7cca9f231043ada0daa55d0c44a2ddfaa58ec1d0d89d8bf60a", size = 426628, upload-time = "2026-03-20T17:34:42.536Z" }, + { url = "https://files.pythonhosted.org/packages/4d/87/cb5e585192a22b8bd457df5a2c16a75ea0db9674c3a0a39fc9347d84e075/protobuf-7.34.1-cp310-abi3-win_amd64.whl", hash = "sha256:e97b55646e6ce5cbb0954a8c28cd39a5869b59090dfaa7df4598a7fba869468c", size = 437901, upload-time = "2026-03-20T17:34:44.112Z" }, + { url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = "sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" }, +] + +[[package]] +name = "pyarrow" +version = "24.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 
1180261, upload-time = "2026-04-21T10:51:25.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759, upload-time = "2026-04-21T10:48:07.258Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471, upload-time = "2026-04-21T10:48:13.347Z" }, + { url = "https://files.pythonhosted.org/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981, upload-time = "2026-04-21T10:48:20.201Z" }, + { url = "https://files.pythonhosted.org/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172, upload-time = "2026-04-21T10:48:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733, upload-time = "2026-04-21T10:48:34.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335, upload-time = "2026-04-21T10:48:42.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748, upload-time = "2026-04-21T10:49:42.532Z" }, + { url = "https://files.pythonhosted.org/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554, upload-time = "2026-04-21T10:48:48.526Z" }, + { url = "https://files.pythonhosted.org/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301, upload-time = "2026-04-21T10:48:55.181Z" }, + { url = "https://files.pythonhosted.org/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929, upload-time = "2026-04-21T10:49:03.676Z" }, + { url = "https://files.pythonhosted.org/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365, upload-time = "2026-04-21T10:49:11.714Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819, upload-time = "2026-04-21T10:49:21.474Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252, upload-time = "2026-04-21T10:49:31.164Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127, upload-time = "2026-04-21T10:49:37.334Z" }, +] + [[package]] name = "pyasn1" version = "0.6.3" @@ -1322,6 +1775,78 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] +[[package]] +name = "pyiceberg" +version = "0.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "fsspec" }, + { name = "mmh3" }, + { name = "pydantic" }, + { name = "pyparsing" }, + { name = "pyroaring" }, + { name = "requests" }, + { name = "rich" }, + { name = "strictyaml" }, + { name = "tenacity" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/f0/7616676603fdbd05ab97816337a9b31be08a5f9e1ffd636260812b217e0f/pyiceberg-0.11.1.tar.gz", hash = "sha256:366fe0d5a74e3cf1d4e7cbf3c49e308da60e7835ea268667be9185388f05d7a5", size = 1076075, upload-time = "2026-03-03T00:10:27.61Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/4c/a122d80d98cb6125d87024681263406433f0c25c699d503f5633521e6809/pyiceberg-0.11.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7ec5db19feab98a31fcd5caccf4a9a4e83f96933d1ca393ba7aea665710c2bb", size = 532644, upload-time = 
"2026-03-03T00:10:18.574Z" }, + { url = "https://files.pythonhosted.org/packages/10/94/9a8fa5fc580e6dccd34bbbf51e7658cd7b49540e2458783addeff5e22a91/pyiceberg-0.11.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cec0616d2ba6e7dda6327089a2f34ec723aa9ac2c389857ef0b83f65fb135dd6", size = 532787, upload-time = "2026-03-03T00:10:19.656Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ab/ab7c88828bc17d77dbbc5a765419dfec2135629e1d74cdd0762cd38ad867/pyiceberg-0.11.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ddb360da76c62c7c23ec3da40e1af48e6712a563905fea2d1a8911ff7a3b6c4d", size = 722202, upload-time = "2026-03-03T00:10:21.012Z" }, + { url = "https://files.pythonhosted.org/packages/df/38/079cf1c0bf86da315472a926eec0dba10135f43374a2e267336eb98d8c76/pyiceberg-0.11.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d8790f420ebc484236017edba59182cf2a21bd3e4224a0bd0760a9c7268e96a", size = 724037, upload-time = "2026-03-03T00:10:22.176Z" }, + { url = "https://files.pythonhosted.org/packages/08/6b/08eaef477debb110438d943ef3f5985096f660ccb735d6344701cbd075a9/pyiceberg-0.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ae27ba4d37925d5b2cff192acaa70c8bb114d632bbc527cc91fea0370702b866", size = 716035, upload-time = "2026-03-03T00:10:23.789Z" }, + { url = "https://files.pythonhosted.org/packages/0b/59/7671d6a630ab1d85c6e7ca8ddf438dc63a0b0dd183bc4be69bf25c0fa5f6/pyiceberg-0.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:db66a4e0fdfbf4090631d59c3f65e960d9a5561e9259f6f3993cbe91e396837e", size = 720887, upload-time = "2026-03-03T00:10:24.824Z" }, + { url = "https://files.pythonhosted.org/packages/f0/2b/5c8ad37807efaedb14b20f01f36462684468c80da5b74f4018fb4c1804b5/pyiceberg-0.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:eb3a0a3e630ee89758eb96b39b456f4697732351fb0c080e9498ea578f9b71f9", size = 530923, upload-time = "2026-03-03T00:10:26.196Z" }, 
+] + +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pyparsing" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, +] + +[[package]] +name = "pyroaring" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/46/a50510d080f8cb089303ec0f7cd80736b2949ca3d148f48f1cc90c49e345/pyroaring-1.1.0.tar.gz", hash = "sha256:f02e4021397ae02a139defdc6813b9942ab163de90affddd4ce4efbac299f619", size = 200298, upload-time = "2026-04-24T21:29:25.212Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7d/75/1d39ecb04e6cd96d191eb8884864355051df80928dd5096a9dea43fbf63b/pyroaring-1.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:72f68a16b00b35481d9b3bfe897ecd8a1f7da69efd92ba5b17347ca11c21cb0d", size = 333363, upload-time = "2026-04-24T21:28:23.838Z" }, + { url = "https://files.pythonhosted.org/packages/20/3e/65cd0871e86d11c5c5cfd0f5abb0ca80eb2b6b5dbe5a2433f315a9ebd90c/pyroaring-1.1.0-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:4c443e9f942b6089efe8c9b264576e9d116f90be28a315679375bba2d8a915d6", size = 710573, upload-time = "2026-04-24T21:28:24.884Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a2/f8f23515f41414332e60cd86e4957e2a6838070b2ad5fe25e80f136de635/pyroaring-1.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:3beb40eb1220d1ce4fb3661bb019e9a21857e5bb294fe8c1c5016aeb6e82318c", size = 384880, upload-time = "2026-04-24T21:28:25.864Z" }, + { url = "https://files.pythonhosted.org/packages/b0/5b/82dc44b5074a1ff62e702d12611272d1711a60d5518dab23f94e1f7a9b3d/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f1f56004e8f1c1489bf279c25f1fa4764252cd9af5fb35675774268a4a615ba", size = 1999529, upload-time = "2026-04-24T21:28:26.859Z" }, + { url = "https://files.pythonhosted.org/packages/11/40/b07bac8cdc4b709a05f5c55bb52d4f684e5ea1fadfa0b6d9decf477a9d2a/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:13660386ea8905ee4d42c21a6275463e2dc7d31e0b5d65eec210aa7043ad96f4", size = 1842927, upload-time = "2026-04-24T21:28:28.056Z" }, + { url = "https://files.pythonhosted.org/packages/0d/60/c4b511965802dfc77978a9e16f2813f47fb3083db1822019ba1bb169c685/pyroaring-1.1.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0dfb6cf50fd8898179e460e699a6b8326ca508c627d083f7bf62f769fe1717d5", size = 2199538, upload-time = "2026-04-24T21:28:29.425Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/12/38f6b50b3f3f41a8b752d3e9efcf105b18eb2c66811831059f25613734ac/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:81ebbc0c880c8a10f13118632e5c0d59159ceada8b651bba18f2e6dc70efdeda", size = 2896904, upload-time = "2026-04-24T21:28:30.67Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b6/b5436e4b93c6bf2bd3dd6ccb88cbdc64b12084151a43e2f5c94be50eb710/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:370d191b0d1b32bbd99452ef5f0485f22fcc4bf7404d33b821d0ce2459951152", size = 2733819, upload-time = "2026-04-24T21:28:31.882Z" }, + { url = "https://files.pythonhosted.org/packages/ab/8f/f392f268de9607a5c7a95aaed6b9c8a81f00c14d85c33855e9f492095478/pyroaring-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b3bfad0ae3ef0e67b40c193863dce8b7d79de545dadbe53c19acc3ace38f66", size = 3161730, upload-time = "2026-04-24T21:28:33.244Z" }, + { url = "https://files.pythonhosted.org/packages/9e/a1/03250fd4834b6a5c13e6600bca47ea20fda579f80bce3551d4985185d164/pyroaring-1.1.0-cp313-cp313-win32.whl", hash = "sha256:eead129046822cb0fd47c78740b81bdaffd0515c0bb0306a2318acf0f0540b58", size = 211194, upload-time = "2026-04-24T21:28:35.001Z" }, + { url = "https://files.pythonhosted.org/packages/70/63/d9b307462cddc82fe94a67d6810e5c802818690e131ba690c1de674d8558/pyroaring-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:90ab2f00c09eed5bd986a80c8641e2dc10e7aca1a2d892d89a44b396e39c08ea", size = 263110, upload-time = "2026-04-24T21:28:35.976Z" }, + { url = "https://files.pythonhosted.org/packages/d9/4a/aa6e9833a6ba9a630efdbec8783b63da6602f763b37a5b5fbc01d73a1af1/pyroaring-1.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:51dd2490a64ad4ed53c4fb58ef1ee3f84f6cbd97cdb47abd9065c9f714ab72ef", size = 216546, upload-time = "2026-04-24T21:28:37.065Z" }, +] + [[package]] name = "pytest" version = "9.0.3" @@ -1338,13 +1863,34 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "python-dotenv" -version = "1.2.1" +version = "1.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = 
"sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.28" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/54/a85eb421fbdd5007bc5af39d0f4ed9fa609e0fedbfdc2adcf0b34526870e/python_multipart-0.0.28.tar.gz", hash = "sha256:8550da197eac0f7ab748961fc9509b999fa2662ea25cef857f05249f6893c0f8", size = 45314, upload-time = "2026-05-10T11:05:16.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/a2/43bbc5860b5034e2af4ef99a0e04d726ff329c43e192ef3abaa8d7ecfce5/python_multipart-0.0.28-py3-none-any.whl", hash = "sha256:10faac07eb966c3f48dc415f9dee46c04cb10d58d30a35677db8027c825ed9b6", size = 29438, upload-time = "2026-05-10T11:05:15.052Z" }, ] [[package]] @@ -1375,6 +1921,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, ] +[[package]] +name = "realtime" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/a2/0328d49d3b5fb427068e9200e7de5b0d708d021a1ad98d004bc685d2529e/realtime-2.30.0.tar.gz", hash = "sha256:7aa593da52ed5f92c34ec4e50e32043afa62f219c94f717ad64a66ab0ef9f1ba", size = 18718, upload-time = "2026-05-06T17:35:23.925Z" 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/75/1b2cfc949595e22d8c05a2aa2cfc222921f7f94177d7e8a90542f3f73b33/realtime-2.30.0-py3-none-any.whl", hash = "sha256:7c93b63d2cf99aa1da4fa8826b03b00cd32f7b38abb27ff47b19eb5dcb5707c6", size = 22376, upload-time = "2026-05-06T17:35:22.568Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -1439,6 +1999,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, +] + [[package]] name = "rich" version = "14.3.3" @@ -1491,27 +2064,41 @@ wheels = [ [[package]] name = "ruff" -version = "0.11.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/da/9c6f995903b4d9474b39da91d2d626659af3ff1eeb43e9ae7c119349dba6/ruff-0.11.13.tar.gz", hash = "sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514", size = 4282054, upload-time = "2025-06-05T21:00:15.721Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7d/ce/a11d381192966e0b4290842cc8d4fac7dc9214ddf627c11c1afff87da29b/ruff-0.11.13-py3-none-linux_armv6l.whl", hash = "sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46", size = 10292516, upload-time = "2025-06-05T20:59:32.944Z" }, - { url = "https://files.pythonhosted.org/packages/78/db/87c3b59b0d4e753e40b6a3b4a2642dfd1dcaefbff121ddc64d6c8b47ba00/ruff-0.11.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48", size = 11106083, upload-time = "2025-06-05T20:59:37.03Z" }, - { url = "https://files.pythonhosted.org/packages/77/79/d8cec175856ff810a19825d09ce700265f905c643c69f45d2b737e4a470a/ruff-0.11.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b", size = 10436024, upload-time = "2025-06-05T20:59:39.741Z" }, - { url = "https://files.pythonhosted.org/packages/8b/5b/f6d94f2980fa1ee854b41568368a2e1252681b9238ab2895e133d303538f/ruff-0.11.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a", size = 10646324, upload-time = "2025-06-05T20:59:42.185Z" }, - { url = "https://files.pythonhosted.org/packages/6c/9c/b4c2acf24ea4426016d511dfdc787f4ce1ceb835f3c5fbdbcb32b1c63bda/ruff-0.11.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc", size = 10174416, upload-time = "2025-06-05T20:59:44.319Z" }, - { url = "https://files.pythonhosted.org/packages/f3/10/e2e62f77c65ede8cd032c2ca39c41f48feabedb6e282bfd6073d81bb671d/ruff-0.11.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629", size = 11724197, upload-time = "2025-06-05T20:59:46.935Z" }, - { url = 
"https://files.pythonhosted.org/packages/bb/f0/466fe8469b85c561e081d798c45f8a1d21e0b4a5ef795a1d7f1a9a9ec182/ruff-0.11.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933", size = 12511615, upload-time = "2025-06-05T20:59:49.534Z" }, - { url = "https://files.pythonhosted.org/packages/17/0e/cefe778b46dbd0cbcb03a839946c8f80a06f7968eb298aa4d1a4293f3448/ruff-0.11.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165", size = 12117080, upload-time = "2025-06-05T20:59:51.654Z" }, - { url = "https://files.pythonhosted.org/packages/5d/2c/caaeda564cbe103bed145ea557cb86795b18651b0f6b3ff6a10e84e5a33f/ruff-0.11.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71", size = 11326315, upload-time = "2025-06-05T20:59:54.469Z" }, - { url = "https://files.pythonhosted.org/packages/75/f0/782e7d681d660eda8c536962920c41309e6dd4ebcea9a2714ed5127d44bd/ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9", size = 11555640, upload-time = "2025-06-05T20:59:56.986Z" }, - { url = "https://files.pythonhosted.org/packages/5d/d4/3d580c616316c7f07fb3c99dbecfe01fbaea7b6fd9a82b801e72e5de742a/ruff-0.11.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc", size = 10507364, upload-time = "2025-06-05T20:59:59.154Z" }, - { url = "https://files.pythonhosted.org/packages/5a/dc/195e6f17d7b3ea6b12dc4f3e9de575db7983db187c378d44606e5d503319/ruff-0.11.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7", size = 10141462, upload-time = "2025-06-05T21:00:01.481Z" }, - { url = 
"https://files.pythonhosted.org/packages/f4/8e/39a094af6967faa57ecdeacb91bedfb232474ff8c3d20f16a5514e6b3534/ruff-0.11.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432", size = 11121028, upload-time = "2025-06-05T21:00:04.06Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c0/b0b508193b0e8a1654ec683ebab18d309861f8bd64e3a2f9648b80d392cb/ruff-0.11.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492", size = 11602992, upload-time = "2025-06-05T21:00:06.249Z" }, - { url = "https://files.pythonhosted.org/packages/7c/91/263e33ab93ab09ca06ce4f8f8547a858cc198072f873ebc9be7466790bae/ruff-0.11.13-py3-none-win32.whl", hash = "sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250", size = 10474944, upload-time = "2025-06-05T21:00:08.459Z" }, - { url = "https://files.pythonhosted.org/packages/46/f4/7c27734ac2073aae8efb0119cae6931b6fb48017adf048fdf85c19337afc/ruff-0.11.13-py3-none-win_amd64.whl", hash = "sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3", size = 11548669, upload-time = "2025-06-05T21:00:11.147Z" }, - { url = "https://files.pythonhosted.org/packages/ec/bf/b273dd11673fed8a6bd46032c0ea2a04b2ac9bfa9c628756a5856ba113b0/ruff-0.11.13-py3-none-win_arm64.whl", hash = "sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b", size = 10683928, upload-time = "2025-06-05T21:00:13.758Z" }, +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/1a/1f4b722862840295bcaba8c9e5261572347509548faaa99b2d57ee7bfe6a/ruff-0.13.0.tar.gz", hash = "sha256:5b4b1ee7eb35afae128ab94459b13b2baaed282b1fb0f472a73c82c996c8ae60", size = 5372863, upload-time = "2025-09-10T16:25:37.917Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ac/fe/6f87b419dbe166fd30a991390221f14c5b68946f389ea07913e1719741e0/ruff-0.13.0-py3-none-linux_armv6l.whl", hash = "sha256:137f3d65d58ee828ae136a12d1dc33d992773d8f7644bc6b82714570f31b2004", size = 12187826, upload-time = "2025-09-10T16:24:39.5Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/c92296b1fc36d2499e12b74a3fdb230f77af7bdf048fad7b0a62e94ed56a/ruff-0.13.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:21ae48151b66e71fd111b7d79f9ad358814ed58c339631450c66a4be33cc28b9", size = 12933428, upload-time = "2025-09-10T16:24:43.866Z" }, + { url = "https://files.pythonhosted.org/packages/44/cf/40bc7221a949470307d9c35b4ef5810c294e6cfa3caafb57d882731a9f42/ruff-0.13.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:64de45f4ca5441209e41742d527944635a05a6e7c05798904f39c85bafa819e3", size = 12095543, upload-time = "2025-09-10T16:24:46.638Z" }, + { url = "https://files.pythonhosted.org/packages/f1/03/8b5ff2a211efb68c63a1d03d157e924997ada87d01bebffbd13a0f3fcdeb/ruff-0.13.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b2c653ae9b9d46e0ef62fc6fbf5b979bda20a0b1d2b22f8f7eb0cde9f4963b8", size = 12312489, upload-time = "2025-09-10T16:24:49.556Z" }, + { url = "https://files.pythonhosted.org/packages/37/fc/2336ef6d5e9c8d8ea8305c5f91e767d795cd4fc171a6d97ef38a5302dadc/ruff-0.13.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4cec632534332062bc9eb5884a267b689085a1afea9801bf94e3ba7498a2d207", size = 11991631, upload-time = "2025-09-10T16:24:53.439Z" }, + { url = "https://files.pythonhosted.org/packages/39/7f/f6d574d100fca83d32637d7f5541bea2f5e473c40020bbc7fc4a4d5b7294/ruff-0.13.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dcd628101d9f7d122e120ac7c17e0a0f468b19bc925501dbe03c1cb7f5415b24", size = 13720602, upload-time = "2025-09-10T16:24:56.392Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/c8/a8a5b81d8729b5d1f663348d11e2a9d65a7a9bd3c399763b1a51c72be1ce/ruff-0.13.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:afe37db8e1466acb173bb2a39ca92df00570e0fd7c94c72d87b51b21bb63efea", size = 14697751, upload-time = "2025-09-10T16:24:59.89Z" }, + { url = "https://files.pythonhosted.org/packages/57/f5/183ec292272ce7ec5e882aea74937f7288e88ecb500198b832c24debc6d3/ruff-0.13.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f96a8d90bb258d7d3358b372905fe7333aaacf6c39e2408b9f8ba181f4b6ef2", size = 14095317, upload-time = "2025-09-10T16:25:03.025Z" }, + { url = "https://files.pythonhosted.org/packages/9f/8d/7f9771c971724701af7926c14dab31754e7b303d127b0d3f01116faef456/ruff-0.13.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b5e3d883e4f924c5298e3f2ee0f3085819c14f68d1e5b6715597681433f153", size = 13144418, upload-time = "2025-09-10T16:25:06.272Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a6/7985ad1778e60922d4bef546688cd8a25822c58873e9ff30189cfe5dc4ab/ruff-0.13.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03447f3d18479df3d24917a92d768a89f873a7181a064858ea90a804a7538991", size = 13370843, upload-time = "2025-09-10T16:25:09.965Z" }, + { url = "https://files.pythonhosted.org/packages/64/1c/bafdd5a7a05a50cc51d9f5711da704942d8dd62df3d8c70c311e98ce9f8a/ruff-0.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:fbc6b1934eb1c0033da427c805e27d164bb713f8e273a024a7e86176d7f462cf", size = 13321891, upload-time = "2025-09-10T16:25:12.969Z" }, + { url = "https://files.pythonhosted.org/packages/bc/3e/7817f989cb9725ef7e8d2cee74186bf90555279e119de50c750c4b7a72fe/ruff-0.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a8ab6a3e03665d39d4a25ee199d207a488724f022db0e1fe4002968abdb8001b", size = 12119119, upload-time = "2025-09-10T16:25:16.621Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/07/9df080742e8d1080e60c426dce6e96a8faf9a371e2ce22eef662e3839c95/ruff-0.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d2a5c62f8ccc6dd2fe259917482de7275cecc86141ee10432727c4816235bc41", size = 11961594, upload-time = "2025-09-10T16:25:19.49Z" }, + { url = "https://files.pythonhosted.org/packages/6a/f4/ae1185349197d26a2316840cb4d6c3fba61d4ac36ed728bf0228b222d71f/ruff-0.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b7b85ca27aeeb1ab421bc787009831cffe6048faae08ad80867edab9f2760945", size = 12933377, upload-time = "2025-09-10T16:25:22.371Z" }, + { url = "https://files.pythonhosted.org/packages/b6/39/e776c10a3b349fc8209a905bfb327831d7516f6058339a613a8d2aaecacd/ruff-0.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:79ea0c44a3032af768cabfd9616e44c24303af49d633b43e3a5096e009ebe823", size = 13418555, upload-time = "2025-09-10T16:25:25.681Z" }, + { url = "https://files.pythonhosted.org/packages/46/09/dca8df3d48e8b3f4202bf20b1658898e74b6442ac835bfe2c1816d926697/ruff-0.13.0-py3-none-win32.whl", hash = "sha256:4e473e8f0e6a04e4113f2e1de12a5039579892329ecc49958424e5568ef4f768", size = 12141613, upload-time = "2025-09-10T16:25:28.664Z" }, + { url = "https://files.pythonhosted.org/packages/61/21/0647eb71ed99b888ad50e44d8ec65d7148babc0e242d531a499a0bbcda5f/ruff-0.13.0-py3-none-win_amd64.whl", hash = "sha256:48e5c25c7a3713eea9ce755995767f4dcd1b0b9599b638b12946e892123d1efb", size = 13258250, upload-time = "2025-09-10T16:25:31.773Z" }, + { url = "https://files.pythonhosted.org/packages/e1/a3/03216a6a86c706df54422612981fb0f9041dbb452c3401501d4a22b942c9/ruff-0.13.0-py3-none-win_arm64.whl", hash = "sha256:ab80525317b1e1d38614addec8ac954f1b3e662de9d59114ecbf771d00cf613e", size = 12312357, upload-time = "2025-09-10T16:25:35.595Z" }, +] + +[[package]] +name = "scantree" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "pathspec" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b3/e4/40998faefc72ba1ddeb640a44fba92935353525dba110488806da8339c0b/scantree-0.0.4.tar.gz", hash = "sha256:15bd5cb24483b04db2c70653604e8ea3522e98087db7e38ab8482f053984c0ac", size = 24643, upload-time = "2024-08-03T20:08:59.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/ce/828467ddfa0d2fe473673026442d2032d552a168e42cfbf25fd0e5264e0c/scantree-0.0.4-py3-none-any.whl", hash = "sha256:7616ab65aa6b7f16fcf8e6fa1d9afaa99a27ab72bba05c61b691853b96763174", size = 20690, upload-time = "2024-08-03T20:08:58.137Z" }, ] [[package]] @@ -1523,6 +2110,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "shortuuid" +version = "1.0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e2/bcf761f3bff95856203f9559baf3741c416071dd200c0fc19fad7f078f86/shortuuid-1.0.13.tar.gz", hash = "sha256:3bb9cf07f606260584b1df46399c0b87dd84773e7b25912b7e391e30797c5e72", size = 9662, upload-time = "2024-03-11T20:11:06.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/44/21d6bf170bf40b41396480d8d49ad640bca3f2b02139cd52aa1e272830a5/shortuuid-1.0.13-py3-none-any.whl", hash = "sha256:a482a497300b49b4953e15108a7913244e1bb0d41f9d332f5e9925dba33a3c5a", size = 10529, upload-time = "2024-03-11T20:11:04.807Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = 
"2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = "slack-sdk" version = "3.41.0" @@ -1562,6 +2167,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/82/10cdfab4ab663a6b6bd624d33f55b2cfa41af5105be033a6d5d135a92c5f/sse_starlette-3.4.2.tar.gz", hash = "sha256:2f9a7f51ed84395a0427fb9f66cb1ec11f7899d977a72cbc9070b962a2e14489", size = 35236, upload-time = "2026-05-06T19:42:13.727Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/27/351c71e803c56090d8d3bf9520422debeb8ed938871fd4f7ef519805a6c5/sse_starlette-3.4.2-py3-none-any.whl", hash = "sha256:6ea5d35b7ce979a3de5a0db5f77fe886b1616e4b3e1ad93fba502bd9b5fb662f", size = 16516, upload-time = "2026-05-06T19:42:12.201Z" }, +] + [[package]] name = "starlette" version = "1.0.0" @@ -1574,6 +2192,89 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, ] +[[package]] +name = "storage3" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ 
+ { name = "deprecation" }, + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "pyiceberg" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/b2/6df208d64630744704d00f2c07197170390d6b4d0098617740f6a7a4fa98/storage3-2.30.0.tar.gz", hash = "sha256:b74e3cac149f2c0553dcb5f4d55d8c35d420d88183a1a2df77727d482665972b", size = 20162, upload-time = "2026-05-06T17:35:25.71Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/5c/bb8c8cc448cfae671c4ffee67f3651892ea59b341f27bed54666190eb8ef/storage3-2.30.0-py3-none-any.whl", hash = "sha256:2bd23a34011c018bd9c130d8a70a09ebd060ae80d946c6204a6fc08161ad728d", size = 28284, upload-time = "2026-05-06T17:35:24.659Z" }, +] + +[[package]] +name = "strenum" +version = "0.4.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/ad/430fb60d90e1d112a62ff57bdd1f286ec73a2a0331272febfddd21f330e1/StrEnum-0.4.15.tar.gz", hash = "sha256:878fb5ab705442070e4dd1929bb5e2249511c0bcf2b0eeacf3bcd80875c82eff", size = 23384, upload-time = "2023-06-29T22:02:58.399Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/69/297302c5f5f59c862faa31e6cb9a4cd74721cd1e052b38e464c5b402df8b/StrEnum-0.4.15-py3-none-any.whl", hash = "sha256:a30cda4af7cc6b5bf52c8055bc4bf4b2b6b14a93b574626da33df53cf7740659", size = 8851, upload-time = "2023-06-29T22:02:56.947Z" }, +] + +[[package]] +name = "strictyaml" +version = "1.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/08/efd28d49162ce89c2ad61a88bd80e11fb77bc9f6c145402589112d38f8af/strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407", size = 115206, upload-time = "2023-03-10T12:50:27.062Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/96/7c/a81ef5ef10978dd073a854e0fa93b5d8021d0594b639cc8f6453c3c78a1d/strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7", size = 123917, upload-time = "2023-03-10T12:50:17.242Z" }, +] + +[[package]] +name = "supabase" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "postgrest" }, + { name = "realtime" }, + { name = "storage3" }, + { name = "supabase-auth" }, + { name = "supabase-functions" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/a6/d2b17021c2db1a9d219c383e0762ac03a62b25468e61ab126b6b561c2f21/supabase-2.30.0.tar.gz", hash = "sha256:efdba41d474038ed220736ba4e64946df56043057ad785c4c3499d27e459975c", size = 9689, upload-time = "2026-05-06T17:35:27.781Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/82/d213be7d0ce0bb18018744c0ee38ba0d6648d41dbc46ac8558cffe80541f/supabase-2.30.0-py3-none-any.whl", hash = "sha256:f9b259194554f7bfd2dca6c23261f2df588016ca18b18e774f4d85bc941edb03", size = 16634, upload-time = "2026-05-06T17:35:26.696Z" }, +] + +[[package]] +name = "supabase-auth" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", extra = ["http2"] }, + { name = "pydantic" }, + { name = "pyjwt", extra = ["crypto"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/8a/48bbbe0b6703d0670b67e45b90d6a791fd01aace67443d286f760bf48895/supabase_auth-2.30.0.tar.gz", hash = "sha256:6138a53a306a95ed59c03d4e4975469dfc3343a0ade33cc4b37e4ef967ad83f8", size = 39135, upload-time = "2026-05-06T17:35:30.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/40/a99cb4373353bcbf302d962e51da9eac78b3b0f257eb0362c0852b1667f4/supabase_auth-2.30.0-py3-none-any.whl", hash = "sha256:e85e1f51ec0de2172c3a2a8514205f71731a9914f9a770ed199ac0cf054bc82c", size = 48352, 
upload-time = "2026-05-06T17:35:28.936Z" }, +] + +[[package]] +name = "supabase-functions" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", extra = ["http2"] }, + { name = "strenum" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/e6/5cd8559ec2bb332e6027840c1be292f9989c2fc7b47bf40800aec5586791/supabase_functions-2.30.0.tar.gz", hash = "sha256:025acfd25f1c000ba43d0f7b8e366b0d2e9dfc784b842528e21973eb33006113", size = 4683, upload-time = "2026-05-06T17:35:32.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/da/9dedab32775df04cc22ca72f194b78e895d940f195bed3e02882a65daa9b/supabase_functions-2.30.0-py3-none-any.whl", hash = "sha256:92419459f102767b954cd034856e4ded8e34c78660b32442d66c8b2899c68011", size = 8803, upload-time = "2026-05-06T17:35:31.342Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -1635,6 +2336,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, +] + [[package]] name = "tomlkit" 
version = "0.14.0" @@ -1686,6 +2396,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/33/70b320d24cd127d6ca427d2bef1279830f0786a1f2cde160f59b4fb80728/tree_sitter_rust-0.24.0-cp39-abi3-win_arm64.whl", hash = "sha256:7a0538eaf4063b443c6cd80a47df19249f65e27dbdf129396a9193749912d0c0", size = 128583, upload-time = "2025-04-01T21:06:02.58Z" }, ] +[[package]] +name = "typer" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/07/b822e1b307d40e263e8253d2384cf98c51aa2368cc7ba9a07e523a1d964b/typer-0.23.1.tar.gz", hash = "sha256:2070374e4d31c83e7b61362fd859aa683576432fd5b026b060ad6b4cd3b86134", size = 120047, upload-time = "2026-02-13T10:04:30.984Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/91/9b286ab899c008c2cb05e8be99814807e7fbbd33f0c0c960470826e5ac82/typer-0.23.1-py3-none-any.whl", hash = "sha256:3291ad0d3c701cbf522012faccfbb29352ff16ad262db2139e6b01f15781f14e", size = 56813, upload-time = "2026-02-13T10:04:32.008Z" }, +] + [[package]] name = "typer-slim" version = "0.21.1" @@ -1720,13 +2445,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "tzdata" +version = "2026.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254, upload-time = "2026-04-24T15:22:08.651Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321, upload-time = "2026-04-24T15:22:05.876Z" }, +] + +[[package]] +name = "uritemplate" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, +] + [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, + { 
url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, ] [[package]] @@ -1758,20 +2501,22 @@ wheels = [ [[package]] name = "websockets" -version = "16.0" +version = "15.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, - { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, - { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, - { url 
= "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, - { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, - { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, - { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, - { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] [[package]] @@ -1878,3 +2623,28 @@ sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50e wheels = [ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = 
"sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = 
"2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, +]