From dfc49fa338a3a3966ffceefca37122b3177596d7 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 11:05:49 +0900 Subject: [PATCH 01/10] Harden deployment config startup --- .env.example | 2 +- .github/workflows/docker-smoke.yml | 14 +++ DEVELOPMENT.md | 5 + ENVIRONMENT.md | 10 +- apps/api/src/five08/backend/api.py | 32 ++++- .../src/five08/discord_bot/config.py | 2 +- .../src/five08/discord_bot/main.py | 6 +- apps/worker/src/five08/worker/config.py | 23 +--- docs/configuration.md | 8 +- packages/shared/src/five08/settings.py | 24 +--- scripts/docker-smoke.sh | 111 ++++++++++++++++++ tests/unit/test_backend_api.py | 41 +++++++ tests/unit/test_bot.py | 19 +++ tests/unit/test_shared_settings.py | 28 +++-- tests/unit/test_worker_config.py | 46 ++++---- 15 files changed, 288 insertions(+), 83 deletions(-) create mode 100644 .github/workflows/docker-smoke.yml create mode 100755 scripts/docker-smoke.sh diff --git a/.env.example b/.env.example index 6164e93e..771aace8 100644 --- a/.env.example +++ b/.env.example @@ -271,7 +271,7 @@ EMAIL_REQUIRE_SENDER_AUTH_HEADERS=true # KEILA_API_KEY= # KEILA_API_BASE_URL=https://app.keila.io # KEILA_API_TIMEOUT_SECONDS=20.0 -# NEWSLETTER_SYNC_ENABLED=true +# NEWSLETTER_SYNC_ENABLED=false # NEWSLETTER_SYNC_INTERVAL_SECONDS=604800 # NEWSLETTER_SYNC_EXCLUDED_MAILBOXES=system,service-account diff --git a/.github/workflows/docker-smoke.yml b/.github/workflows/docker-smoke.yml new file mode 100644 index 00000000..196d4677 --- /dev/null +++ b/.github/workflows/docker-smoke.yml @@ -0,0 +1,14 @@ +name: Docker Smoke + +on: + merge_group: + workflow_dispatch: + +jobs: + docker-smoke: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Run Docker smoke check + run: ./scripts/docker-smoke.sh diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 788f1402..0155f673 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -142,6 +142,7 @@ Use Compose when you need container parity with deployment: ./scripts/docker-compose.sh up --build ./scripts/docker-compose.sh down ./scripts/docker-compose.sh print-ports +./scripts/docker-smoke.sh ``` `compose.yaml` is the canonical Coolify/base stack. `compose.local.yaml` adds @@ -163,6 +164,10 @@ docker network create 508-infra BuildKit-capable Docker / `docker compose build` support is required because the service Dockerfiles use BuildKit cache mounts. +Use `./scripts/docker-smoke.sh` when you need a deployment-style startup check. +It builds the API image, starts an isolated Redis/Postgres/web stack with only +the core env vars, and verifies `GET /health` reaches a healthy response. + ## Testing And Quality ```bash diff --git a/ENVIRONMENT.md b/ENVIRONMENT.md index c1f164d0..47f18332 100644 --- a/ENVIRONMENT.md +++ b/ENVIRONMENT.md @@ -10,9 +10,8 @@ current precedence rules. ## Required -- `ESPO_BASE_URL` -- `ESPO_API_KEY` - `API_SHARED_SECRET` (required for protected endpoints) +- `POSTGRES_URL` (required in non-local environments) - `MINIO_ROOT_PASSWORD` (required in non-local environments) - `DISCORD_BOT_TOKEN` (Discord bot runtime) @@ -40,7 +39,7 @@ current precedence rules. ## Postgres + Compose Exposure -- `Optional`: `POSTGRES_URL` (default: `postgresql://postgres:postgres@127.0.0.1:5432/workflows`; `./scripts/dev.sh` overrides it to a deterministic per-worktree localhost port, Compose injects a Docker-network URL) +- `Required in non-local environments`: `POSTGRES_URL` (local default: `postgresql://postgres:postgres@127.0.0.1:5432/workflows`; `./scripts/dev.sh` overrides it to a deterministic per-worktree localhost port, Compose injects a Docker-network URL) - `Optional` (Compose DB container): `POSTGRES_DB` (default: `workflows`) - `Optional` (Compose DB container): `POSTGRES_USER` (default: `postgres`) - `Optional` (Compose DB container): `POSTGRES_PASSWORD` (default: `postgres`) @@ -106,7 +105,8 @@ current precedence rules. ## Worker CRM Sync + Skills Extraction -- `Optional`: `CRM_SYNC_ENABLED` (default: `true`) +- `Optional`: `CRM_SYNC_ENABLED` (default: `true`; scheduler starts only when `ESPO_BASE_URL` and `ESPO_API_KEY` are configured) +- `Required for CRM-backed jobs and sync`: `ESPO_BASE_URL`, `ESPO_API_KEY` - `Optional`: `CRM_SYNC_INTERVAL_SECONDS` (default: `900`) - `Optional`: `CRM_SYNC_PAGE_SIZE` (default: `200`) - `Optional`: `CHECK_EMAIL_WAIT` (default: `2`; minutes between mailbox polls) @@ -182,7 +182,7 @@ current precedence rules. - `Optional for Keila contact sync`: `KEILA_API_KEY` - `Optional`: `KEILA_API_BASE_URL` (default: `https://app.keila.io`) - `Optional`: `KEILA_API_TIMEOUT_SECONDS` (default: `20.0`) -- `Optional`: `NEWSLETTER_SYNC_ENABLED` (default: `true`) +- `Optional`: `NEWSLETTER_SYNC_ENABLED` (default: `false`) - `Optional`: `NEWSLETTER_SYNC_INTERVAL_SECONDS` (default: `604800`, one week) - `Optional`: `NEWSLETTER_SYNC_EXCLUDED_MAILBOXES` (comma-separated mailbox local-parts or full addresses to skip during Migadu resync) - Note: mailbox and backup email subscription to configured newsletter tools is best effort. Failures are reported as warnings and do not block mailbox or account creation. diff --git a/apps/api/src/five08/backend/api.py b/apps/api/src/five08/backend/api.py index eb1b4ce8..c8c1e75a 100644 --- a/apps/api/src/five08/backend/api.py +++ b/apps/api/src/five08/backend/api.py @@ -971,6 +971,17 @@ async def _crm_sync_scheduler(app: FastAPI) -> None: await asyncio.sleep(interval_seconds) +def _should_start_crm_sync_scheduler() -> bool: + if not settings.crm_sync_enabled: + return False + if settings.espo_configured: + return True + logger.warning( + "CRM sync scheduler enabled but ESPO_BASE_URL/ESPO_API_KEY are not configured; skipping scheduler startup" + ) + return False + + async def _newsletter_sync_scheduler(app: FastAPI) -> None: queue = app.state.queue interval_seconds = max(60, settings.newsletter_sync_interval_seconds) @@ -1015,7 +1026,9 @@ async def _email_resume_scheduler() -> None: await asyncio.sleep(interval_seconds) -def _check_postgres_connection(connection: Connection) -> bool: +def _check_postgres_connection(connection: Connection | None) -> bool: + if connection is None: + return False try: with connection.cursor() as cursor: cursor.execute("SELECT 1") @@ -9080,19 +9093,28 @@ async def auth_discord_link_consume_handler( @asynccontextmanager async def _lifespan(app: FastAPI) -> Any: - await asyncio.to_thread(run_job_migrations) - redis_conn = get_redis_connection(settings) app.state.redis_conn = redis_conn app.state.postgres_conn_lock = asyncio.Lock() - app.state.postgres_conn = await asyncio.to_thread(get_postgres_connection, settings) + try: + await asyncio.to_thread(run_job_migrations) + except Exception: + logger.exception("Failed to run job migrations during startup") + try: + app.state.postgres_conn = await asyncio.to_thread( + get_postgres_connection, + settings, + ) + except Exception: + logger.exception("Failed to open startup Postgres connection") + app.state.postgres_conn = None app.state.queue = build_queue_client() app.state.auth_store = RedisAuthStore(redis_conn) app.state.oidc_client = OIDCProviderClient(settings) app.state.discord_admin_verifier = DiscordAdminVerifier(settings) app.state.http_client = httpx.AsyncClient(follow_redirects=False) - if settings.crm_sync_enabled: + if _should_start_crm_sync_scheduler(): app.state.crm_sync_task = asyncio.create_task(_crm_sync_scheduler(app)) else: logger.info("CRM sync scheduler disabled by config") diff --git a/apps/discord_bot/src/five08/discord_bot/config.py b/apps/discord_bot/src/five08/discord_bot/config.py index de9bc76a..1bbf37db 100644 --- a/apps/discord_bot/src/five08/discord_bot/config.py +++ b/apps/discord_bot/src/five08/discord_bot/config.py @@ -25,7 +25,7 @@ class Settings(SharedSettings): Required settings must be provided via environment variables or .env file. """ - discord_bot_token: str + discord_bot_token: str = "" discord_admin_roles: str = "Admin,Owner" discord_default_job_forum_channels: str = "gigs:part_time,fulltime-roles:full_time" diff --git a/apps/discord_bot/src/five08/discord_bot/main.py b/apps/discord_bot/src/five08/discord_bot/main.py index 6f0a1961..2d80eecc 100644 --- a/apps/discord_bot/src/five08/discord_bot/main.py +++ b/apps/discord_bot/src/five08/discord_bot/main.py @@ -22,10 +22,14 @@ async def main() -> None: """Main entry point for the bot.""" + token = settings.discord_bot_token.strip() + if not token: + raise RuntimeError("DISCORD_BOT_TOKEN is required to start the Discord bot.") + bot = create_bot() try: - await bot.start(settings.discord_bot_token) + await bot.start(token) except KeyboardInterrupt: logger.info("Bot stopped by user") finally: diff --git a/apps/worker/src/five08/worker/config.py b/apps/worker/src/five08/worker/config.py index 617f989b..053fd519 100644 --- a/apps/worker/src/five08/worker/config.py +++ b/apps/worker/src/five08/worker/config.py @@ -139,18 +139,10 @@ def _strip_optional_runtime_config_string(cls, value: object) -> object: return value.strip() return value - @model_validator(mode="after") - def validate_required_crm_settings(self) -> "WorkerSettings": - """Require EspoCRM settings outside local/test runtime environments.""" - env = self.environment.strip().lower() - if env in {"local", "dev", "development", "test"}: - return self - if not self.espo_base_url or not self.espo_api_key: - raise ValueError( - "ESPO_BASE_URL and ESPO_API_KEY must be set when ENVIRONMENT " - "is non-local." - ) - return self + @property + def espo_configured(self) -> bool: + """Return true when EspoCRM credentials are available.""" + return bool(self.espo_base_url.strip() and self.espo_api_key.strip()) @property def google_forms_allowed_form_ids_set(self) -> set[str]: @@ -193,13 +185,6 @@ def validate_email_resume_intake_settings(self) -> "WorkerSettings": @model_validator(mode="after") def validate_intake_resume_scan_settings(self) -> "WorkerSettings": """Require a scanner command when intake resume scanning is enabled.""" - env = self.environment.strip().lower() - if env not in {"local", "dev", "development", "test"} and not ( - self.intake_resume_require_virus_scan - ): - raise ValueError( - "INTAKE_RESUME_REQUIRE_VIRUS_SCAN must be true when ENVIRONMENT is non-local" - ) if ( self.intake_resume_require_virus_scan and not (self.intake_resume_virus_scan_command or "").strip() diff --git a/docs/configuration.md b/docs/configuration.md index 7a12a038..9dbef44d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -65,8 +65,7 @@ dashboard field. performs CRM lookup and builds planned CRM updates, but skips CRM writes and intake DB persistence. - `INTAKE_RESUME_REQUIRE_VIRUS_SCAN`: when true, downloaded resume files are not - parsed unless the malware scan command succeeds. Defaults to false for - local/dev/test. Non-local deployments must set this to true. + parsed unless the malware scan command succeeds. Defaults to false. - `INTAKE_RESUME_VIRUS_SCAN_COMMAND`: command used to scan downloaded resumes. Required when `INTAKE_RESUME_REQUIRE_VIRUS_SCAN=true`. Include `{path}` where the temporary resume filepath should be inserted. When `{path}` is omitted, @@ -184,7 +183,8 @@ if true multi-queue routing is introduced later. - `ESPO_BASE_URL` - `ESPO_API_KEY` -- `CRM_SYNC_ENABLED` +- `CRM_SYNC_ENABLED`: optional, defaults to `true`; the scheduler starts only + when ESPO credentials are configured. - `CRM_SYNC_INTERVAL_SECONDS` - `CRM_SYNC_PAGE_SIZE` - `CHECK_EMAIL_WAIT` @@ -258,7 +258,7 @@ for same-host deployments. - `KEILA_API_KEY`: optional for Keila contact sync. - `KEILA_API_BASE_URL`: optional, defaults to `https://app.keila.io`. - `KEILA_API_TIMEOUT_SECONDS`: optional, defaults to `20.0`. -- `NEWSLETTER_SYNC_ENABLED`: optional, defaults to `true`; dashboard changes require an API restart because the scheduler starts at startup. +- `NEWSLETTER_SYNC_ENABLED`: optional, defaults to `false`; dashboard changes require an API restart because the scheduler starts at startup. - `NEWSLETTER_SYNC_INTERVAL_SECONDS`: optional, defaults to `604800`; dashboard changes require an API restart because the scheduler sleep interval is startup-bound. - `NEWSLETTER_SYNC_EXCLUDED_MAILBOXES`: optional comma-separated mailbox local-parts or full addresses to skip during Migadu resync. diff --git a/packages/shared/src/five08/settings.py b/packages/shared/src/five08/settings.py index e645035d..a5f830d1 100644 --- a/packages/shared/src/five08/settings.py +++ b/packages/shared/src/five08/settings.py @@ -3,7 +3,7 @@ import os import sys -from pydantic import AliasChoices, Field, field_validator, model_validator +from pydantic import AliasChoices, Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -14,6 +14,9 @@ def normalize_sqlalchemy_postgres_url(url: str) -> str: return url +DEFAULT_POSTGRES_URL = "postgresql://postgres:postgres@127.0.0.1:5432/workflows" + + class SharedSettings(BaseSettings): """Base settings shared by all services in the monorepo.""" @@ -32,7 +35,7 @@ class SharedSettings(BaseSettings): redis_key_prefix: str = "jobs" redis_socket_connect_timeout: float | None = 5.0 redis_socket_timeout: float | None = 5.0 - postgres_url: str = "postgresql://postgres:postgres@127.0.0.1:5432/workflows" + postgres_url: str = DEFAULT_POSTGRES_URL job_max_attempts: int = 8 job_retry_base_seconds: int = 5 job_retry_max_seconds: int = 300 @@ -106,7 +109,7 @@ class SharedSettings(BaseSettings): ), ) keila_api_timeout_seconds: float = 20.0 - newsletter_sync_enabled: bool = True + newsletter_sync_enabled: bool = False newsletter_sync_interval_seconds: int = Field(default=604800, ge=60) newsletter_sync_excluded_mailboxes: str = "" onboarding_email_smtp_server: str | None = Field( @@ -254,21 +257,6 @@ def settings_customise_sources( return (init_settings, env_settings, file_secret_settings) return (init_settings, env_settings, dotenv_settings, file_secret_settings) - @model_validator(mode="after") - def validate_required_secrets(self) -> "SharedSettings": - """Require non-empty runtime secrets in non-local runtime environments.""" - env = self.environment.strip().lower() - if env in {"local", "dev", "development", "test"}: - return self - - if not self.postgres_url.strip(): - raise ValueError("POSTGRES_URL must be set when ENVIRONMENT is non-local.") - if not self.minio_root_password.strip(): - raise ValueError( - "MINIO_ROOT_PASSWORD must be set when ENVIRONMENT is non-local." - ) - return self - @property def sentry_environment_name(self) -> str: """Sentry environment always follows the app runtime environment.""" diff --git a/scripts/docker-smoke.sh b/scripts/docker-smoke.sh new file mode 100755 index 00000000..68100c26 --- /dev/null +++ b/scripts/docker-smoke.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env sh +set -eu + +script_dir=$(CDPATH= cd "$(dirname "$0")" && pwd) +repo_root=$(CDPATH= cd "$script_dir/.." && pwd) +project="five08-smoke-$(date +%s)-$$" +compose_file=$(mktemp "${TMPDIR:-/tmp}/five08-docker-smoke.XXXXXX.yaml") +body_file=$(mktemp "${TMPDIR:-/tmp}/five08-docker-smoke-body.XXXXXX") + +cleanup() { + docker compose -p "$project" -f "$compose_file" down -v --remove-orphans >/dev/null 2>&1 || true + rm -f "$compose_file" "$body_file" +} +trap cleanup EXIT HUP INT TERM + +cat >"$compose_file" <&2 + docker compose -p "$project" -f "$compose_file" logs web >&2 || true + exit 1 +fi + +health_url="http://127.0.0.1:$web_port/health" +echo "Waiting for $health_url" +last_code="" +for _ in $(seq 1 60); do + if [ "$(docker inspect -f '{{.State.Running}}' "$(docker compose -p "$project" -f "$compose_file" ps -q web)" 2>/dev/null || echo false)" != "true" ]; then + echo "web service exited before health check succeeded" >&2 + docker compose -p "$project" -f "$compose_file" logs web >&2 || true + exit 1 + fi + + last_code=$(curl -sS -o "$body_file" -w "%{http_code}" "$health_url" || true) + if [ "$last_code" = "200" ] && grep -q '"status":"healthy"' "$body_file"; then + echo "Docker smoke check passed." + exit 0 + fi + sleep 1 +done + +echo "Docker smoke check failed; last HTTP status: ${last_code:-none}" >&2 +cat "$body_file" >&2 || true +echo >&2 +docker compose -p "$project" -f "$compose_file" logs web >&2 || true +exit 1 diff --git a/tests/unit/test_backend_api.py b/tests/unit/test_backend_api.py index e0612998..3c4fb2cf 100644 --- a/tests/unit/test_backend_api.py +++ b/tests/unit/test_backend_api.py @@ -34,6 +34,47 @@ def ping(self) -> bool: raise RuntimeError("redis unavailable") +def test_crm_sync_scheduler_skips_start_without_espo_config( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(api.settings, "crm_sync_enabled", True) + monkeypatch.setattr(api.settings, "espo_base_url", "") + monkeypatch.setattr(api.settings, "espo_api_key", "") + + assert api._should_start_crm_sync_scheduler() is False + + +def test_crm_sync_scheduler_starts_when_espo_configured( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(api.settings, "crm_sync_enabled", True) + monkeypatch.setattr(api.settings, "espo_base_url", "https://crm.example.com") + monkeypatch.setattr(api.settings, "espo_api_key", "secret") + + assert api._should_start_crm_sync_scheduler() is True + + +def test_lifespan_serves_degraded_health_when_postgres_startup_fails( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fail_postgres(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("postgres unavailable") + + monkeypatch.setattr(api, "get_redis_connection", lambda _settings: _HealthyRedis()) + monkeypatch.setattr(api, "run_job_migrations", fail_postgres) + monkeypatch.setattr(api, "get_postgres_connection", fail_postgres) + monkeypatch.setattr(api, "build_queue_client", Mock(return_value=Mock())) + monkeypatch.setattr(api.settings, "crm_sync_enabled", False) + monkeypatch.setattr(api.settings, "newsletter_sync_enabled", False) + monkeypatch.setattr(api.settings, "email_resume_intake_enabled", False) + + with TestClient(api.create_app(run_lifespan=True)) as degraded_client: + response = degraded_client.get("/health") + + assert response.status_code == 503 + assert response.json()["postgres_connected"] is False + + class _FakeAuthStore(api.RedisAuthStore): def __init__(self) -> None: self.saved_links: dict[str, object] = {} diff --git a/tests/unit/test_bot.py b/tests/unit/test_bot.py index ec8b4f8f..2faf1918 100644 --- a/tests/unit/test_bot.py +++ b/tests/unit/test_bot.py @@ -16,6 +16,7 @@ validate_app_command_descriptions, ) from five08.discord_bot.config import Settings +from five08.discord_bot import main as bot_main class TestBot508: @@ -141,6 +142,24 @@ def test_discord_message_limit_is_not_env_configurable( assert config.discord_sendmsg_character_limit == 2000 + def test_discord_bot_token_defaults_blank_for_config_imports( + self, monkeypatch: pytest.MonkeyPatch + ): + monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False) + + config = Settings() + + assert config.discord_bot_token == "" + + @pytest.mark.asyncio + async def test_discord_bot_token_is_required_to_start( + self, monkeypatch: pytest.MonkeyPatch + ): + monkeypatch.setattr(bot_main.settings, "discord_bot_token", " ") + + with pytest.raises(RuntimeError, match="DISCORD_BOT_TOKEN is required"): + await bot_main.main() + def test_backend_api_base_url_defaults_to_host_runtime( self, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/unit/test_shared_settings.py b/tests/unit/test_shared_settings.py index fd909300..a77791bc 100644 --- a/tests/unit/test_shared_settings.py +++ b/tests/unit/test_shared_settings.py @@ -22,14 +22,20 @@ def test_non_local_settings_accept_explicit_values() -> None: assert settings.environment == "production" -def test_non_local_settings_require_non_empty_secrets() -> None: - """Non-local settings should reject empty runtime secret values.""" - with pytest.raises(ValidationError, match="MINIO_ROOT_PASSWORD must be set"): - SharedSettings( - environment="production", - postgres_url="postgresql://user:pass@db.example.com:5432/workflows", - minio_root_password=" ", - ) +def test_non_local_settings_do_not_eagerly_reject_missing_runtime_dependencies() -> ( + None +): + """Non-local settings should construct so health/runtime checks can report issues.""" + settings = SharedSettings( + environment="production", + minio_root_password=" ", + ) + + assert settings.environment == "production" + assert ( + settings.postgres_url + == "postgresql://postgres:postgres@127.0.0.1:5432/workflows" + ) def test_sentry_environment_and_sampling_are_not_env_configurable( @@ -178,6 +184,12 @@ def test_shared_settings_newsletter_sync_interval_requires_one_minute() -> None: assert settings.newsletter_sync_interval_seconds == 60 +def test_shared_settings_newsletter_sync_defaults_disabled() -> None: + settings = SharedSettings() + + assert settings.newsletter_sync_enabled is False + + def test_local_service_defaults_target_host_runtime( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/unit/test_worker_config.py b/tests/unit/test_worker_config.py index fd2ec182..f5576e48 100644 --- a/tests/unit/test_worker_config.py +++ b/tests/unit/test_worker_config.py @@ -6,16 +6,19 @@ from five08.worker.config import WorkerSettings -def test_non_local_worker_requires_espo_config() -> None: - with pytest.raises(ValidationError, match="ESPO_BASE_URL and ESPO_API_KEY"): - WorkerSettings( - environment="production", - minio_root_password="secret", - espo_base_url="", - espo_api_key="test-key", - intake_resume_require_virus_scan=True, - intake_resume_virus_scan_command="clamdscan --fdpass {path}", - ) +def test_non_local_worker_allows_missing_espo_config() -> None: + settings = WorkerSettings( + environment="production", + postgres_url="postgresql://user:pass@db.example.com:5432/workflows", + minio_root_password="secret", + espo_base_url="", + espo_api_key="", + ) + + assert settings.espo_base_url == "" + assert settings.espo_api_key == "" + assert settings.crm_sync_enabled is True + assert settings.espo_configured is False def test_local_worker_allows_missing_espo_config() -> None: @@ -27,6 +30,7 @@ def test_local_worker_allows_missing_espo_config() -> None: assert settings.espo_base_url == "" assert settings.espo_api_key == "" + assert settings.espo_configured is False def test_email_intake_requires_mailbox_credentials() -> None: @@ -293,17 +297,16 @@ def test_intake_resume_virus_scan_is_not_required_by_default() -> None: assert settings.intake_resume_require_virus_scan is False -def test_intake_resume_virus_scan_is_required_in_non_local_environments() -> None: - with pytest.raises( - ValidationError, - match="INTAKE_RESUME_REQUIRE_VIRUS_SCAN must be true", - ): - WorkerSettings( - environment="production", - minio_root_password="secret", - espo_base_url="https://crm.test.com", - espo_api_key="test-key", - ) +def test_intake_resume_virus_scan_is_not_required_in_non_local_environments() -> None: + settings = WorkerSettings( + environment="production", + postgres_url="postgresql://user:pass@db.example.com:5432/workflows", + minio_root_password="secret", + espo_base_url="https://crm.test.com", + espo_api_key="test-key", + ) + + assert settings.intake_resume_require_virus_scan is False def test_intake_resume_virus_scan_requires_command_when_enabled() -> None: @@ -400,6 +403,7 @@ def test_auth_cookie_secure_is_true_for_non_local_even_if_legacy_env_is_false( settings = WorkerSettings( environment="production", + postgres_url="postgresql://user:pass@db.example.com:5432/workflows", espo_base_url="https://crm.test.com", espo_api_key="test-key", minio_root_password="secret", From 5d4e74020d0bf38acb0fcf0239a4badc7058447e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 11:13:38 +0900 Subject: [PATCH 02/10] Limit docker smoke workflow token permissions --- .github/workflows/docker-smoke.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/docker-smoke.yml b/.github/workflows/docker-smoke.yml index 196d4677..b6ee9127 100644 --- a/.github/workflows/docker-smoke.yml +++ b/.github/workflows/docker-smoke.yml @@ -4,6 +4,9 @@ on: merge_group: workflow_dispatch: +permissions: + contents: read + jobs: docker-smoke: runs-on: ubuntu-latest From 9daf23601056d9b476ab21e8cd96e6aca2bef04c Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 11:15:20 +0900 Subject: [PATCH 03/10] Allow on-demand docker smoke checks --- .github/workflows/docker-smoke.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/docker-smoke.yml b/.github/workflows/docker-smoke.yml index b6ee9127..4eb90527 100644 --- a/.github/workflows/docker-smoke.yml +++ b/.github/workflows/docker-smoke.yml @@ -2,6 +2,8 @@ name: Docker Smoke on: merge_group: + pull_request: + types: [labeled] workflow_dispatch: permissions: @@ -9,6 +11,7 @@ permissions: jobs: docker-smoke: + if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'docker-smoke') runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 From 3aea425c6f079155f772c63e27c03c03dc4afa28 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 11:29:24 +0900 Subject: [PATCH 04/10] Address deployment startup review feedback --- ENVIRONMENT.md | 15 +++++--- apps/api/src/five08/backend/api.py | 28 ++++++++++----- docs/configuration.md | 6 ++-- scripts/docker-smoke.sh | 10 ++++-- tests/unit/test_backend_api.py | 56 ++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 18 deletions(-) diff --git a/ENVIRONMENT.md b/ENVIRONMENT.md index 47f18332..0dd981ca 100644 --- a/ENVIRONMENT.md +++ b/ENVIRONMENT.md @@ -8,16 +8,21 @@ Postgres `+1`, Compose web `+2`, MinIO API `+3`, MinIO console `+4`, host-run web/API `+5`, and bot health `+6`. Explicit service port overrides keep their current precedence rules. -## Required +## Required For A Healthy Non-Local Runtime - `API_SHARED_SECRET` (required for protected endpoints) -- `POSTGRES_URL` (required in non-local environments) -- `MINIO_ROOT_PASSWORD` (required in non-local environments) +- `POSTGRES_URL` (required for database-backed API and worker health) +- `MINIO_ROOT_PASSWORD` (required for internal transfer storage) - `DISCORD_BOT_TOKEN` (Discord bot runtime) +The app avoids eager settings-construction failures where possible so failed +deployments can still expose logs and health responses. Missing runtime +dependencies should surface as degraded health or route/job failures rather than +Pydantic import errors. + ## Core Runtime (Bot + Worker) -- `Optional` (non-local): `ENVIRONMENT` (default: `local`; non-local environments must set explicit `POSTGRES_URL` and `MINIO_ROOT_PASSWORD`) +- `Optional` (non-local): `ENVIRONMENT` (default: `local`; non-local environments should set explicit `POSTGRES_URL` and `MINIO_ROOT_PASSWORD`) - `Optional`: `SENTRY_DSN` (default: unset; set to enable Sentry event capture) - `Optional`: `SENTRY_SEND_DEFAULT_PII` (default: `false`) - `Optional`: `SENTRY_DEBUG` (default: `false`) @@ -39,7 +44,7 @@ current precedence rules. ## Postgres + Compose Exposure -- `Required in non-local environments`: `POSTGRES_URL` (local default: `postgresql://postgres:postgres@127.0.0.1:5432/workflows`; `./scripts/dev.sh` overrides it to a deterministic per-worktree localhost port, Compose injects a Docker-network URL) +- `Required for healthy non-local runtime`: `POSTGRES_URL` (local default: `postgresql://postgres:postgres@127.0.0.1:5432/workflows`; `./scripts/dev.sh` overrides it to a deterministic per-worktree localhost port, Compose injects a Docker-network URL) - `Optional` (Compose DB container): `POSTGRES_DB` (default: `workflows`) - `Optional` (Compose DB container): `POSTGRES_USER` (default: `postgres`) - `Optional` (Compose DB container): `POSTGRES_PASSWORD` (default: `postgres`) diff --git a/apps/api/src/five08/backend/api.py b/apps/api/src/five08/backend/api.py index c8c1e75a..b4cd83cc 100644 --- a/apps/api/src/five08/backend/api.py +++ b/apps/api/src/five08/backend/api.py @@ -972,14 +972,15 @@ async def _crm_sync_scheduler(app: FastAPI) -> None: def _should_start_crm_sync_scheduler() -> bool: + return _crm_sync_scheduler_skip_reason() is None + + +def _crm_sync_scheduler_skip_reason() -> str | None: if not settings.crm_sync_enabled: - return False + return "disabled" if settings.espo_configured: - return True - logger.warning( - "CRM sync scheduler enabled but ESPO_BASE_URL/ESPO_API_KEY are not configured; skipping scheduler startup" - ) - return False + return None + return "missing_espo" async def _newsletter_sync_scheduler(app: FastAPI) -> None: @@ -3437,7 +3438,10 @@ async def health_handler(request: Request) -> JSONResponse: except Exception: redis_ok = False - if hasattr(request.app.state, "postgres_conn"): + postgres_migrations_ok = getattr(request.app.state, "postgres_migrations_ok", True) + if not postgres_migrations_ok: + postgres_ok = False + elif hasattr(request.app.state, "postgres_conn"): postgres_ok = await _is_postgres_connection_healthy(request.app) else: postgres_ok = await asyncio.to_thread(is_postgres_healthy, settings) @@ -3446,6 +3450,7 @@ async def health_handler(request: Request) -> JSONResponse: "status": "healthy" if redis_ok and postgres_ok else "degraded", "redis_connected": redis_ok, "postgres_connected": postgres_ok, + "postgres_migrations_ok": postgres_migrations_ok, "queue_name": settings.redis_queue_name, } return JSONResponse(payload, status_code=200 if redis_ok and postgres_ok else 503) @@ -9096,10 +9101,12 @@ async def _lifespan(app: FastAPI) -> Any: redis_conn = get_redis_connection(settings) app.state.redis_conn = redis_conn app.state.postgres_conn_lock = asyncio.Lock() + app.state.postgres_migrations_ok = True try: await asyncio.to_thread(run_job_migrations) except Exception: logger.exception("Failed to run job migrations during startup") + app.state.postgres_migrations_ok = False try: app.state.postgres_conn = await asyncio.to_thread( get_postgres_connection, @@ -9114,8 +9121,13 @@ async def _lifespan(app: FastAPI) -> Any: app.state.discord_admin_verifier = DiscordAdminVerifier(settings) app.state.http_client = httpx.AsyncClient(follow_redirects=False) - if _should_start_crm_sync_scheduler(): + crm_sync_skip_reason = _crm_sync_scheduler_skip_reason() + if crm_sync_skip_reason is None: app.state.crm_sync_task = asyncio.create_task(_crm_sync_scheduler(app)) + elif crm_sync_skip_reason == "missing_espo": + logger.warning( + "CRM sync scheduler enabled but ESPO_BASE_URL/ESPO_API_KEY are not configured; skipping scheduler startup" + ) else: logger.info("CRM sync scheduler disabled by config") diff --git a/docs/configuration.md b/docs/configuration.md index 9dbef44d..68a669dc 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -6,8 +6,10 @@ that most often matter in local development and deployment. ## Core Runtime -- `ENVIRONMENT`: defaults to `local`. Non-local values require explicit runtime - secrets such as `POSTGRES_URL` and `MINIO_ROOT_PASSWORD`. +- `ENVIRONMENT`: defaults to `local`. Non-local deployments should provide + explicit runtime secrets such as `POSTGRES_URL` and `MINIO_ROOT_PASSWORD`. + Missing runtime dependencies are reported through health/runtime failures + where possible instead of eager settings-construction errors. - `LOG_LEVEL`: defaults to `INFO`. - `API_SHARED_SECRET`: required for protected non-dashboard API routes and for `./scripts/dev.sh login`. diff --git a/scripts/docker-smoke.sh b/scripts/docker-smoke.sh index 68100c26..a7ffac37 100755 --- a/scripts/docker-smoke.sh +++ b/scripts/docker-smoke.sh @@ -4,7 +4,7 @@ set -eu script_dir=$(CDPATH= cd "$(dirname "$0")" && pwd) repo_root=$(CDPATH= cd "$script_dir/.." && pwd) project="five08-smoke-$(date +%s)-$$" -compose_file=$(mktemp "${TMPDIR:-/tmp}/five08-docker-smoke.XXXXXX.yaml") +compose_file=$(mktemp "${TMPDIR:-/tmp}/five08-docker-smoke.XXXXXX") body_file=$(mktemp "${TMPDIR:-/tmp}/five08-docker-smoke-body.XXXXXX") cleanup() { @@ -72,11 +72,13 @@ echo "Starting Docker smoke stack: $project" docker compose -p "$project" -f "$compose_file" up -d --build redis postgres web web_port="" -for _ in $(seq 1 60); do +attempts=0 +while [ "$attempts" -lt 60 ]; do web_port=$(docker compose -p "$project" -f "$compose_file" port web 8090 | sed 's/.*://') if [ -n "$web_port" ]; then break fi + attempts=$((attempts + 1)) sleep 1 done @@ -89,7 +91,8 @@ fi health_url="http://127.0.0.1:$web_port/health" echo "Waiting for $health_url" last_code="" -for _ in $(seq 1 60); do +attempts=0 +while [ "$attempts" -lt 60 ]; do if [ "$(docker inspect -f '{{.State.Running}}' "$(docker compose -p "$project" -f "$compose_file" ps -q web)" 2>/dev/null || echo false)" != "true" ]; then echo "web service exited before health check succeeded" >&2 docker compose -p "$project" -f "$compose_file" logs web >&2 || true @@ -101,6 +104,7 @@ for _ in $(seq 1 60); do echo "Docker smoke check passed." exit 0 fi + attempts=$((attempts + 1)) sleep 1 done diff --git a/tests/unit/test_backend_api.py b/tests/unit/test_backend_api.py index 3c4fb2cf..913ca9c9 100644 --- a/tests/unit/test_backend_api.py +++ b/tests/unit/test_backend_api.py @@ -34,6 +34,23 @@ def ping(self) -> bool: raise RuntimeError("redis unavailable") +class _FakePostgresConnection: + def cursor(self) -> "_FakePostgresConnection": + return self + + def __enter__(self) -> "_FakePostgresConnection": + return self + + def __exit__(self, *_args: object) -> None: + return None + + def execute(self, _query: str) -> None: + return None + + def close(self) -> None: + return None + + def test_crm_sync_scheduler_skips_start_without_espo_config( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -54,6 +71,17 @@ def test_crm_sync_scheduler_starts_when_espo_configured( assert api._should_start_crm_sync_scheduler() is True +def test_crm_sync_scheduler_skips_when_disabled( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(api.settings, "crm_sync_enabled", False) + monkeypatch.setattr(api.settings, "espo_base_url", "https://crm.example.com") + monkeypatch.setattr(api.settings, "espo_api_key", "secret") + + assert api._should_start_crm_sync_scheduler() is False + assert api._crm_sync_scheduler_skip_reason() == "disabled" + + def test_lifespan_serves_degraded_health_when_postgres_startup_fails( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -75,6 +103,33 @@ def fail_postgres(*_args: object, **_kwargs: object) -> None: assert response.json()["postgres_connected"] is False +def test_lifespan_keeps_health_degraded_when_migrations_fail( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fail_migrations() -> None: + raise RuntimeError("migration failed") + + monkeypatch.setattr(api, "get_redis_connection", lambda _settings: _HealthyRedis()) + monkeypatch.setattr(api, "run_job_migrations", fail_migrations) + monkeypatch.setattr( + api, + "get_postgres_connection", + lambda _settings: _FakePostgresConnection(), + ) + monkeypatch.setattr(api, "build_queue_client", Mock(return_value=Mock())) + monkeypatch.setattr(api.settings, "crm_sync_enabled", False) + monkeypatch.setattr(api.settings, "newsletter_sync_enabled", False) + monkeypatch.setattr(api.settings, "email_resume_intake_enabled", False) + + with TestClient(api.create_app(run_lifespan=True)) as degraded_client: + response = degraded_client.get("/health") + + payload = response.json() + assert response.status_code == 503 + assert payload["postgres_connected"] is False + assert payload["postgres_migrations_ok"] is False + + class _FakeAuthStore(api.RedisAuthStore): def __init__(self) -> None: self.saved_links: dict[str, object] = {} @@ -174,6 +229,7 @@ def test_health_handler_healthy(client: TestClient) -> None: payload = response.json() assert response.status_code == 200 assert payload["status"] == "healthy" + assert payload["postgres_migrations_ok"] is True def test_health_handler_degraded(app: api.FastAPI) -> None: From 93f6b6461fa1da4839b5e567a03839df8d4dc615 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 12:05:36 +0900 Subject: [PATCH 05/10] Enforce production resume scan before parsing --- apps/worker/src/five08/worker/config.py | 8 ++++ .../worker/crm/intake_form_processor.py | 2 +- tests/unit/test_intake_form_processor.py | 37 +++++++++++++++++++ tests/unit/test_worker_config.py | 6 ++- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/apps/worker/src/five08/worker/config.py b/apps/worker/src/five08/worker/config.py index 053fd519..e9c4f991 100644 --- a/apps/worker/src/five08/worker/config.py +++ b/apps/worker/src/five08/worker/config.py @@ -144,6 +144,14 @@ def espo_configured(self) -> bool: """Return true when EspoCRM credentials are available.""" return bool(self.espo_base_url.strip() and self.espo_api_key.strip()) + @property + def effective_intake_resume_require_virus_scan(self) -> bool: + """Return whether resume parsing must be preceded by malware scanning.""" + env = self.environment.strip().lower() + if env not in {"local", "dev", "development", "test"}: + return True + return self.intake_resume_require_virus_scan + @property def google_forms_allowed_form_ids_set(self) -> set[str]: """Allowed Google Forms IDs used by intake webhook validation.""" diff --git a/apps/worker/src/five08/worker/crm/intake_form_processor.py b/apps/worker/src/five08/worker/crm/intake_form_processor.py index 91fcecd5..d8db61b2 100644 --- a/apps/worker/src/five08/worker/crm/intake_form_processor.py +++ b/apps/worker/src/five08/worker/crm/intake_form_processor.py @@ -961,7 +961,7 @@ def _download_resume_content(self, resume_url: str) -> bytes: def _scan_resume_content(self, content: bytes, filename: str) -> bool: """Run the configured malware scanner before parsing untrusted resumes.""" - if not settings.intake_resume_require_virus_scan: + if not settings.effective_intake_resume_require_virus_scan: return True command_template = settings.intake_resume_virus_scan_command.strip() diff --git a/tests/unit/test_intake_form_processor.py b/tests/unit/test_intake_form_processor.py index 032a8cbd..195b6590 100644 --- a/tests/unit/test_intake_form_processor.py +++ b/tests/unit/test_intake_form_processor.py @@ -728,6 +728,43 @@ def test_build_resume_updates_skips_parsing_when_scan_fails() -> None: processor.document_processor.extract_text.assert_not_called() +def test_build_resume_updates_requires_scan_before_production_parsing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + processor = IntakeFormProcessor() + processor.document_processor = Mock() + response = Mock() + response.status_code = 200 + response.raise_for_status = Mock() + response.headers = {} + response.iter_content = Mock(return_value=[b"resume-bytes"]) + response.__enter__ = Mock(return_value=response) + response.__exit__ = Mock(return_value=None) + monkeypatch.setattr(intake_module.settings, "environment", "production") + monkeypatch.setattr( + intake_module.settings, + "intake_resume_require_virus_scan", + False, + ) + monkeypatch.setattr(intake_module.settings, "intake_resume_virus_scan_command", "") + + with ( + patch( + "five08.worker.crm.intake_form_processor.requests.get", + return_value=response, + ), + patch.object(processor, "_hostname_resolves_publicly", return_value=True), + ): + updates = processor._build_resume_updates( + { + "resume_url": "https://example.com/resume.pdf", + } + ) + + assert updates == {} + processor.document_processor.extract_text.assert_not_called() + + def test_resume_url_log_mask_strips_signed_query_parameters() -> None: processor = IntakeFormProcessor() diff --git a/tests/unit/test_worker_config.py b/tests/unit/test_worker_config.py index f5576e48..d0c4c995 100644 --- a/tests/unit/test_worker_config.py +++ b/tests/unit/test_worker_config.py @@ -295,9 +295,12 @@ def test_intake_resume_virus_scan_is_not_required_by_default() -> None: ) assert settings.intake_resume_require_virus_scan is False + assert settings.effective_intake_resume_require_virus_scan is False -def test_intake_resume_virus_scan_is_not_required_in_non_local_environments() -> None: +def test_intake_resume_virus_scan_is_runtime_required_in_non_local_environments() -> ( + None +): settings = WorkerSettings( environment="production", postgres_url="postgresql://user:pass@db.example.com:5432/workflows", @@ -307,6 +310,7 @@ def test_intake_resume_virus_scan_is_not_required_in_non_local_environments() -> ) assert settings.intake_resume_require_virus_scan is False + assert settings.effective_intake_resume_require_virus_scan is True def test_intake_resume_virus_scan_requires_command_when_enabled() -> None: From 4e632b1f20b9c56551ec0165f5dd9871e2ced3a1 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 12:37:27 +0900 Subject: [PATCH 06/10] Add ClamAV sidecar resume scanning --- .github/workflows/docker-smoke.yml | 2 +- ENVIRONMENT.md | 3 ++ apps/api/Dockerfile | 5 ++ apps/api/src/five08/backend/api.py | 9 +++- apps/worker/Dockerfile | 5 ++ apps/worker/src/five08/worker/config.py | 7 +++ .../worker/crm/intake_form_processor.py | 9 ++++ compose.yaml | 24 ++++++++++ docs/configuration.md | 11 +++-- scripts/docker-smoke.sh | 21 ++++++++- tests/unit/test_backend_api.py | 20 ++++++++ tests/unit/test_intake_form_processor.py | 47 ++++++++++++------- tests/unit/test_worker_config.py | 3 ++ 13 files changed, 142 insertions(+), 24 deletions(-) diff --git a/.github/workflows/docker-smoke.yml b/.github/workflows/docker-smoke.yml index 4eb90527..8b1e2a51 100644 --- a/.github/workflows/docker-smoke.yml +++ b/.github/workflows/docker-smoke.yml @@ -3,7 +3,7 @@ name: Docker Smoke on: merge_group: pull_request: - types: [labeled] + types: [labeled, synchronize] workflow_dispatch: permissions: diff --git a/ENVIRONMENT.md b/ENVIRONMENT.md index 0dd981ca..dafb1547 100644 --- a/ENVIRONMENT.md +++ b/ENVIRONMENT.md @@ -132,6 +132,9 @@ Pydantic import errors. - `Optional`: `INTAKE_RESUME_FETCH_TIMEOUT_SECONDS` (default: `20.0`; timeout for intake resume URL downloads) - `Optional`: `INTAKE_RESUME_MAX_REDIRECTS` (default: `3`; max redirects followed for intake resume URL downloads) - `Optional`: `INTAKE_RESUME_ALLOWED_HOSTS` (default: empty; optional comma-separated host allowlist for intake resume URL downloads) +- `Optional`: `INTAKE_RESUME_REQUIRE_VIRUS_SCAN` (default: `false` locally; resume parsing requires scanning automatically outside local/dev/test) +- `Required for non-local resume parsing`: `INTAKE_RESUME_VIRUS_SCAN_COMMAND` (Compose default: `clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}` against the ClamAV sidecar) +- `Optional`: `INTAKE_RESUME_VIRUS_SCAN_TIMEOUT_SECONDS` (default: `30.0`; timeout for the configured scan command) - `Optional`: `EMAIL_RESUME_INTAKE_ENABLED` (default: `false`; enables worker-side mailbox resume processing loop) - `Optional`: `EMAIL_RESUME_ALLOWED_EXTENSIONS` (default: `pdf,doc,docx`) - `Optional`: `EMAIL_RESUME_MAX_FILE_SIZE_MB` (default: `10`) diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index aa6d513b..cee7dd21 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -7,6 +7,11 @@ COPY --from=uv /uv /uvx /usr/local/bin/ WORKDIR /app ENV UV_LINK_MODE=copy +RUN apt-get update \ + && apt-get install -y --no-install-recommends clamdscan \ + && rm -rf /var/lib/apt/lists/* \ + && printf 'TCPSocket 3310\nTCPAddr clamav\n' >/etc/clamav/clamdscan.conf + COPY pyproject.toml uv.lock ./ COPY apps/discord_bot/pyproject.toml apps/discord_bot/pyproject.toml COPY apps/worker/pyproject.toml apps/worker/pyproject.toml diff --git a/apps/api/src/five08/backend/api.py b/apps/api/src/five08/backend/api.py index b4cd83cc..187fe55a 100644 --- a/apps/api/src/five08/backend/api.py +++ b/apps/api/src/five08/backend/api.py @@ -3446,14 +3446,19 @@ async def health_handler(request: Request) -> JSONResponse: else: postgres_ok = await asyncio.to_thread(is_postgres_healthy, settings) + intake_resume_scan_required = settings.effective_intake_resume_require_virus_scan + intake_resume_scan_configured = settings.intake_resume_virus_scan_configured + healthy = redis_ok and postgres_ok and intake_resume_scan_configured payload = { - "status": "healthy" if redis_ok and postgres_ok else "degraded", + "status": "healthy" if healthy else "degraded", "redis_connected": redis_ok, "postgres_connected": postgres_ok, "postgres_migrations_ok": postgres_migrations_ok, + "intake_resume_scan_required": intake_resume_scan_required, + "intake_resume_scan_configured": intake_resume_scan_configured, "queue_name": settings.redis_queue_name, } - return JSONResponse(payload, status_code=200 if redis_ok and postgres_ok else 503) + return JSONResponse(payload, status_code=200 if healthy else 503) async def ingest_handler(request: Request, source: str) -> JSONResponse: diff --git a/apps/worker/Dockerfile b/apps/worker/Dockerfile index ac3dcec4..52bd9259 100644 --- a/apps/worker/Dockerfile +++ b/apps/worker/Dockerfile @@ -7,6 +7,11 @@ COPY --from=uv /uv /uvx /usr/local/bin/ WORKDIR /app ENV UV_LINK_MODE=copy +RUN apt-get update \ + && apt-get install -y --no-install-recommends clamdscan \ + && rm -rf /var/lib/apt/lists/* \ + && printf 'TCPSocket 3310\nTCPAddr clamav\n' >/etc/clamav/clamdscan.conf + COPY pyproject.toml uv.lock ./ COPY apps/discord_bot/pyproject.toml apps/discord_bot/pyproject.toml COPY apps/worker/pyproject.toml apps/worker/pyproject.toml diff --git a/apps/worker/src/five08/worker/config.py b/apps/worker/src/five08/worker/config.py index e9c4f991..357165ae 100644 --- a/apps/worker/src/five08/worker/config.py +++ b/apps/worker/src/five08/worker/config.py @@ -152,6 +152,13 @@ def effective_intake_resume_require_virus_scan(self) -> bool: return True return self.intake_resume_require_virus_scan + @property + def intake_resume_virus_scan_configured(self) -> bool: + """Return whether the effective resume scan requirement has a command.""" + if not self.effective_intake_resume_require_virus_scan: + return True + return bool((self.intake_resume_virus_scan_command or "").strip()) + @property def google_forms_allowed_form_ids_set(self) -> set[str]: """Allowed Google Forms IDs used by intake webhook validation.""" diff --git a/apps/worker/src/five08/worker/crm/intake_form_processor.py b/apps/worker/src/five08/worker/crm/intake_form_processor.py index d8db61b2..a42ccc68 100644 --- a/apps/worker/src/five08/worker/crm/intake_form_processor.py +++ b/apps/worker/src/five08/worker/crm/intake_form_processor.py @@ -101,6 +101,10 @@ class _ResumeFileNotProvided: _RESUME_FILE_NOT_PROVIDED = _ResumeFileNotProvided() +class IntakeResumeScanConfigError(RuntimeError): + """Raised when resume processing requires malware scanning but lacks config.""" + + class IntakeFormProcessor: """Process a Google Forms member intake submission against CRM.""" @@ -675,6 +679,11 @@ def _prepare_resume_file( resume_url = self._normalize_text(payload.get("resume_url")) if not resume_url: return None + if not settings.intake_resume_virus_scan_configured: + raise IntakeResumeScanConfigError( + "INTAKE_RESUME_VIRUS_SCAN_COMMAND must be configured before " + "processing intake resumes in this environment" + ) resume_file_name = self._normalize_text(payload.get("resume_file_name")) resume_name = ( diff --git a/compose.yaml b/compose.yaml index 071cb65f..7e923485 100644 --- a/compose.yaml +++ b/compose.yaml @@ -59,6 +59,23 @@ services: condition: service_healthy restart: "no" + clamav: + image: clamav/clamav:1.4 + platform: ${CLAMAV_PLATFORM:-linux/amd64} + restart: unless-stopped + volumes: + - clamav-data:/var/lib/clamav + healthcheck: + test: + [ + "CMD-SHELL", + "tmp=$$(mktemp) && printf clean >$$tmp && clamdscan --stream --no-summary $$tmp >/dev/null 2>&1; code=$$?; rm -f $$tmp; exit $$code", + ] + interval: 30s + timeout: 10s + retries: 20 + start_period: 60s + discord_bot: build: context: . @@ -99,6 +116,7 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND:-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} # Keep the container's internal listen port fixed; vary only the published # host port for tunnels/local multi-worktree runs. WEB_HOST: 0.0.0.0 @@ -116,6 +134,8 @@ services: condition: service_healthy minio-init: condition: service_completed_successfully + clamav: + condition: service_healthy worker: build: @@ -145,6 +165,7 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND:-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} DISCORD_BOT_INTERNAL_BASE_URL: http://discord_bot:3000 restart: unless-stopped networks: @@ -159,11 +180,14 @@ services: condition: service_healthy minio-init: condition: service_completed_successfully + clamav: + condition: service_healthy volumes: redis-data: postgres-data: minio-data: + clamav-data: networks: infra: diff --git a/docs/configuration.md b/docs/configuration.md index 68a669dc..032b9716 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -67,11 +67,14 @@ dashboard field. performs CRM lookup and builds planned CRM updates, but skips CRM writes and intake DB persistence. - `INTAKE_RESUME_REQUIRE_VIRUS_SCAN`: when true, downloaded resume files are not - parsed unless the malware scan command succeeds. Defaults to false. + parsed unless the malware scan command succeeds. Defaults to false locally; + non-local runtimes require scanning for resume parsing even when this is unset. - `INTAKE_RESUME_VIRUS_SCAN_COMMAND`: command used to scan downloaded resumes. - Required when `INTAKE_RESUME_REQUIRE_VIRUS_SCAN=true`. Include `{path}` where - the temporary resume filepath should be inserted. When `{path}` is omitted, - the filepath is appended as the final argument. + Required when `INTAKE_RESUME_REQUIRE_VIRUS_SCAN=true` or when running outside + local/dev/test. Include `{path}` where the temporary resume filepath should be + inserted. When `{path}` is omitted, the filepath is appended as the final + argument. Production Compose uses the ClamAV sidecar command + `clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}`. - `INTAKE_RESUME_VIRUS_SCAN_TIMEOUT_SECONDS`: scan command timeout. ## Queue And Jobs diff --git a/scripts/docker-smoke.sh b/scripts/docker-smoke.sh index a7ffac37..9199f0dc 100755 --- a/scripts/docker-smoke.sh +++ b/scripts/docker-smoke.sh @@ -36,6 +36,20 @@ services: timeout: 2s retries: 15 + clamav: + image: clamav/clamav:1.4 + platform: ${CLAMAV_PLATFORM:-linux/amd64} + healthcheck: + test: + [ + "CMD-SHELL", + "tmp=\$\$(mktemp) && printf clean >\$\$tmp && clamdscan --stream --no-summary \$\$tmp >/dev/null 2>&1; code=\$\$?; rm -f \$\$tmp; exit \$\$code", + ] + interval: 10s + timeout: 10s + retries: 30 + start_period: 60s + web: build: context: "$repo_root" @@ -59,6 +73,7 @@ services: NEWSLETTER_SYNC_ENABLED: "false" EMAIL_RESUME_INTAKE_ENABLED: "false" INTAKE_RESUME_REQUIRE_VIRUS_SCAN: "false" + INTAKE_RESUME_VIRUS_SCAN_COMMAND: "clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}" ports: - "127.0.0.1::8090" depends_on: @@ -66,6 +81,8 @@ services: condition: service_healthy postgres: condition: service_healthy + clamav: + condition: service_healthy EOF echo "Starting Docker smoke stack: $project" @@ -100,7 +117,9 @@ while [ "$attempts" -lt 60 ]; do fi last_code=$(curl -sS -o "$body_file" -w "%{http_code}" "$health_url" || true) - if [ "$last_code" = "200" ] && grep -q '"status":"healthy"' "$body_file"; then + if [ "$last_code" = "200" ] && grep -Eq '"status"[[:space:]]*:[[:space:]]*"healthy"' "$body_file"; then + echo "Checking ClamAV scanner command" + docker compose -p "$project" -f "$compose_file" exec -T web sh -c 'tmp=$(mktemp); trap "rm -f \"$tmp\"" EXIT; printf clean >"$tmp"; clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf "$tmp"' echo "Docker smoke check passed." exit 0 fi diff --git a/tests/unit/test_backend_api.py b/tests/unit/test_backend_api.py index 913ca9c9..fca8fa85 100644 --- a/tests/unit/test_backend_api.py +++ b/tests/unit/test_backend_api.py @@ -230,6 +230,7 @@ def test_health_handler_healthy(client: TestClient) -> None: assert response.status_code == 200 assert payload["status"] == "healthy" assert payload["postgres_migrations_ok"] is True + assert payload["intake_resume_scan_configured"] is True def test_health_handler_degraded(app: api.FastAPI) -> None: @@ -244,6 +245,25 @@ def test_health_handler_degraded(app: api.FastAPI) -> None: assert payload["status"] == "degraded" +def test_health_handler_degraded_when_production_resume_scan_unconfigured( + app: api.FastAPI, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(api.settings, "environment", "production") + monkeypatch.setattr(api.settings, "intake_resume_require_virus_scan", False) + monkeypatch.setattr(api.settings, "intake_resume_virus_scan_command", "") + client = TestClient(app) + + with patch("five08.backend.api.is_postgres_healthy", return_value=True): + response = client.get("/health") + + payload = response.json() + assert response.status_code == 503 + assert payload["status"] == "degraded" + assert payload["intake_resume_scan_required"] is True + assert payload["intake_resume_scan_configured"] is False + + def test_ingest_handler_enqueues_job( client: TestClient, auth_headers: dict[str, str], diff --git a/tests/unit/test_intake_form_processor.py b/tests/unit/test_intake_form_processor.py index 195b6590..23a06c90 100644 --- a/tests/unit/test_intake_form_processor.py +++ b/tests/unit/test_intake_form_processor.py @@ -8,6 +8,7 @@ from five08.worker.crm import intake_form_processor as intake_module from five08.worker.crm.intake_form_processor import ( IntakeFormProcessor, + IntakeResumeScanConfigError, IntakeResumeFile, ) @@ -163,6 +164,34 @@ def test_create_prospect_does_not_retry_failed_resume_prepare() -> None: prepare.assert_called_once() +def test_process_intake_fails_when_production_resume_scan_unconfigured( + monkeypatch: pytest.MonkeyPatch, +) -> None: + processor = IntakeFormProcessor() + processor.api = MagicMock() + processor.api.request.return_value = {"list": []} + monkeypatch.setattr(intake_module.settings, "environment", "production") + monkeypatch.setattr( + intake_module.settings, + "intake_resume_require_virus_scan", + False, + ) + monkeypatch.setattr(intake_module.settings, "intake_resume_virus_scan_command", "") + + with pytest.raises(IntakeResumeScanConfigError): + processor.process_intake( + payload={ + "email": "new@example.com", + "first_name": "New", + "last_name": "Person", + "resume_url": "https://tally.so/resume.pdf", + "form_id": "form-1", + } + ) + + processor.api.upload_file.assert_not_called() + + def test_intake_form_processor_dry_run_update_does_not_write_crm_or_db() -> None: """Dry-run update should return planned updates without PUT or persistence.""" processor = IntakeFormProcessor() @@ -733,13 +762,6 @@ def test_build_resume_updates_requires_scan_before_production_parsing( ) -> None: processor = IntakeFormProcessor() processor.document_processor = Mock() - response = Mock() - response.status_code = 200 - response.raise_for_status = Mock() - response.headers = {} - response.iter_content = Mock(return_value=[b"resume-bytes"]) - response.__enter__ = Mock(return_value=response) - response.__exit__ = Mock(return_value=None) monkeypatch.setattr(intake_module.settings, "environment", "production") monkeypatch.setattr( intake_module.settings, @@ -748,20 +770,13 @@ def test_build_resume_updates_requires_scan_before_production_parsing( ) monkeypatch.setattr(intake_module.settings, "intake_resume_virus_scan_command", "") - with ( - patch( - "five08.worker.crm.intake_form_processor.requests.get", - return_value=response, - ), - patch.object(processor, "_hostname_resolves_publicly", return_value=True), - ): - updates = processor._build_resume_updates( + with pytest.raises(IntakeResumeScanConfigError): + processor._build_resume_updates( { "resume_url": "https://example.com/resume.pdf", } ) - assert updates == {} processor.document_processor.extract_text.assert_not_called() diff --git a/tests/unit/test_worker_config.py b/tests/unit/test_worker_config.py index d0c4c995..afa6c778 100644 --- a/tests/unit/test_worker_config.py +++ b/tests/unit/test_worker_config.py @@ -296,6 +296,7 @@ def test_intake_resume_virus_scan_is_not_required_by_default() -> None: assert settings.intake_resume_require_virus_scan is False assert settings.effective_intake_resume_require_virus_scan is False + assert settings.intake_resume_virus_scan_configured is True def test_intake_resume_virus_scan_is_runtime_required_in_non_local_environments() -> ( @@ -311,6 +312,7 @@ def test_intake_resume_virus_scan_is_runtime_required_in_non_local_environments( assert settings.intake_resume_require_virus_scan is False assert settings.effective_intake_resume_require_virus_scan is True + assert settings.intake_resume_virus_scan_configured is False def test_intake_resume_virus_scan_requires_command_when_enabled() -> None: @@ -335,6 +337,7 @@ def test_intake_resume_virus_scan_allows_enabled_with_command() -> None: ) assert settings.intake_resume_require_virus_scan is True + assert settings.intake_resume_virus_scan_configured is True def test_intake_resume_allowed_hostnames_normalizes_dots_and_empties() -> None: From 12dd306efcb0e5c9ff8e82cd55ace0af1653acbc Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 12:56:28 +0900 Subject: [PATCH 07/10] Keep ClamAV from gating Compose startup --- compose.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/compose.yaml b/compose.yaml index 7e923485..2ebd4f11 100644 --- a/compose.yaml +++ b/compose.yaml @@ -116,7 +116,7 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} - INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND:-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} # Keep the container's internal listen port fixed; vary only the published # host port for tunnels/local multi-worktree runs. WEB_HOST: 0.0.0.0 @@ -134,8 +134,6 @@ services: condition: service_healthy minio-init: condition: service_completed_successfully - clamav: - condition: service_healthy worker: build: @@ -165,7 +163,7 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} - INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND:-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} DISCORD_BOT_INTERNAL_BASE_URL: http://discord_bot:3000 restart: unless-stopped networks: @@ -180,8 +178,6 @@ services: condition: service_healthy minio-init: condition: service_completed_successfully - clamav: - condition: service_healthy volumes: redis-data: From 4a4486479bd52bbde6d9f1406a3cf92a21a53eb2 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 13:12:32 +0900 Subject: [PATCH 08/10] Clarify optional ClamAV startup behavior --- apps/api/src/five08/backend/api.py | 5 +++-- compose.yaml | 4 ++++ tests/unit/test_backend_api.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/apps/api/src/five08/backend/api.py b/apps/api/src/five08/backend/api.py index 187fe55a..c0873eaa 100644 --- a/apps/api/src/five08/backend/api.py +++ b/apps/api/src/five08/backend/api.py @@ -1046,8 +1046,9 @@ async def _is_postgres_connection_healthy(app: FastAPI) -> bool: if healthy: return True - with contextlib.suppress(Exception): - await asyncio.to_thread(connection.close) + if connection is not None: + with contextlib.suppress(Exception): + await asyncio.to_thread(connection.close) try: refreshed = await asyncio.to_thread(get_postgres_connection, settings) diff --git a/compose.yaml b/compose.yaml index 2ebd4f11..917d2524 100644 --- a/compose.yaml +++ b/compose.yaml @@ -116,6 +116,8 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} + # Use '-' intentionally: an explicitly blank value means "scanner not configured", + # allowing startup while /health reports degraded in non-local runtimes. INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} # Keep the container's internal listen port fixed; vary only the published # host port for tunnels/local multi-worktree runs. @@ -163,6 +165,8 @@ services: MINIO_ROOT_USER: ${MINIO_ROOT_USER:-internal} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-change-me} MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} + # Use '-' intentionally: an explicitly blank value means "scanner not configured", + # allowing startup while /health reports degraded in non-local runtimes. INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} DISCORD_BOT_INTERNAL_BASE_URL: http://discord_bot:3000 restart: unless-stopped diff --git a/tests/unit/test_backend_api.py b/tests/unit/test_backend_api.py index fca8fa85..bd88fb78 100644 --- a/tests/unit/test_backend_api.py +++ b/tests/unit/test_backend_api.py @@ -130,6 +130,21 @@ def fail_migrations() -> None: assert payload["postgres_migrations_ok"] is False +async def test_postgres_health_handles_missing_connection( + monkeypatch: pytest.MonkeyPatch, +) -> None: + app = api.create_app(run_lifespan=False) + app.state.postgres_conn_lock = asyncio.Lock() + app.state.postgres_conn = None + + def fail_postgres(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("postgres unavailable") + + monkeypatch.setattr(api, "get_postgres_connection", fail_postgres) + + assert await api._is_postgres_connection_healthy(app) is False + + class _FakeAuthStore(api.RedisAuthStore): def __init__(self) -> None: self.saved_links: dict[str, object] = {} From 202495710c1b6aa989a8246ff761c4fe01d4493e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 16:07:25 +0900 Subject: [PATCH 09/10] Avoid suppressing ClamAV compose defaults --- .env.example | 4 +++- .../worker/crm/intake_form_processor.py | 19 ++++++++++++++++- tests/unit/test_intake_form_processor.py | 21 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 771aace8..f18df6aa 100644 --- a/.env.example +++ b/.env.example @@ -247,8 +247,10 @@ ONBOARDING_TALLY_WEBHOOK_SIGNING_SECRET= ONBOARDING_TALLY_ALLOWED_FORM_IDS= # Resume files are untrusted. Keep scanning required in production and configure # a scanner command; use "{path}" where the downloaded resume path should go. +# Compose deployments provide a ClamAV sidecar command by default. Uncomment +# and set this only when intentionally overriding or disabling that command. INTAKE_RESUME_REQUIRE_VIRUS_SCAN=false -INTAKE_RESUME_VIRUS_SCAN_COMMAND= +# INTAKE_RESUME_VIRUS_SCAN_COMMAND= INTAKE_RESUME_VIRUS_SCAN_TIMEOUT_SECONDS=30.0 EMAIL_RESUME_INTAKE_ENABLED=false EMAIL_RESUME_ALLOWED_EXTENSIONS=pdf,doc,docx diff --git a/apps/worker/src/five08/worker/crm/intake_form_processor.py b/apps/worker/src/five08/worker/crm/intake_form_processor.py index a42ccc68..b4b028fd 100644 --- a/apps/worker/src/five08/worker/crm/intake_form_processor.py +++ b/apps/worker/src/five08/worker/crm/intake_form_processor.py @@ -105,6 +105,10 @@ class IntakeResumeScanConfigError(RuntimeError): """Raised when resume processing requires malware scanning but lacks config.""" +class IntakeResumeScanExecutionError(RuntimeError): + """Raised when the malware scanner cannot complete a required scan.""" + + class IntakeFormProcessor: """Process a Google Forms member intake submission against CRM.""" @@ -1005,7 +1009,9 @@ def _scan_resume_content(self, content: bytes, filename: str) -> bool: logger.warning( "Resume malware scan failed filename=%s error=%s", filename, exc ) - return False + raise IntakeResumeScanExecutionError( + f"Resume malware scan failed for {filename}" + ) from exc finally: if temp_path: with contextlib.suppress(OSError): @@ -1014,6 +1020,17 @@ def _scan_resume_content(self, content: bytes, filename: str) -> bool: if result.returncode == 0: return True + if result.returncode > 1: + logger.warning( + "Resume malware scan errored filename=%s returncode=%s stderr=%s", + filename, + result.returncode, + result.stderr.strip(), + ) + raise IntakeResumeScanExecutionError( + f"Resume malware scan errored for {filename}" + ) + logger.warning( "Resume malware scan rejected filename=%s returncode=%s stderr=%s", filename, diff --git a/tests/unit/test_intake_form_processor.py b/tests/unit/test_intake_form_processor.py index 23a06c90..5e55a2af 100644 --- a/tests/unit/test_intake_form_processor.py +++ b/tests/unit/test_intake_form_processor.py @@ -9,6 +9,7 @@ from five08.worker.crm.intake_form_processor import ( IntakeFormProcessor, IntakeResumeScanConfigError, + IntakeResumeScanExecutionError, IntakeResumeFile, ) @@ -757,6 +758,26 @@ def test_build_resume_updates_skips_parsing_when_scan_fails() -> None: processor.document_processor.extract_text.assert_not_called() +def test_scan_resume_content_raises_when_scanner_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + processor = IntakeFormProcessor() + result = Mock(returncode=2, stderr="clamd unavailable") + monkeypatch.setattr(intake_module.settings, "environment", "production") + monkeypatch.setattr( + intake_module.settings, + "intake_resume_virus_scan_command", + "clamdscan --stream --no-summary {path}", + ) + mock_run = Mock(return_value=result) + monkeypatch.setattr(intake_module.subprocess, "run", mock_run) + + with pytest.raises(IntakeResumeScanExecutionError): + processor._scan_resume_content(b"resume-bytes", "resume.pdf") + + mock_run.assert_called_once() + + def test_build_resume_updates_requires_scan_before_production_parsing( monkeypatch: pytest.MonkeyPatch, ) -> None: From 7cfb7ea48eddaf15cb187388d6ddb5541b063426 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Mon, 29 Jun 2026 16:19:48 +0900 Subject: [PATCH 10/10] Fix health and scanner command defaults --- apps/api/src/five08/backend/api.py | 11 ++++++---- compose.yaml | 4 ++-- tests/unit/test_backend_api.py | 2 +- tests/unit/test_intake_form_processor.py | 26 ++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/apps/api/src/five08/backend/api.py b/apps/api/src/five08/backend/api.py index c0873eaa..4de02d75 100644 --- a/apps/api/src/five08/backend/api.py +++ b/apps/api/src/five08/backend/api.py @@ -3440,16 +3440,19 @@ async def health_handler(request: Request) -> JSONResponse: redis_ok = False postgres_migrations_ok = getattr(request.app.state, "postgres_migrations_ok", True) - if not postgres_migrations_ok: - postgres_ok = False - elif hasattr(request.app.state, "postgres_conn"): + if hasattr(request.app.state, "postgres_conn"): postgres_ok = await _is_postgres_connection_healthy(request.app) else: postgres_ok = await asyncio.to_thread(is_postgres_healthy, settings) intake_resume_scan_required = settings.effective_intake_resume_require_virus_scan intake_resume_scan_configured = settings.intake_resume_virus_scan_configured - healthy = redis_ok and postgres_ok and intake_resume_scan_configured + healthy = ( + redis_ok + and postgres_ok + and postgres_migrations_ok + and intake_resume_scan_configured + ) payload = { "status": "healthy" if healthy else "degraded", "redis_connected": redis_ok, diff --git a/compose.yaml b/compose.yaml index 917d2524..70d12a39 100644 --- a/compose.yaml +++ b/compose.yaml @@ -118,7 +118,7 @@ services: MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} # Use '-' intentionally: an explicitly blank value means "scanner not configured", # allowing startup while /health reports degraded in non-local runtimes. - INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf} # Keep the container's internal listen port fixed; vary only the published # host port for tunnels/local multi-worktree runs. WEB_HOST: 0.0.0.0 @@ -167,7 +167,7 @@ services: MINIO_INTERNAL_BUCKET: ${MINIO_INTERNAL_BUCKET:-internal-transfers} # Use '-' intentionally: an explicitly blank value means "scanner not configured", # allowing startup while /health reports degraded in non-local runtimes. - INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf {path}} + INTAKE_RESUME_VIRUS_SCAN_COMMAND: ${INTAKE_RESUME_VIRUS_SCAN_COMMAND-clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf} DISCORD_BOT_INTERNAL_BASE_URL: http://discord_bot:3000 restart: unless-stopped networks: diff --git a/tests/unit/test_backend_api.py b/tests/unit/test_backend_api.py index bd88fb78..55aeb3b9 100644 --- a/tests/unit/test_backend_api.py +++ b/tests/unit/test_backend_api.py @@ -126,7 +126,7 @@ def fail_migrations() -> None: payload = response.json() assert response.status_code == 503 - assert payload["postgres_connected"] is False + assert payload["postgres_connected"] is True assert payload["postgres_migrations_ok"] is False diff --git a/tests/unit/test_intake_form_processor.py b/tests/unit/test_intake_form_processor.py index 5e55a2af..5d059612 100644 --- a/tests/unit/test_intake_form_processor.py +++ b/tests/unit/test_intake_form_processor.py @@ -778,6 +778,32 @@ def test_scan_resume_content_raises_when_scanner_errors( mock_run.assert_called_once() +def test_scan_resume_content_appends_path_when_placeholder_omitted( + monkeypatch: pytest.MonkeyPatch, +) -> None: + processor = IntakeFormProcessor() + result = Mock(returncode=0, stderr="") + monkeypatch.setattr(intake_module.settings, "environment", "production") + monkeypatch.setattr( + intake_module.settings, + "intake_resume_virus_scan_command", + "clamdscan --stream --no-summary --config-file=/etc/clamav/clamdscan.conf", + ) + mock_run = Mock(return_value=result) + monkeypatch.setattr(intake_module.subprocess, "run", mock_run) + + assert processor._scan_resume_content(b"resume-bytes", "resume.pdf") is True + + command = mock_run.call_args.args[0] + assert command[:4] == [ + "clamdscan", + "--stream", + "--no-summary", + "--config-file=/etc/clamav/clamdscan.conf", + ] + assert command[-1].endswith(".pdf") + + def test_build_resume_updates_requires_scan_before_production_parsing( monkeypatch: pytest.MonkeyPatch, ) -> None: