diff --git a/.env.example b/.env.example index dc21172..6719869 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,11 @@ +# Paperscout environment configuration +# +# Keys below use legacy unprefixed names and are read from this .env file. +# To override any setting from the process environment (Docker -e, shell export, +# CI env blocks), use the canonical PAPERSCOUT_ prefix instead, e.g. +# PAPERSCOUT_DATABASE_URL. Prefixed names are not read from .env files. +# See src/paperscout/config.py (ENV_VAR_MAP) for the full 1:1 field mapping. +# # --- Required credentials --- # Slack (required for production; tests set _PAPERSCOUT_TESTING=1 to skip validation) SLACK_SIGNING_SECRET=your-signing-secret @@ -14,11 +22,13 @@ HEALTH_PORT=8080 HEALTH_BIND_HOST=127.0.0.1 # --- Scheduling (optional) --- +ENABLE_BULK_WG21=true +ENABLE_ISO_PROBE=true +ENABLE_OPEN_STD=false POLL_INTERVAL_MINUTES=30 # Min sleep after an overrun cycle (poll took > poll_interval_minutes). POLL_OVERRUN_COOLDOWN_SECONDS=300 -ENABLE_BULK_WG21=true -ENABLE_ISO_PROBE=true + # --- Probe prefixes / extensions (optional) --- PROBE_PREFIXES=["D","P"] @@ -74,6 +84,12 @@ CACHE_TTL_HOURS=1 LOG_LEVEL=INFO LOG_RETENTION_DAYS=7 +# --- Message queue internals (optional, rarely tuned) --- +# MQ_MAX_RETRIES, MQ_MAX_SIZE, MQ_CIRCUIT_BREAKER_THRESHOLD, +# MQ_CIRCUIT_BREAKER_COOLDOWN_SECONDS — see ENV_VAR_MAP in config.py +# +# Also available but omitted here: WG21_INDEX_TIMEOUT_S + # --- Graceful shutdown (optional) --- # Max time to drain queued Slack messages on SIGTERM/SIGINT shutdown. SHUTDOWN_MQ_DRAIN_TIMEOUT_SECONDS=30 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 201ac8a..28f856f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,7 @@ on: env: _PAPERSCOUT_TESTING: "1" + # Legacy unprefixed names; canonical form is PAPERSCOUT_{FIELD_NAME} (see ENV_VAR_MAP in config.py).
SLACK_BOT_TOKEN: xoxb-ci-placeholder SLACK_SIGNING_SECRET: ci-placeholder-secret diff --git a/README.md b/README.md index 76abf57..6abfc0d 100644 --- a/README.md +++ b/README.md @@ -219,21 +219,25 @@ When a match is found, all hits for that user are batched and sent as a single D All parameters are configurable via environment variables or a `.env` file. See [`.env.example`](.env.example) for the complete list. +**Canonical process-env names:** every setting can be overridden from the process environment using `PAPERSCOUT_{FIELD_NAME}` (e.g. `PAPERSCOUT_DATABASE_URL` for `database_url`). Legacy unprefixed names (`DATABASE_URL`, etc.) remain supported in `.env` files and as process-env fallbacks. Prefixed names take precedence when both are set. See [`src/paperscout/config.py`](src/paperscout/config.py) (`ENV_VAR_MAP`) for the full mapping. + +Tables below list legacy `.env` / process-env names. For any row, the canonical process-env override is `PAPERSCOUT_` + the field name in uppercase (e.g. `PORT` → `PAPERSCOUT_PORT`, `POLL_INTERVAL_MINUTES` → `PAPERSCOUT_POLL_INTERVAL_MINUTES`). See `ENV_VAR_MAP` in [`config.py`](src/paperscout/config.py) for the complete list. 
+ ### Required -| Variable | Description | -| ---------------------- | -------------------------------------------------------------------- | -| `SLACK_SIGNING_SECRET` | Slack app signing secret | -| `SLACK_BOT_TOKEN` | Slack bot token (`xoxb-...`) | -| `DATABASE_URL` | PostgreSQL connection string (`postgresql://user:pass@host:5432/db`) | +| Variable | Canonical process-env | Description | +| -------- | --------------------- | ----------- | +| `SLACK_SIGNING_SECRET` | `PAPERSCOUT_SLACK_SIGNING_SECRET` | Slack app signing secret | +| `SLACK_BOT_TOKEN` | `PAPERSCOUT_SLACK_BOT_TOKEN` | Slack bot token (`xoxb-...`) | +| `DATABASE_URL` | `PAPERSCOUT_DATABASE_URL` | PostgreSQL connection string (`postgresql://user:pass@host:5432/db`) | ### Server -| Variable | Default | Description | -| -------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- | -| `PORT` | `3000` | Slack Bolt HTTP listener | -| `HEALTH_PORT` | `8080` | GET `/health` JSON endpoint | -| `HEALTH_BIND_HOST` | `127.0.0.1` | Bind address for the health server (localhost-only). Use `0.0.0.0` inside Docker when publishing ports to the host; see `docker-compose.yml`. | +| Variable | Default | Description | +| ------------------ | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| `PORT` | `3000` | Slack Bolt HTTP listener | +| `HEALTH_PORT` | `8080` | GET `/health` JSON endpoint | +| `HEALTH_BIND_HOST` | `127.0.0.1` | Bind address for the health server (localhost-only). Use `0.0.0.0` inside Docker when publishing ports to the host; see `docker-compose.yml`. 
| ### Scheduling diff --git a/deploy/SERVER_SETUP.md b/deploy/SERVER_SETUP.md index 17a5058..5200839 100644 --- a/deploy/SERVER_SETUP.md +++ b/deploy/SERVER_SETUP.md @@ -198,6 +198,8 @@ cp .env.example .env nano .env ``` +Process-environment overrides (e.g. `docker compose run -e PAPERSCOUT_POLL_INTERVAL_MINUTES=60`) use the canonical `PAPERSCOUT_` prefix documented in [`src/paperscout/config.py`](../src/paperscout/config.py) (`ENV_VAR_MAP`). The `.env` file continues to use legacy unprefixed names. + The `DATABASE_URL` should use `host.docker.internal`: ``` diff --git a/docs/onboarding.md b/docs/onboarding.md index 4a025d6..4310e9f 100644 --- a/docs/onboarding.md +++ b/docs/onboarding.md @@ -60,6 +60,8 @@ cp .env.example .env - `DATABASE_URL` — e.g. `postgresql://paperscout:your-secret@localhost:5432/paperscout` - For Slack: `SLACK_SIGNING_SECRET`, `SLACK_BOT_TOKEN` — see [README § Slack App Setup](../README.md#slack-app-setup) +**Process-env overrides:** Docker `-e`, shell `export`, and CI env blocks can use the canonical `PAPERSCOUT_` prefix (e.g. `PAPERSCOUT_DATABASE_URL`) to override any setting without editing `.env`. Legacy unprefixed names in `.env` remain fully supported; see [`config.py`](../src/paperscout/config.py) (`ENV_VAR_MAP`). + ### 4. Run tests Fast run (no coverage gate): @@ -124,7 +126,7 @@ Use [deploy/paperscout.conf](../deploy/paperscout.conf) as a reference for TLS t ## Environment variables (complete reference) -Every key from [`.env.example`](../.env.example) is listed below. Names in `.env` use **SCREAMING_SNAKE_CASE**; the runtime [Settings](../src/paperscout/config.py) class maps them to lowercase fields. +Every key from [`.env.example`](../.env.example) is listed below. Names in `.env` use **SCREAMING_SNAKE_CASE**; the runtime [Settings](../src/paperscout/config.py) class maps them to lowercase fields. Process-environment overrides (Docker `-e`, CI) may use the canonical `PAPERSCOUT_{FIELD_NAME}` form documented in `ENV_VAR_MAP` in `config.py`. 
### Slack and server diff --git a/src/paperscout/config.py b/src/paperscout/config.py index 7e3955b..eb45f61 100644 --- a/src/paperscout/config.py +++ b/src/paperscout/config.py @@ -4,15 +4,69 @@ import os from pathlib import Path +from typing import NamedTuple from pydantic import Field, model_validator -from pydantic_settings import BaseSettings, SettingsConfigDict +from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict +from pydantic_settings.sources import EnvSettingsSource from .errors import ConfigurationError +ENV_VAR_PREFIX = "PAPERSCOUT_" + + +def prefixed_env_name(field_name: str) -> str: + """Canonical process-env name for a ``Settings`` field.""" + return f"{ENV_VAR_PREFIX}{field_name.upper()}" + + +def legacy_env_name(field_name: str) -> str: + """Legacy unprefixed env / ``.env`` key for a ``Settings`` field. + + Assumes no ``Settings`` field uses ``validation_alias`` or ``alias``; env + keys match the Python field name uppercased. If a field gains a custom + alias, update this helper to read ``Settings.model_fields[field_name]``. + """ + return field_name.upper() + + +class EnvVarNames(NamedTuple): + prefixed: str # PAPERSCOUT_{FIELD_NAME} canonical process-env key + legacy: str # {FIELD_NAME} legacy env / .env key + + +class _PrefixedEnvSource(EnvSettingsSource): + """Reads ``PAPERSCOUT_{FIELD_NAME}`` from the process environment (not ``.env``). + + ``EnvSettingsSource`` reads ``os.environ`` only; ``.env`` files are loaded + separately by ``DotEnvSettingsSource`` in ``settings_customise_sources``. + """ + + def __init__(self, settings_cls: type[BaseSettings]) -> None: + super().__init__(settings_cls, env_prefix=ENV_VAR_PREFIX) + class Settings(BaseSettings): - """Application settings loaded from environment and optional ``.env``.""" + """Application settings loaded from environment and optional ``.env``. + + Every field is a flat top-level attribute (e.g. ``settings.slack_bot_token``). + There are no nested configuration namespaces. 
+ + Environment variable mapping (1:1 per field): + + * **Canonical process-env override:** ``PAPERSCOUT_{FIELD_NAME}`` + (e.g. ``PAPERSCOUT_DATABASE_URL``). Use in Docker ``-e``, shell + ``export``, or CI env blocks. These keys are **not** read from the + ``.env`` file. + * **Legacy name:** ``{FIELD_NAME}`` (e.g. ``DATABASE_URL``). Used in + ``.env`` files and as a process-env fallback. Unprefixed names remain + fully supported. + + Source priority (highest to lowest): constructor kwargs, ``PAPERSCOUT_*`` + process env, legacy process env / ``.env``, field defaults. + + See :data:`ENV_VAR_MAP` for the full field list. + """ model_config = SettingsConfigDict( env_file=".env", @@ -116,6 +170,23 @@ class Settings(BaseSettings): # (mq_drain + 2 × thread_join) meets or exceeds this value. stop_grace_period_seconds: float = Field(default=0.0, ge=0.0) + @classmethod + def settings_customise_sources( + cls, + settings_cls: type[BaseSettings], + init_settings: PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> tuple[PydanticBaseSettingsSource, ...]: + return ( + init_settings, + _PrefixedEnvSource(settings_cls), + env_settings, + dotenv_settings, + file_secret_settings, + ) + @model_validator(mode="after") def _require_slack_credentials_unless_testing(self) -> Settings: """Slack tokens must be set for real runs; pytest sets ``_PAPERSCOUT_TESTING=1``.""" @@ -132,4 +203,10 @@ def _require_slack_credentials_unless_testing(self) -> Settings: return self +ENV_VAR_MAP: dict[str, EnvVarNames] = { + name: EnvVarNames(prefixed_env_name(name), legacy_env_name(name)) + for name in Settings.model_fields +} + + settings = Settings() diff --git a/tests/test_config.py b/tests/test_config.py index b981da7..7d5e277 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,7 +4,7 @@ import pytest -from paperscout.config import Settings +from paperscout.config import ENV_VAR_MAP, 
Settings, legacy_env_name, prefixed_env_name from paperscout.errors import ConfigurationError @@ -30,3 +30,64 @@ def test_settings_allows_empty_slack_under_testing_flag(monkeypatch): monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") s = Settings(slack_bot_token="", slack_signing_secret="") assert s.slack_bot_token == "" + + +def test_prefixed_process_env_loads(monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.setenv("PAPERSCOUT_POLL_INTERVAL_MINUTES", "99") + s = Settings() + assert s.poll_interval_minutes == 99 + + +def test_legacy_process_env_fallback(monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.delenv("PAPERSCOUT_POLL_INTERVAL_MINUTES", raising=False) + monkeypatch.setenv("POLL_INTERVAL_MINUTES", "88") + s = Settings() + assert s.poll_interval_minutes == 88 + + +def test_prefixed_wins_over_legacy(monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.setenv("PAPERSCOUT_POLL_INTERVAL_MINUTES", "99") + monkeypatch.setenv("POLL_INTERVAL_MINUTES", "88") + s = Settings() + assert s.poll_interval_minutes == 99 + + +def test_prefixed_process_env_loads_bool(monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.setenv("PAPERSCOUT_ENABLE_ISO_PROBE", "false") + s = Settings() + assert s.enable_iso_probe is False + + +def test_env_mapping_covers_all_fields(): + assert set(ENV_VAR_MAP.keys()) == set(Settings.model_fields.keys()) + db = ENV_VAR_MAP["database_url"] + assert db.prefixed == "PAPERSCOUT_DATABASE_URL" + assert db.legacy == "DATABASE_URL" + for field_name in Settings.model_fields: + names = ENV_VAR_MAP[field_name] + assert names.prefixed == prefixed_env_name(field_name) + assert names.legacy == legacy_env_name(field_name) + + +def test_prefixed_keys_in_dotenv_file_are_ignored(tmp_path, monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.delenv("PAPERSCOUT_POLL_INTERVAL_MINUTES", raising=False) + monkeypatch.delenv("POLL_INTERVAL_MINUTES", 
raising=False) + env_file = tmp_path / ".env" + env_file.write_text("PAPERSCOUT_POLL_INTERVAL_MINUTES=77\n") + s = Settings(_env_file=env_file) + assert s.poll_interval_minutes == 30 + + +def test_legacy_keys_in_dotenv_file_load(tmp_path, monkeypatch): + monkeypatch.setenv("_PAPERSCOUT_TESTING", "1") + monkeypatch.delenv("PAPERSCOUT_POLL_INTERVAL_MINUTES", raising=False) + monkeypatch.delenv("POLL_INTERVAL_MINUTES", raising=False) + env_file = tmp_path / ".env" + env_file.write_text("POLL_INTERVAL_MINUTES=66\n") + s = Settings(_env_file=env_file) + assert s.poll_interval_minutes == 66