diff --git a/Dockerfile b/Dockerfile
index ab81318..2160251 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,6 +37,8 @@ COPY --from=builder /usr/local/bin/warmup /usr/local/bin/warmup
 ARG EMBEDDING_MODELS=nomic,bge-small
 RUN EMBEDDING_MODELS="${EMBEDDING_MODELS}" EMBEDDING_POOL_SIZE=1 /usr/local/bin/warmup
 
+# EXPOSE is build-time metadata only; the actual port is controlled by the
+# EMBEDDING_PORT env var at runtime (default 3000).
 EXPOSE 3000
 
 ENTRYPOINT ["/usr/local/bin/embedding"]
diff --git a/README.md b/README.md
index efa5ae5..a78b556 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,17 @@ curl -X POST http://localhost:3000/embed \
   -d '{"texts":["hello world","another piece of text"]}'
 ```
 
+## Configuration
+
+Configured via environment variables (set them in `.env`):
+
+| Variable | Default | Description |
+| --- | --- | --- |
+| `EMBEDDING_PORT` | `3000` | Port the service listens on. |
+| `EMBEDDING_MODELS` | `nomic` | Comma-separated list of models to load. |
+| `EMBEDDING_CACHE_DIR` | _(default cache)_ | Directory for downloaded model files. |
+| `EMBEDDING_POOL_SIZE` | _(memory-derived)_ | Number of model instances per pool. |
+
 ## API
 
 ### `POST /embed`
diff --git a/docker-compose.yml b/docker-compose.yml
index 84dfd48..3e3f08c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,7 +8,7 @@ services:
     image: embedding:latest
     container_name: embedding
     ports:
-      - "3000:3000"
+      - "${EMBEDDING_PORT:-3000}:${EMBEDDING_PORT:-3000}"
     env_file:
       - .env
     volumes:
diff --git a/src/main.rs b/src/main.rs
index 6aac642..b983d9e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -82,7 +82,16 @@ async fn main() -> Result<(), Box> {
 
     let app = Router::new().route("/embed", post(embed)).with_state(state);
 
-    let addr = std::env::var("BIND_ADDR").unwrap_or_else(|_| "0.0.0.0:3000".to_string());
+    let port_str = std::env::var("EMBEDDING_PORT").unwrap_or_else(|_| "3000".to_string());
+    // Parse as NonZeroU16 so that "0" is rejected along with non-numeric and
+    // out-of-range input; the accepted range is then 1-65535, as the message says.
+    let port: std::num::NonZeroU16 = port_str.parse().map_err(|_| {
+        format!(
+            "EMBEDDING_PORT '{}' is not a valid port number (1-65535)",
+            port_str
+        )
+    })?;
+    let addr = format!("0.0.0.0:{}", port);
     let listener = tokio::net::TcpListener::bind(&addr).await?;
     tracing::info!("listening on {}", addr);
     axum::serve(listener, app)