diff --git a/crates/forge_app/src/dto/openai/fixtures/llamacpp_models_response.json b/crates/forge_app/src/dto/openai/fixtures/llamacpp_models_response.json new file mode 100644 index 0000000000..c93dad0bd2 --- /dev/null +++ b/crates/forge_app/src/dto/openai/fixtures/llamacpp_models_response.json @@ -0,0 +1,91 @@ +{ + "data": [ + { + "id": "GLM-4.7-Flash-GGUF", + "aliases": [], + "tags": [], + "object": "model", + "owned_by": "llamacpp", + "created": 1781360984, + "status": { + "value": "unloaded", + "args": [ + "/usr/bin/llama-server", + "--host", + "127.0.0.1", + "--port", + "0", + "--ui-mcp-proxy", + "--alias", + "GLM-4.7-Flash-GGUF", + "--ctx-size", + "135000", + "--model", + "/home/greg/.lmstudio/models/lmstudio-community/GLM-4.7-Flash-GGUF/GLM-4.7-Flash-Q4_K_M.gguf", + "--parallel", + "1" + ], + "preset": "[GLM-4.7-Flash-GGUF]\nui-mcp-proxy = 1\nctx-size = 135000\nmodel = /home/greg/.lmstudio/models/lmstudio-community/GLM-4.7-Flash-GGUF/GLM-4.7-Flash-Q4_K_M.gguf\nparallel = 1\n\n" + }, + "architecture": { + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ] + }, + "need_download": false + }, + { + "id": "Qwen3.6-35B-A3B-GGUF", + "aliases": [], + "tags": [], + "object": "model", + "owned_by": "llamacpp", + "created": 1781360984, + "status": { + "value": "loaded", + "args": [ + "/usr/bin/llama-server", + "--host", + "127.0.0.1", + "--port", + "49759", + "--ui-mcp-proxy", + "--alias", + "Qwen3.6-35B-A3B-GGUF", + "--ctx-size", + "135000", + "--model", + "/home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-Q8_0.gguf", + "--mmproj", + "/home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/mmproj-Qwen3.6-35B-A3B-BF16.gguf", + "--parallel", + "1" + ], + "preset": "[Qwen3.6-35B-A3B-GGUF]\nui-mcp-proxy = 1\nctx-size = 135000\nmodel = /home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-Q8_0.gguf\nmmproj = /home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/mmproj-Qwen3.6-35B-A3B-BF16.gguf\nparallel = 1\n\n" + }, + "architecture": { + "input_modalities": [ + "text", + "image" + ], + "output_modalities": [ + "text" + ] + }, + "need_download": false, + "meta": { + "vocab_type": 2, + "n_vocab": 248320, + "n_ctx": 135168, + "n_ctx_train": 262144, + "n_embd": 2048, + "n_params": 34660610688, + "size": 36892150272 + } + } + ], + "object": "list" +} diff --git a/crates/forge_app/src/dto/openai/fixtures/lmstudio_models_response.json b/crates/forge_app/src/dto/openai/fixtures/lmstudio_models_response.json new file mode 100644 index 0000000000..e5827f2958 --- /dev/null +++ b/crates/forge_app/src/dto/openai/fixtures/lmstudio_models_response.json @@ -0,0 +1,25 @@ +{ + "data": [ + { + "id": "nvidia/nemotron-3-nano-omni", + "object": "model", + "owned_by": "organization_owner" + }, + { + "id": "qwen/qwen3.6-35b-a3b", + "object": "model", + "owned_by": "organization_owner" + }, + { + "id": "glm-4.7-flash@q8_0", + "object": "model", + "owned_by": "organization_owner" + }, + { + "id": "text-embedding-nomic-embed-text-v1.5", + "object": "model", + "owned_by": "organization_owner" + } + ], + "object": "list" +} diff --git a/crates/forge_app/src/dto/openai/model.rs b/crates/forge_app/src/dto/openai/model.rs index f0e33ab6fc..508fd82f04 100644 --- a/crates/forge_app/src/dto/openai/model.rs +++ b/crates/forge_app/src/dto/openai/model.rs @@ -44,8 +44,8 @@ pub struct Model { #[derive(Debug, Deserialize, Serialize, Clone)] pub struct Architecture { - pub modality: String, - pub tokenizer: String, + pub modality: Option, + pub tokenizer: Option, pub instruct_type: Option, pub input_modalities: Option>, pub output_modalities: Option>, @@ -264,6 +264,35 @@ mod tests { assert_eq!(actual.pricing.as_ref().unwrap().prompt, Some(0.0015)); assert_eq!(actual.pricing.as_ref().unwrap().completion, Some(0.0002)); } + #[tokio::test] + async fn test_lmstudio_model_list() { + // LM Studio's /v1/models returns minimal entries with no `architecture` + // field. + let fixture = load_fixture("lmstudio_models_response.json").await; + + let actual = serde_json::from_value::(fixture).unwrap(); + + assert_eq!(actual.data.len(), 4); + assert_eq!(actual.data[0].id.as_str(), "nvidia/nemotron-3-nano-omni"); + // LM Studio omits `architecture` entirely, so it must stay optional. + assert!(actual.data[0].architecture.is_none()); + } + + #[tokio::test] + async fn test_llamacpp_model_list() { + // llama-server's /v1/models returns an "architecture" object that only has + // input_modalities/output_modalities, without "modality"/"tokenizer". + let fixture = load_fixture("llamacpp_models_response.json").await; + + let actual = serde_json::from_value::(fixture).unwrap(); + + assert_eq!(actual.data.len(), 2); + assert_eq!(actual.data[0].id.as_str(), "GLM-4.7-Flash-GGUF"); + // An `architecture` object missing `modality`/`tokenizer` still + // deserializes, since those fields are optional. + assert!(actual.data[0].architecture.is_some()); + } + #[tokio::test] async fn test_model_conversion_without_supported_parameters() { let model = Model {