tailcallhq · imrehg · Jun 14, 2026
diff --git a/crates/forge_app/src/dto/openai/fixtures/llamacpp_models_response.json b/crates/forge_app/src/dto/openai/fixtures/llamacpp_models_response.json
@@ -0,0 +1,91 @@
+{
+  "data": [
+    {
+      "id": "GLM-4.7-Flash-GGUF",
+      "aliases": [],
+      "tags": [],
+      "object": "model",
+      "owned_by": "llamacpp",
+      "created": 1781360984,
+      "status": {
+        "value": "unloaded",
+        "args": [
+          "/usr/bin/llama-server",
+          "--host",
+          "127.0.0.1",
+          "--port",
+          "0",
+          "--ui-mcp-proxy",
+          "--alias",
+          "GLM-4.7-Flash-GGUF",
+          "--ctx-size",
+          "135000",
+          "--model",
+          "/home/greg/.lmstudio/models/lmstudio-community/GLM-4.7-Flash-GGUF/GLM-4.7-Flash-Q4_K_M.gguf",
+          "--parallel",
+          "1"
+        ],
+        "preset": "[GLM-4.7-Flash-GGUF]\nui-mcp-proxy = 1\nctx-size = 135000\nmodel = /home/greg/.lmstudio/models/lmstudio-community/GLM-4.7-Flash-GGUF/GLM-4.7-Flash-Q4_K_M.gguf\nparallel = 1\n\n"
+      },
+      "architecture": {
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ]
+      },
+      "need_download": false
+    },
+    {
+      "id": "Qwen3.6-35B-A3B-GGUF",
+      "aliases": [],
+      "tags": [],
+      "object": "model",
+      "owned_by": "llamacpp",
+      "created": 1781360984,
+      "status": {
+        "value": "loaded",
+        "args": [
+          "/usr/bin/llama-server",
+          "--host",
+          "127.0.0.1",
+          "--port",
+          "49759",
+          "--ui-mcp-proxy",
+          "--alias",
+          "Qwen3.6-35B-A3B-GGUF",
+          "--ctx-size",
+          "135000",
+          "--model",
+          "/home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-Q8_0.gguf",
+          "--mmproj",
+          "/home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/mmproj-Qwen3.6-35B-A3B-BF16.gguf",
+          "--parallel",
+          "1"
+        ],
+        "preset": "[Qwen3.6-35B-A3B-GGUF]\nui-mcp-proxy = 1\nctx-size = 135000\nmodel = /home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-Q8_0.gguf\nmmproj = /home/greg/.lmstudio/models/lmstudio-community/Qwen3.6-35B-A3B-GGUF/mmproj-Qwen3.6-35B-A3B-BF16.gguf\nparallel = 1\n\n"
+      },
+      "architecture": {
+        "input_modalities": [
+          "text",
+          "image"
+        ],
+        "output_modalities": [
+          "text"
+        ]
+      },
+      "need_download": false,
+      "meta": {
+        "vocab_type": 2,
+        "n_vocab": 248320,
+        "n_ctx": 135168,
+        "n_ctx_train": 262144,
+        "n_embd": 2048,
+        "n_params": 34660610688,
+        "size": 36892150272
+      }
+    }
+  ],
+  "object": "list"
+}
diff --git a/crates/forge_app/src/dto/openai/fixtures/lmstudio_models_response.json b/crates/forge_app/src/dto/openai/fixtures/lmstudio_models_response.json
@@ -0,0 +1,25 @@
+{
+  "data": [
+    {
+      "id": "nvidia/nemotron-3-nano-omni",
+      "object": "model",
+      "owned_by": "organization_owner"
+    },
+    {
+      "id": "qwen/qwen3.6-35b-a3b",
+      "object": "model",
+      "owned_by": "organization_owner"
+    },
+    {
+      "id": "glm-4.7-flash@q8_0",
+      "object": "model",
+      "owned_by": "organization_owner"
+    },
+    {
+      "id": "text-embedding-nomic-embed-text-v1.5",
+      "object": "model",
+      "owned_by": "organization_owner"
+    }
+  ],
+  "object": "list"
+}
diff --git a/crates/forge_app/src/dto/openai/model.rs b/crates/forge_app/src/dto/openai/model.rs
@@ -44,8 +44,8 @@ pub struct Model {
 
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct Architecture {
-    pub modality: String,
-    pub tokenizer: String,
+    pub modality: Option<String>,
+    pub tokenizer: Option<String>,
     pub instruct_type: Option<String>,
     pub input_modalities: Option<Vec<String>>,
     pub output_modalities: Option<Vec<String>>,
@@ -264,6 +264,35 @@ mod tests {
         assert_eq!(actual.pricing.as_ref().unwrap().prompt, Some(0.0015));
         assert_eq!(actual.pricing.as_ref().unwrap().completion, Some(0.0002));
     }
+    /// LM Studio's `/v1/models` returns minimal entries that carry no
+    /// `architecture` field at all.
+    #[tokio::test]
+    async fn test_lmstudio_model_list() {
+        let fixture = load_fixture("lmstudio_models_response.json").await;
+        let actual: ListModelResponse = serde_json::from_value(fixture).unwrap();
+
+        assert_eq!(actual.data.len(), 4);
+
+        let first = &actual.data[0];
+        assert_eq!(first.id.as_str(), "nvidia/nemotron-3-nano-omni");
+        // The key is absent from the payload, so the field has to stay optional.
+        assert!(first.architecture.is_none());
+    }
+
+    /// llama-server's `/v1/models` returns an `architecture` object that only
+    /// carries `input_modalities`/`output_modalities`, without
+    /// `modality`/`tokenizer`. The payload must still deserialize, and the
+    /// modality lists that are present must be preserved.
+    #[tokio::test]
+    async fn test_llamacpp_model_list() {
+        let fixture = load_fixture("llamacpp_models_response.json").await;
+
+        let actual = serde_json::from_value::<ListModelResponse>(fixture).unwrap();
+
+        assert_eq!(actual.data.len(), 2);
+        assert_eq!(actual.data[0].id.as_str(), "GLM-4.7-Flash-GGUF");
+        assert_eq!(actual.data[1].id.as_str(), "Qwen3.6-35B-A3B-GGUF");
+
+        // An `architecture` object missing `modality`/`tokenizer` still
+        // deserializes, with those optional fields left as `None`.
+        let text_only = actual.data[0].architecture.as_ref().unwrap();
+        assert!(text_only.modality.is_none());
+        assert!(text_only.tokenizer.is_none());
+        let expected = vec!["text".to_string()];
+        assert_eq!(text_only.input_modalities.as_ref(), Some(&expected));
+        assert_eq!(text_only.output_modalities.as_ref(), Some(&expected));
+
+        // The second entry is multimodal and also carries an extra `meta`
+        // object; neither may break deserialization.
+        let multimodal = actual.data[1].architecture.as_ref().unwrap();
+        assert!(multimodal.modality.is_none());
+        assert!(multimodal.tokenizer.is_none());
+        let expected = vec!["text".to_string(), "image".to_string()];
+        assert_eq!(multimodal.input_modalities.as_ref(), Some(&expected));
+        let expected = vec!["text".to_string()];
+        assert_eq!(multimodal.output_modalities.as_ref(), Some(&expected));
+    }
+
     #[tokio::test]
     async fn test_model_conversion_without_supported_parameters() {
         let model = Model {