diff --git a/models/nvidia/gliner-pii.yaml b/models/nvidia/gliner-pii.yaml new file mode 100644 index 0000000..f0bc975 --- /dev/null +++ b/models/nvidia/gliner-pii.yaml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: gliner-pii +params: + - path: threshold + type: number + label: Threshold + description: Confidence threshold for entity detection. Lower values detect more entities but may include false positives. + default: 0.5 + range: + min: 0 + max: 1 + group: sampling + - path: chunk_length + type: integer + label: Chunk length + description: Context window size for processing. Longer texts are automatically split into chunks with overlap for complete coverage. Must be greater than overlap. + default: 384 + range: + min: 1 + max: 2048 + group: provider_metadata + - path: overlap + type: integer + label: Overlap + description: Token overlap between chunks to prevent entity clipping. Must be less than chunk_length. + default: 128 + range: + min: 0 + max: 512 + group: provider_metadata + - path: flat_ner + type: boolean + label: Flat NER + description: When true, prevents overlapping entity spans. When false, may return nested entities such as both a full name and its constituent first name. + default: false + group: provider_metadata diff --git a/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml b/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml new file mode 100644 index 0000000..bfc0d61 --- /dev/null +++ b/models/nvidia/llama-3.1-nemoguard-8b-topic-control.yaml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemoguard-8b-topic-control +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. 
Not recommended to modify both temperature and top_p in the same call. + default: 0.5 + range: + min: 0 + max: 2 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml b/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml new file mode 100644 index 0000000..fc46239 --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-nano-8b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-nano-8b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. 
Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
+ group: generation_length diff --git a/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml b/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml new file mode 100644 index 0000000..4256093 --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-safety-guard-8b-v3.yaml @@ -0,0 +1,14 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-safety-guard-8b-v3 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. + default: 0 + range: + min: 0 + max: 1 + group: sampling diff --git a/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml b/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml new file mode 100644 index 0000000..367b0ef --- /dev/null +++ b/models/nvidia/llama-3.1-nemotron-ultra-253b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.1-nemotron-ultra-253b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. 
+ default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml b/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml new file mode 100644 index 0000000..b20d19b --- /dev/null +++ b/models/nvidia/llama-3.3-nemotron-super-49b-v1.5.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.3-nemotron-super-49b-v1.5 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. 
+ default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 65536 + range: + min: 1 + max: 65536 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. 
+ group: generation_length diff --git a/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml b/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml new file mode 100644 index 0000000..313d97e --- /dev/null +++ b/models/nvidia/llama-3.3-nemotron-super-49b-v1.yaml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: llama-3.3-nemotron-super-49b-v1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.6 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 4096 + range: + min: 1 + max: 16384 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. 
Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results. + default: 0 + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemoguard-jailbreak-detect.yaml b/models/nvidia/nemoguard-jailbreak-detect.yaml new file mode 100644 index 0000000..4a6231d --- /dev/null +++ b/models/nvidia/nemoguard-jailbreak-detect.yaml @@ -0,0 +1,5 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemoguard-jailbreak-detect +params: [] diff --git a/models/nvidia/nemotron-3-nano-30b-a3b.yaml b/models/nvidia/nemotron-3-nano-30b-a3b.yaml new file mode 100644 index 0000000..aaf9bcc --- /dev/null +++ b/models/nvidia/nemotron-3-nano-30b-a3b.yaml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-nano-30b-a3b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. 
+ default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-3-super-120b-a12b.yaml b/models/nvidia/nemotron-3-super-120b-a12b.yaml new file mode 100644 index 0000000..1a81de2 --- /dev/null +++ b/models/nvidia/nemotron-3-super-120b-a12b.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-super-120b-a12b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'low' enables low-effort reasoning, and 'high' enables full reasoning. 
+ default: high + values: + - none + - low + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-3-ultra-550b-a55b.yaml b/models/nvidia/nemotron-3-ultra-550b-a55b.yaml new file mode 100644 index 0000000..b7c2fef --- /dev/null +++ b/models/nvidia/nemotron-3-ultra-550b-a55b.yaml @@ -0,0 +1,62 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-3-ultra-550b-a55b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 0.95 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. 
+ default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: reasoning_effort + type: enum + label: Reasoning effort + description: Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning. + default: high + values: + - none + - medium + - high + group: reasoning + - path: reasoning_budget + type: integer + label: Reasoning budget + description: Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement. + default: 16384 + range: + min: -1 + max: 32768 + group: reasoning + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-content-safety-reasoning-4b.yaml b/models/nvidia/nemotron-content-safety-reasoning-4b.yaml new file mode 100644 index 0000000..7bfa8bb --- /dev/null +++ b/models/nvidia/nemotron-content-safety-reasoning-4b.yaml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-content-safety-reasoning-4b +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. 
+ default: 1 + range: + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 16384 + range: + min: 1 + max: 32768 + group: generation_length + - path: seed + type: integer + label: Seed + description: Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result. + range: + min: 0 + max: 18446744073709552000 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/nemotron-mini-4b-instruct.yaml b/models/nvidia/nemotron-mini-4b-instruct.yaml new file mode 100644 index 0000000..0283f9a --- /dev/null +++ b/models/nvidia/nemotron-mini-4b-instruct.yaml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: nemotron-mini-4b-instruct +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0.2 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. 
+ default: 0.7 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + max: 4096 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/riva-translate-4b-instruct-v1.1.yaml b/models/nvidia/riva-translate-4b-instruct-v1.1.yaml new file mode 100644 index 0000000..420c6e0 --- /dev/null +++ b/models/nvidia/riva-translate-4b-instruct-v1.1.yaml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: riva-translate-4b-instruct-v1.1 +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. + default: 0 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call. + default: 0.9 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 512 + range: + min: 1 + max: 4096 + group: generation_length + - path: frequency_penalty + type: number + label: Frequency penalty + description: Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: presence_penalty + type: number + label: Presence penalty + description: Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + default: 0 + range: + min: -2 + max: 2 + group: sampling + - path: stop + type: string + label: Stop + description: A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence. + group: generation_length diff --git a/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml b/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml new file mode 100644 index 0000000..6f4aa2b --- /dev/null +++ b/models/nvidia/usdcode-llama-3.1-70b-instruct.yaml @@ -0,0 +1,42 @@ +# yaml-language-server: $schema=https://modelparams.dev/api/v1/schema.json +provider: nvidia +authType: api_key +model: usdcode-llama-3.1-70b-instruct +params: + - path: temperature + type: number + label: Temperature + description: Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call. 
+ default: 0.1 + range: + min: 0 + max: 1 + group: sampling + - path: top_p + type: number + label: Top P + description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call. + default: 1 + range: + max: 1 + group: sampling + - path: max_tokens + type: integer + label: Max tokens + description: Maximum number of tokens to generate. Generation stops when this limit is reached. + default: 1024 + range: + min: 1 + max: 2048 + group: generation_length + - path: expert_type + type: enum + label: Expert type + description: The type of expert to use. 'knowledge' answers with USD knowledge, 'code' responds with vanilla OpenUSD code, 'helperfunction' uses high-level helper functions, and 'auto' lets the LLM determine which expert to use. + default: auto + values: + - auto + - code + - knowledge + - helperfunction + group: provider_metadata diff --git a/packages/modelparams/scripts/codegen.ts b/packages/modelparams/scripts/codegen.ts index 3ae2ca9..b532a5e 100644 --- a/packages/modelparams/scripts/codegen.ts +++ b/packages/modelparams/scripts/codegen.ts @@ -32,6 +32,10 @@ function quoteKey(k: string): string { function emitParamsByIdEntry(m: Model): string { const id = modelId(m); const fields = m.params.map((p) => ` ${quoteKey(p.path)}: ${tsType(p)};`).join("\n"); + if (fields.length === 0) { + return ` ${JSON.stringify(id)}: Record<string, never>;`; + } + return ` ${JSON.stringify(id)}: {\n${fields}\n };`; } diff --git a/packages/modelparams/src/generated/data.ts b/packages/modelparams/src/generated/data.ts index 0f8611e..49d13ec 100644 --- a/packages/modelparams/src/generated/data.ts +++ b/packages/modelparams/src/generated/data.ts @@ -9917,6 +9917,982 @@ export const CATALOG = [ } ] }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "gliner-pii", + "params": [ + { + "path": "threshold", + "label": "Threshold", + "description": "Confidence threshold for 
entity detection. Lower values detect more entities but may include false positives.", + "group": "sampling", + "type": "number", + "default": 0.5, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "chunk_length", + "label": "Chunk length", + "description": "Context window size for processing. Longer texts are automatically split into chunks with overlap for complete coverage. Must be greater than overlap.", + "group": "provider_metadata", + "type": "integer", + "default": 384, + "range": { + "min": 1, + "max": 2048 + } + }, + { + "path": "overlap", + "label": "Overlap", + "description": "Token overlap between chunks to prevent entity clipping. Must be less than chunk_length.", + "group": "provider_metadata", + "type": "integer", + "default": 128, + "range": { + "min": 0, + "max": 512 + } + }, + { + "path": "flat_ner", + "label": "Flat NER", + "description": "When true, prevents overlapping entity spans. When false, may return nested entities such as both a full name and its constituent first name.", + "group": "provider_metadata", + "type": "boolean", + "default": false + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.1-nemoguard-8b-topic-control", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.5, + "range": { + "min": 0, + "max": 2 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 1024, + "range": { + "min": 1 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.1-nemotron-nano-8b-v1", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.6, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 4096, + "range": { + "min": 1, + "max": 16384 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results.", + "group": "sampling", + "type": "integer", + "default": 0, + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.1-nemotron-safety-guard-8b-v3", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. 
Lower values make outputs more focused; higher values make them more varied.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": 0, + "max": 1 + } + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.1-nemotron-ultra-253b-v1", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.6, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. 
Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 4096, + "range": { + "min": 1, + "max": 16384 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results.", + "group": "sampling", + "type": "integer", + "default": 0, + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.3-nemotron-super-49b-v1", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.6, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 4096, + "range": { + "min": 1, + "max": 16384 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results.", + "group": "sampling", + "type": "integer", + "default": 0, + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. 
The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "llama-3.3-nemotron-super-49b-v1.5", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.6, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 65536, + "range": { + "min": 1, + "max": 65536 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. 
Changing the seed produces a different response with similar characteristics. Fix the seed to reproduce results.", + "group": "sampling", + "type": "integer", + "default": 0, + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemoguard-jailbreak-detect", + "params": [] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemotron-3-nano-30b-a3b", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. 
Repeated requests with the same seed and parameters should return the same result.", + "group": "sampling", + "type": "integer", + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemotron-3-super-120b-a12b", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "reasoning_effort", + "label": "Reasoning effort", + "description": "Controls the reasoning mode. 
'none' disables reasoning tokens, 'low' enables low-effort reasoning, and 'high' enables full reasoning.", + "group": "reasoning", + "type": "enum", + "default": "high", + "values": [ + "none", + "low", + "high" + ] + }, + { + "path": "reasoning_budget", + "label": "Reasoning budget", + "description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.", + "group": "reasoning", + "type": "integer", + "default": 16384, + "range": { + "min": -1, + "max": 32768 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result.", + "group": "sampling", + "type": "integer", + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemotron-3-ultra-550b-a55b", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.95, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "reasoning_effort", + "label": "Reasoning effort", + "description": "Controls the reasoning mode. 'none' disables reasoning tokens, 'medium' enables efficient reasoning, and 'high' enables full reasoning.", + "group": "reasoning", + "type": "enum", + "default": "high", + "values": [ + "none", + "medium", + "high" + ] + }, + { + "path": "reasoning_budget", + "label": "Reasoning budget", + "description": "Maximum number of tokens the model may use for internal reasoning before being forced to end the reasoning trace. Use -1 to disable budget enforcement.", + "group": "reasoning", + "type": "integer", + "default": 16384, + "range": { + "min": -1, + "max": 32768 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result.", + "group": "sampling", + "type": "integer", + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemotron-content-safety-reasoning-4b", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 16384, + "range": { + "min": 1, + "max": 32768 + } + }, + { + "path": "seed", + "label": "Seed", + "description": "Best-effort deterministic sampling seed. Repeated requests with the same seed and parameters should return the same result.", + "group": "sampling", + "type": "integer", + "range": { + "min": 0, + "max": 18446744073709552000 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "nemotron-mini-4b-instruct", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.2, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.7, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 1024, + "range": { + "min": 1, + "max": 4096 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "riva-translate-4b-instruct-v1.1", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.9, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 512, + "range": { + "min": 1, + "max": 4096 + } + }, + { + "path": "frequency_penalty", + "label": "Frequency penalty", + "description": "Penalizes new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "presence_penalty", + "label": "Presence penalty", + "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "group": "sampling", + "type": "number", + "default": 0, + "range": { + "min": -2, + "max": 2 + } + }, + { + "path": "stop", + "label": "Stop", + "description": "A string or list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "group": "generation_length", + "type": "string" + } + ] + }, + { + "provider": "nvidia", + "authType": "api_key", + "model": "usdcode-llama-3.1-70b-instruct", + "params": [ + { + "path": "temperature", + "label": "Temperature", + "description": "Controls randomness. Lower values make outputs more focused; higher values make them more varied. Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 0.1, + "range": { + "min": 0, + "max": 1 + } + }, + { + "path": "top_p", + "label": "Top P", + "description": "Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability. 
Not recommended to modify both temperature and top_p in the same call.", + "group": "sampling", + "type": "number", + "default": 1, + "range": { + "max": 1 + } + }, + { + "path": "max_tokens", + "label": "Max tokens", + "description": "Maximum number of tokens to generate. Generation stops when this limit is reached.", + "group": "generation_length", + "type": "integer", + "default": 1024, + "range": { + "min": 1, + "max": 2048 + } + }, + { + "path": "expert_type", + "label": "Expert type", + "description": "The type of expert to use. 'knowledge' answers with USD knowledge, 'code' responds with vanilla OpenUSD code, 'helperfunction' uses high-level helper functions, and 'auto' lets the LLM determine which expert to use.", + "group": "provider_metadata", + "type": "enum", + "default": "auto", + "values": [ + "auto", + "code", + "knowledge", + "helperfunction" + ] + } + ] + }, { "provider": "openai", "authType": "api_key", diff --git a/packages/modelparams/src/generated/defaults.ts b/packages/modelparams/src/generated/defaults.ts index e64886f..fdb86a2 100644 --- a/packages/modelparams/src/generated/defaults.ts +++ b/packages/modelparams/src/generated/defaults.ts @@ -730,6 +730,99 @@ export const DEFAULTS = { frequency_penalty: 0, "response_format.type": "text", }, + "nvidia/gliner-pii": { + threshold: 0.5, + chunk_length: 384, + overlap: 128, + flat_ner: false, + }, + "nvidia/llama-3.1-nemoguard-8b-topic-control": { + temperature: 0.5, + top_p: 1, + max_tokens: 1024, + frequency_penalty: 0, + presence_penalty: 0, + }, + "nvidia/llama-3.1-nemotron-nano-8b-v1": { + temperature: 0.6, + top_p: 0.95, + max_tokens: 4096, + frequency_penalty: 0, + presence_penalty: 0, + seed: 0, + }, + "nvidia/llama-3.1-nemotron-safety-guard-8b-v3": { + temperature: 0, + }, + "nvidia/llama-3.1-nemotron-ultra-253b-v1": { + temperature: 0.6, + top_p: 0.95, + max_tokens: 4096, + frequency_penalty: 0, + presence_penalty: 0, + seed: 0, + }, + "nvidia/llama-3.3-nemotron-super-49b-v1": { + 
temperature: 0.6, + top_p: 0.95, + max_tokens: 4096, + frequency_penalty: 0, + presence_penalty: 0, + seed: 0, + }, + "nvidia/llama-3.3-nemotron-super-49b-v1.5": { + temperature: 0.6, + top_p: 0.95, + max_tokens: 65536, + frequency_penalty: 0, + presence_penalty: 0, + seed: 0, + }, + "nvidia/nemoguard-jailbreak-detect": {}, + "nvidia/nemotron-3-nano-30b-a3b": { + temperature: 1, + top_p: 1, + max_tokens: 16384, + }, + "nvidia/nemotron-3-super-120b-a12b": { + temperature: 1, + top_p: 0.95, + max_tokens: 16384, + reasoning_effort: "high", + reasoning_budget: 16384, + }, + "nvidia/nemotron-3-ultra-550b-a55b": { + temperature: 1, + top_p: 0.95, + max_tokens: 16384, + reasoning_effort: "high", + reasoning_budget: 16384, + }, + "nvidia/nemotron-content-safety-reasoning-4b": { + temperature: 1, + top_p: 1, + max_tokens: 16384, + }, + "nvidia/nemotron-mini-4b-instruct": { + temperature: 0.2, + top_p: 0.7, + max_tokens: 1024, + frequency_penalty: 0, + presence_penalty: 0, + }, + "nvidia/riva-translate-4b-instruct-v1.1": { + temperature: 0, + top_p: 0.9, + max_tokens: 512, + frequency_penalty: 0, + presence_penalty: 0, + }, + "nvidia/usdcode-llama-3.1-70b-instruct": { + temperature: 0.1, + top_p: 1, + max_tokens: 1024, + expert_type: "auto", + }, "openai/chatgpt-4o-latest": { max_tokens: 4096, temperature: 1, diff --git a/packages/modelparams/src/generated/model-ids.ts b/packages/modelparams/src/generated/model-ids.ts index 33a1830..3b7eae0 100644 --- a/packages/modelparams/src/generated/model-ids.ts +++ b/packages/modelparams/src/generated/model-ids.ts @@ -109,6 +109,21 @@ export const MODEL_IDS = [ "moonshot/moonshot-v1-128k", "moonshot/moonshot-v1-32k", "moonshot/moonshot-v1-8k", + "nvidia/gliner-pii", + "nvidia/llama-3.1-nemoguard-8b-topic-control", + "nvidia/llama-3.1-nemotron-nano-8b-v1", + "nvidia/llama-3.1-nemotron-safety-guard-8b-v3", + "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "nvidia/llama-3.3-nemotron-super-49b-v1", + 
"nvidia/llama-3.3-nemotron-super-49b-v1.5", + "nvidia/nemoguard-jailbreak-detect", + "nvidia/nemotron-3-nano-30b-a3b", + "nvidia/nemotron-3-super-120b-a12b", + "nvidia/nemotron-3-ultra-550b-a55b", + "nvidia/nemotron-content-safety-reasoning-4b", + "nvidia/nemotron-mini-4b-instruct", + "nvidia/riva-translate-4b-instruct-v1.1", + "nvidia/usdcode-llama-3.1-70b-instruct", "openai/chatgpt-4o-latest", "openai/gpt-3.5-turbo", "openai/gpt-4-turbo", @@ -192,6 +207,7 @@ export const PROVIDERS = [ "minimax", "mistral", "moonshot", + "nvidia", "openai", "perplexity", "xai", diff --git a/packages/modelparams/src/generated/params-by-id.ts b/packages/modelparams/src/generated/params-by-id.ts index 67765f8..17357d0 100644 --- a/packages/modelparams/src/generated/params-by-id.ts +++ b/packages/modelparams/src/generated/params-by-id.ts @@ -913,6 +913,114 @@ export type ParamsById = { frequency_penalty: number; "response_format.type": "text" | "json_object"; }; + "nvidia/gliner-pii": { + threshold: number; + chunk_length: number; + overlap: number; + flat_ner: boolean; + }; + "nvidia/llama-3.1-nemoguard-8b-topic-control": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + stop: string; + }; + "nvidia/llama-3.1-nemotron-nano-8b-v1": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + seed: number; + stop: string; + }; + "nvidia/llama-3.1-nemotron-safety-guard-8b-v3": { + temperature: number; + }; + "nvidia/llama-3.1-nemotron-ultra-253b-v1": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + seed: number; + stop: string; + }; + "nvidia/llama-3.3-nemotron-super-49b-v1": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + seed: number; + stop: string; + }; + "nvidia/llama-3.3-nemotron-super-49b-v1.5": { + 
temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + seed: number; + stop: string; + }; + "nvidia/nemoguard-jailbreak-detect": Record; + "nvidia/nemotron-3-nano-30b-a3b": { + temperature: number; + top_p: number; + max_tokens: number; + seed: number; + stop: string; + }; + "nvidia/nemotron-3-super-120b-a12b": { + temperature: number; + top_p: number; + max_tokens: number; + reasoning_effort: "none" | "low" | "high"; + reasoning_budget: number; + seed: number; + stop: string; + }; + "nvidia/nemotron-3-ultra-550b-a55b": { + temperature: number; + top_p: number; + max_tokens: number; + reasoning_effort: "none" | "medium" | "high"; + reasoning_budget: number; + seed: number; + stop: string; + }; + "nvidia/nemotron-content-safety-reasoning-4b": { + temperature: number; + top_p: number; + max_tokens: number; + seed: number; + stop: string; + }; + "nvidia/nemotron-mini-4b-instruct": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + stop: string; + }; + "nvidia/riva-translate-4b-instruct-v1.1": { + temperature: number; + top_p: number; + max_tokens: number; + frequency_penalty: number; + presence_penalty: number; + stop: string; + }; + "nvidia/usdcode-llama-3.1-70b-instruct": { + temperature: number; + top_p: number; + max_tokens: number; + expert_type: "auto" | "code" | "knowledge" | "helperfunction"; + }; "openai/chatgpt-4o-latest": { max_tokens: number; temperature: number;