diff --git a/README.md b/README.md index 3eea186..c0750a4 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ chat_completion = client.chat.completions.create( "content": "Explain the importance of low latency LLMs", } ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) print(chat_completion.choices[0].message.content) ``` @@ -71,7 +71,7 @@ async def main() -> None: "content": "Explain the importance of low latency LLMs", } ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) print(chat_completion.choices[0].message.content) @@ -113,7 +113,7 @@ async def main() -> None: "content": "Explain the importance of low latency LLMs", } ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) print(chat_completion.id) @@ -197,7 +197,7 @@ try: "content": "Explain the importance of low latency LLMs", }, ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) except groq.APIConnectionError as e: print("The server could not be reached") @@ -252,7 +252,7 @@ client.with_options(max_retries=5).chat.completions.create( "content": "Explain the importance of low latency LLMs", }, ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) ``` @@ -287,7 +287,7 @@ client.with_options(timeout=5.0).chat.completions.create( "content": "Explain the importance of low latency LLMs", }, ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) ``` @@ -337,7 +337,7 @@ response = client.chat.completions.with_raw_response.create( "role": "user", "content": "Explain the importance of low latency LLMs", }], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) print(response.headers.get('X-My-Header')) @@ -367,7 +367,7 @@ with client.chat.completions.with_streaming_response.create( "content": "Explain the importance of low latency LLMs", }, ], - model="openai/gpt-oss-20b", + model="llama-3.3-70b-versatile", ) as response: print(response.headers.get("X-My-Header"))