All LiteLLM models work out of the box with the Python `openai` library.
pip install openai
from openai import OpenAI
# Create a standard OpenAI client that talks to the LiteLLM endpoint instead
# of the default OpenAI API; only the key and base URL need to be set.
client = OpenAI(
    api_key="<YOUR KEY>",  # placeholder: replace with your own API key
    # Must be a bare URL; surrounding angle brackets would make it invalid.
    base_url="https://litellm.sph-prod.ethz.ch/v1",
)

# Send a single-turn chat completion request.
response = client.chat.completions.create(
    model="openrouter/openai/gpt-4.1-mini",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem",
        }
    ],
    # Low cap on generated tokens for this test request; the reply may be
    # cut off when the limit is reached.
    max_tokens=16,
)

print(response)  # full raw response
print(response.choices[0].message.content)  # model output only
ChatCompletion(
id='gen-1779453344-pCY5vpe8bGrZJrT5h5pw',
choices=[
Choice(
finish_reason='length',
index=0,
logprobs=None,
message=ChatCompletionMessage(
content='A whisper in the morning light, \nDreams awaken, taking flight. \n',
refusal=None,
role='assistant',
annotations=None,
audio=None,
function_call=None,
tool_calls=None,
provider_specific_fields={
'refusal': None,
'reasoning': None
}
),
provider_specific_fields={
'native_finish_reason': 'max_output_tokens'
}
)
],
created=1779453344,
model='openrouter/openai/gpt-4.1-mini',
object='chat.completion',
service_tier='default',
system_fingerprint=None,
usage=CompletionUsage(
completion_tokens=16,
prompt_tokens=17,
total_tokens=33,
completion_tokens_details=CompletionTokensDetails(
accepted_prediction_tokens=None,
audio_tokens=0,
reasoning_tokens=0,
rejected_prediction_tokens=None,
image_tokens=0
),
prompt_tokens_details=PromptTokensDetails(
audio_tokens=0,
cached_tokens=0,
video_tokens=0,
cache_write_tokens=0
),
cost=3.24e-05,
is_byok=False,
cost_details={
'upstream_inference_cost': 3.24e-05,
'upstream_inference_prompt_cost': 6.8e-06,
'upstream_inference_completions_cost': 2.56e-05
}
),
provider='Azure'
)
Output:
A whisper in the morning light,
Dreams awaken, taking flight.
from openai import OpenAI
# Minimal interactive chat: keeps the whole conversation in `messages` and
# resends it on every turn so the model sees the earlier exchanges.
client = OpenAI(
    api_key="API_KEY",  # placeholder: replace with your own API key
    # Must be a bare URL; surrounding angle brackets would make it invalid.
    base_url="https://litellm.sph-prod.ethz.ch/v1",
)

model = "openrouter/openai/gpt-4.1-mini"

# Conversation history, seeded with the system prompt. User and assistant
# turns are appended below as the chat progresses.
messages = [
    {
        "role": "system",
        "content": "You are a helpful, conversational assistant. Respond naturally and helpfully."
    }
]

while True:
    user_input = input("You: ")

    # Typing "exit" or "quit" (any letter case) ends the session.
    if user_input.lower() in ["exit", "quit"]:
        break

    messages.append({
        "role": "user",
        "content": user_input
    })

    # Send the full history, not just the latest message.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=200
    )

    reply = response.choices[0].message.content
    print("Bot:", reply)

    # Record the assistant's answer so it is part of the context next turn.
    messages.append({
        "role": "assistant",
        "content": reply
    })