# Example: send a chat completion request to a model deployed on Baseten
# through the OpenAI-compatible bridge, using the OpenAI Python client.
#
# The API key is read from the BASETEN_API_KEY environment variable
# (a KeyError is raised if it is not set). The Baseten-specific routing
# arguments (model_id / deployment_id) are passed via `extra_body`.
import os

from openai import OpenAI

model_id = "abcd1234"  # Replace with your model ID
deployment_id = "4321cbda"  # [Optional] Replace with your deployment ID

# Point the OpenAI client at the Baseten bridge instead of the default API host.
client = OpenAI(
    api_key=os.environ["BASETEN_API_KEY"],
    base_url="https://bridge.baseten.co/v1/direct",
)

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",  # Replace with your model name
    messages=[
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
    # Baseten-specific arguments travel in the request body under "baseten".
    extra_body={
        "baseten": {
            "model_id": model_id,
            "deployment_id": deployment_id,
        }
    },
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)
{"choices":[{"finish_reason":null,"index":0,"message":{"content":"The 2020 World Series was played in Texas at Globe Life Field in Arlington.","role":"assistant"}}],"created":1700584611,"id":"chatcmpl-eedbac8f-f68d-4769-a1a7-a1c550be8d08","model":"abcd1234","object":"chat.completion","usage":{"completion_tokens":0,"prompt_tokens":0,"total_tokens":0}}
Use this endpoint with the OpenAI Python client and any deployment of a compatible model deployed on Baseten.
If you're serving a vLLM model in OpenAI-compatible mode, this endpoint will support that model out of the box.
If your model does not have an OpenAI-compatible mode, you can use the previous version of the bridge to make it compatible with OpenAI's client, but with a more limited set of supported features.
Parameters supported by the OpenAI ChatCompletions request can be found in the OpenAI documentation.
Below are details about Baseten-specific arguments that must be passed into the bridge.
The model name, typically the Hugging Face repo name (e.g. meta-llama/Meta-Llama-3.1-70B-Instruct). In some cases, it may be another default specified by your inference engine.
Pin your openai package version in your requirements.txt file. This helps avoid breaking changes that are
introduced through package upgrades.
If you must make breaking changes to your Truss server (e.g. to introduce a new feature), first publish a new model deployment, then update your API call on the client side.
# Example: send a chat completion request to a model deployed on Baseten
# through the OpenAI-compatible bridge, using the OpenAI Python client.
#
# The API key is read from the BASETEN_API_KEY environment variable
# (a KeyError is raised if it is not set). The Baseten-specific routing
# arguments (model_id / deployment_id) are passed via `extra_body`.
import os

from openai import OpenAI

model_id = "abcd1234"  # Replace with your model ID
deployment_id = "4321cbda"  # [Optional] Replace with your deployment ID

# Point the OpenAI client at the Baseten bridge instead of the default API host.
client = OpenAI(
    api_key=os.environ["BASETEN_API_KEY"],
    base_url="https://bridge.baseten.co/v1/direct",
)

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",  # Replace with your model name
    messages=[
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
    # Baseten-specific arguments travel in the request body under "baseten".
    extra_body={
        "baseten": {
            "model_id": model_id,
            "deployment_id": deployment_id,
        }
    },
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)
{"choices":[{"finish_reason":null,"index":0,"message":{"content":"The 2020 World Series was played in Texas at Globe Life Field in Arlington.","role":"assistant"}}],"created":1700584611,"id":"chatcmpl-eedbac8f-f68d-4769-a1a7-a1c550be8d08","model":"abcd1234","object":"chat.completion","usage":{"completion_tokens":0,"prompt_tokens":0,"total_tokens":0}}
# Example: send a chat completion request to a model deployed on Baseten
# through the OpenAI-compatible bridge, using the OpenAI Python client.
#
# The API key is read from the BASETEN_API_KEY environment variable
# (a KeyError is raised if it is not set). The Baseten-specific routing
# arguments (model_id / deployment_id) are passed via `extra_body`.
import os

from openai import OpenAI

model_id = "abcd1234"  # Replace with your model ID
deployment_id = "4321cbda"  # [Optional] Replace with your deployment ID

# Point the OpenAI client at the Baseten bridge instead of the default API host.
client = OpenAI(
    api_key=os.environ["BASETEN_API_KEY"],
    base_url="https://bridge.baseten.co/v1/direct",
)

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",  # Replace with your model name
    messages=[
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
    # Baseten-specific arguments travel in the request body under "baseten".
    extra_body={
        "baseten": {
            "model_id": model_id,
            "deployment_id": deployment_id,
        }
    },
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)
{"choices":[{"finish_reason":null,"index":0,"message":{"content":"The 2020 World Series was played in Texas at Globe Life Field in Arlington.","role":"assistant"}}],"created":1700584611,"id":"chatcmpl-eedbac8f-f68d-4769-a1a7-a1c550be8d08","model":"abcd1234","object":"chat.completion","usage":{"completion_tokens":0,"prompt_tokens":0,"total_tokens":0}}