/* Page backdrop: one non-repeating 715x379 image held in the top-right corner
   of the viewport (fixed attachment, so it does not scroll with the content). */
html,
body {
  background-attachment: fixed;
  background-image: url("https://ny2vcqfthujcm8md.public.blob.vercel-storage.com/new-top-right-bg-GsRxhtLWkGqRqwsLC0Ulzc0ZVYY94k.png");
  background-position: top right;
  background-repeat: no-repeat;
  background-size: 715px 379px;
}

/* Cards: 8px corners; !important so this radius wins over competing rules. */
.card {
  border-radius: 8px !important;
}

/* Homepage artwork: an absolutely positioned, full-width strip (458px tall)
   showing a single 715x458 image in its top-left corner. */
.homepage-bg {
  position: absolute;
  width: 100%;
  height: 458px;
  background-image: url("https://ny2vcqfthujcm8md.public.blob.vercel-storage.com/homepage-top-left-bg-MpWlP1ZN2NjOMVi4cJyonJox8RoNGU.png");
  background-position: top left;
  background-repeat: no-repeat;
  background-size: 715px 458px;
}

/* Homepage content starts 150px further down. */
.homepage-content {
  margin-top: 150px !important;
}

/* Opaque white navbar background. */
#navbar-transition-maple {
  background: #fff;
}

/* Smaller type for the table of contents. */
#table-of-contents-content {
  font-size: 12px;
}

/* Code blocks use the same mint panel in both the dark and light variants. */
.codeblock-dark,
.codeblock-light {
  background: #f0fcf7;
  border: 1px solid #aae7ce;
  border-radius: 8px;
}

/* Padded, rounded panel behind the examples header. */
.examples-header {
  padding: 40px;
  border-radius: 8px;
  background: #fbf9f5;
}

/* Status cards: base appearance shared by every state. */
.status-card {
  cursor: pointer;
  border: 1px solid rgba(255, 255, 255, 0.1);
  border-radius: 12px;
  background-color: rgba(0, 0, 0, 0.05);
  transition: all 0.3s ease;
}

/* Hover: raise the card by 2px, add a soft shadow and tint the border green. */
.status-card:hover {
  border-color: rgba(37, 199, 62, 0.3);
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
  transform: translateY(-2px);
}

/* Hover border tint for degraded cards (yellow). */
.status-card.degraded:hover {
  border-color: rgba(255, 204, 0, 0.3);
}

/* Hover border tint for down cards (red). */
.status-card.down:hover {
  border-color: rgba(255, 59, 48, 0.3);
}

.status-card .card-title {
  margin-bottom: 4px;
  font-weight: 600;
}

/* Status text: centered, slightly faded, underlined with a dotted rule. */
.status-card .status-indicator {
  width: 100%;
  margin-top: 8px;
  padding-bottom: 4px;
  border-bottom: 1px dotted rgba(0, 0, 0, 0.1);
  font-size: 0.9rem;
  font-weight: 500;
  text-align: center;
  opacity: 0.7;
}

/* Summary banner above the cards: a green-tinted flex row with icon and text. */
.status-banner {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  margin-bottom: 1.5rem;
  padding: 1rem;
  border: 1px solid rgba(37, 199, 62, 0.2);
  border-radius: 0.5rem;
  background-color: rgba(37, 199, 62, 0.05);
}

/* Banner icon is green by default. */
.status-banner .banner-icon {
  color: #25c73e;
}

.status-banner .banner-text {
  color: #1a1a1a;
  font-size: 0.875rem;
}

/* Operational/normal state: green check icon. */
.status-card .icon[data-icon="circle-check"] {
  margin-bottom: 10px;
  color: #25c73e;
}

/* Degraded state: yellow tint on the card, its icon and its status text. */
.status-card.degraded {
  border: 1px solid rgba(255, 204, 0, 0.2);
  background-color: rgba(255, 204, 0, 0.05);
}

.status-card.degraded .icon[data-icon="circle-exclamation"] {
  color: #ffcc00;
}

.status-card.degraded .status-indicator {
  border-bottom: 1px dotted rgba(255, 204, 0, 0.3);
  color: #ffcc00;
}

/* Down state: red tint on the card, its icon and its status text. */
.status-card.down {
  border: 1px solid rgba(255, 59, 48, 0.2);
  background-color: rgba(255, 59, 48, 0.05);
}

.status-card.down .icon[data-icon="circle-xmark"] {
  color: #ff3b30;
}

.status-card.down .status-indicator {
  border-bottom: 1px dotted rgba(255, 59, 48, 0.3);
  color: #ff3b30;
}

/* Loading placeholder: a 200px-tall dashed box with its message centered
   both horizontally and vertically. */
.loading-message {
  display: flex;
  align-items: center;
  justify-content: center;
  height: 200px;
  margin: 20px 0 40px 0;
  border: 1px dashed rgba(0, 0, 0, 0.1);
  border-radius: 8px;
  background-color: rgba(0, 0, 0, 0.02);
  color: #666;
  font-size: 1.1rem;
  text-align: center;
}

/* Custom styles can be added here */

/* Status page: spacing above the card section. */
#status-cards {
  margin-top: 2rem;
}

/* Cards inside the status section animate every property change. */
#status-cards .card {
  transition: all 0.2s ease-in-out;
}

/* Cards are tinted according to their data-status attribute:
   red for "down", amber for "degraded". */
#status-cards .card[data-status="down"] {
  background-color: rgba(239, 68, 68, 0.05);
  border-color: rgba(239, 68, 68, 0.2);
}

#status-cards .card[data-status="degraded"] {
  background-color: rgba(234, 179, 8, 0.05);
  border-color: rgba(234, 179, 8, 0.2);
}

/* On hover the tinted border becomes fully opaque. */
#status-cards .card[data-status="down"]:hover {
  border-color: rgb(239, 68, 68);
}

#status-cards .card[data-status="degraded"]:hover {
  border-color: rgb(234, 179, 8);
}

/* Status text: grey by default, recolored to match a failing card. */
.status-indicator {
  color: #4b5563;
  font-size: 0.875rem;
}

#status-cards .card[data-status="down"] .status-indicator {
  color: rgb(239, 68, 68);
}

#status-cards .card[data-status="degraded"] .status-indicator {
  color: rgb(234, 179, 8);
}


// Constants and configuration
// Five minutes expressed in milliseconds; used for both timing settings below.
const FIVE_MINUTES_MS = 5 * 60 * 1000;

/**
 * Settings for the status page widget.
 *
 * NOTE(review): `apiKey` is a literal in this script, so anything that can read
 * the script can read the key. Confirm the key is meant to be public, or move
 * the API call behind a server-side proxy.
 */
const STATUS_CONFIG = {
  // Identifies the page in the StatusPage.io request URL.
  statusPageId: '8dp3vkb2h71c',
  // Sent as the OAuth credential in the Authorization header.
  apiKey: 'cc662c1e5b214d9aba05e95c40058d43',
  // How often the cards are re-fetched and redrawn.
  refreshInterval: FIVE_MINUTES_MS,
  // How long a fetched response may be reused before a new request is made.
  cacheLifetime: FIVE_MINUTES_MS,
  // When true, data comes from TEST_SCENARIOS instead of the API.
  testMode: false,
  // Scenario used in test mode. One of: 'allOperational', 'modelInferenceDown',
  // 'managementApiDegraded', 'webAppIssues', 'multipleIssues'.
  testScenario: 'allOperational'
};

// Maps a component name as it appears in incident data to the title of the
// card that displays it. Two source names share the "Web Application" card.
const COMPONENT_MAP = {
  'Model Inference': 'Model Inference',
  'Model Management API': 'Management API',
  'Web Application': 'Web Application',
  'Homepage and Docs': 'Web Application'
};

// Maps a component status string to what a card shows: the value written to
// the card's data-status attribute, the icon name, and the label text.
// Both outage levels are displayed as "Down".
const STATUS_MAPPING = {
  major_outage: {
    status: 'down',
    icon: 'circle-xmark',
    text: 'Down'
  },
  partial_outage: {
    status: 'down',
    icon: 'circle-xmark',
    text: 'Down'
  },
  degraded_performance: {
    status: 'degraded',
    icon: 'circle-exclamation',
    text: 'Degraded'
  },
  operational: {
    status: 'operational',
    icon: 'circle-check',
    text: 'Normal'
  }
};

// Status used when nothing indicates a problem with a component.
const DEFAULT_COMPONENT_STATUS = STATUS_MAPPING.operational;

// Test scenarios
// Canned incident lists returned in place of the API response when
// STATUS_CONFIG.testMode is true. Every fake incident is "investigating" and
// affects exactly one component.
const TEST_SCENARIOS = (() => {
  // Builds one fake incident touching a single component.
  const incident = (impact, componentName, componentStatus) => ({
    status: 'investigating',
    impact,
    components: [{
      name: componentName,
      status: componentStatus
    }]
  });

  return {
    allOperational: {
      name: 'All Systems Operational',
      incidents: []
    },
    modelInferenceDown: {
      name: 'Model Inference Down',
      incidents: [incident('critical', 'Model Inference', 'major_outage')]
    },
    managementApiDegraded: {
      name: 'Management API Degraded',
      incidents: [incident('major', 'Model Management API', 'degraded_performance')]
    },
    webAppIssues: {
      name: 'Web Application Issues',
      incidents: [incident('minor', 'Web Application', 'degraded_performance')]
    },
    multipleIssues: {
      name: 'Multiple Systems Affected',
      incidents: [
        incident('critical', 'Model Inference', 'major_outage'),
        incident('major', 'Model Management API', 'degraded_performance')
      ]
    }
  };
})();

// API caching system
// In-memory cache holding the most recent API response and when it was stored.
const apiCache = {
  data: null,
  timestamp: 0,

  // Truthy only when something is cached and it is younger than
  // STATUS_CONFIG.cacheLifetime milliseconds.
  isCacheValid: function () {
    const ageMs = Date.now() - this.timestamp;
    return this.data && (ageMs < STATUS_CONFIG.cacheLifetime);
  },

  // Stores `data`, stamps it with the current time, and hands it back so the
  // call can sit inside a promise chain.
  updateCache: function (data) {
    this.timestamp = Date.now();
    this.data = data;
    return data;
  },

  // Returns whatever is cached (null if nothing has been stored), regardless of age.
  getData: function () {
    return this.data;
  }
};

// Helper functions
/**
 * Picks the display status for a component.
 *
 * A component status with an entry in STATUS_MAPPING wins. Otherwise the
 * incident impact decides: 'critical' maps to the major-outage entry, 'major'
 * and 'minor' map to the degraded entry. Anything else yields the default.
 *
 * @param {string} [componentStatus] - Status string reported for the component.
 * @param {string} [incidentImpact] - Impact level used as a fallback.
 * @returns {Object} An entry of STATUS_MAPPING, or DEFAULT_COMPONENT_STATUS.
 */
function getComponentStatus(componentStatus, incidentImpact) {
  const mapped = componentStatus ? STATUS_MAPPING[componentStatus] : undefined;
  if (mapped) {
    return mapped;
  }

  if (incidentImpact === 'critical') {
    return STATUS_MAPPING.major_outage;
  }
  if (incidentImpact === 'major' || incidentImpact === 'minor') {
    return STATUS_MAPPING.degraded_performance;
  }

  return DEFAULT_COMPONENT_STATUS;
}

/**
 * Records the status of one affected component in `statusComponents`.
 *
 * The component name is translated through COMPONENT_MAP; components that are
 * unmapped or not present in `statusComponents` are ignored. A component's
 * entry is only ever replaced by a MORE severe status, so when several
 * incidents touch the same component the worst one is shown no matter which
 * order the incidents are processed in.
 *
 * @param {Object} component - Affected component; reads `name`, `new_status`
 *   (preferred) or `status`, and `impact`.
 * @param {Object} statusComponents - Display name -> status entry; mutated in place.
 */
function updateComponentStatus(component, statusComponents) {
  const displayName = COMPONENT_MAP[component.name];
  if (!displayName || !statusComponents[displayName]) return;

  // Higher number = worse. Unknown status values rank as operational.
  const severity = { operational: 0, degraded: 1, down: 2 };
  const rank = entry => severity[entry.status] ?? 0;

  const status = getComponentStatus(component.new_status || component.status, component.impact);
  if (rank(status) > rank(statusComponents[displayName])) {
    statusComponents[displayName] = status;
  }
}

/**
 * Sets the summary banner to "all operational" (green) or "some issues" (yellow).
 *
 * Does nothing when `statusCards` contains no `.status-banner`. The icon is
 * touched only when the icon element exists; the message and the banner's
 * background/border colors are touched only when the text element exists.
 *
 * @param {Element} statusCards - Container searched for the banner.
 * @param {Object} statusComponents - Display name -> status entry; the banner
 *   reports issues when any entry's `status` is not 'operational'.
 */
function updateStatusBanner(statusCards, statusComponents) {
  const banner = statusCards.querySelector('.status-banner');
  if (!banner) return;

  const iconEl = banner.querySelector('.banner-icon svg');
  const textEl = banner.querySelector('.banner-text');

  const allOperational = Object.values(statusComponents)
    .every(entry => entry.status === 'operational');

  const look = allOperational
    ? {
        icon: 'circle-check',
        iconColor: '#25c73e',
        message: 'All systems are operational.',
        background: 'rgba(37, 199, 62, 0.05)',
        border: 'rgba(37, 199, 62, 0.2)'
      }
    : {
        icon: 'circle-exclamation',
        iconColor: '#ffcc00',
        message: 'Some systems are experiencing issues.',
        background: 'rgba(255, 204, 0, 0.05)',
        border: 'rgba(255, 204, 0, 0.2)'
      };

  if (iconEl) {
    iconEl.setAttribute('data-icon', look.icon);
    // The color is applied to the element wrapping the svg, not the svg itself.
    iconEl.parentNode.style.color = look.iconColor;
  }

  if (textEl) {
    textEl.textContent = look.message;
    banner.style.backgroundColor = look.background;
    banner.style.borderColor = look.border;
  }
}

/**
 * Recomputes the status of every displayed component from the incident list
 * and renders the result into the #status-cards section: the summary banner,
 * each card (icon mask, status text, data-status attribute) and the
 * "last updated" label. Does nothing when the page has no #status-cards element.
 *
 * Only ongoing incidents count. Incidents whose status is 'resolved',
 * 'postmortem', 'completed' or 'scheduled' are skipped, because they are
 * either finished or have not started yet.
 *
 * @param {Array<Object>} incidents - Incident objects. Each may carry
 *   `incident_updates` (whose entries have `created_at` and
 *   `affected_components`) and/or `components`. Any value that is not an array
 *   is treated as "no incidents".
 */
function updateStatusCards(incidents) {
  const statusCards = document.getElementById('status-cards');
  if (!statusCards) return;

  // Incident statuses that must not mark a component as failing.
  const inactiveIncidentStatuses = ['resolved', 'postmortem', 'completed', 'scheduled'];

  const statusComponents = {
    'Model Inference': { ...DEFAULT_COMPONENT_STATUS },
    'Management API': { ...DEFAULT_COMPONENT_STATUS },
    'Web Application': { ...DEFAULT_COMPONENT_STATUS }
  };

  // Guard against a non-array payload (for example an error object).
  const activeIncidents = Array.isArray(incidents)
    ? incidents.filter(incident => incident && !inactiveIncidentStatuses.includes(incident.status))
    : [];

  activeIncidents.forEach(incident => {
    // Find the most recent update that lists affected components.
    let latestUpdate = null;
    for (const update of incident.incident_updates || []) {
      if (update.affected_components?.length > 0) {
        if (!latestUpdate || new Date(update.created_at) > new Date(latestUpdate.created_at)) {
          latestUpdate = update;
        }
      }
    }

    // Prefer the components of that update; otherwise fall back to the
    // incident's own component list.
    const componentsToProcess = latestUpdate
      ? latestUpdate.affected_components
      : incident.components || [];

    componentsToProcess.forEach(component => updateComponentStatus(component, statusComponents));
  });

  // Update the status banner
  updateStatusBanner(statusCards, statusComponents);

  // Update individual cards; a card is matched to a component by its <h2> text.
  const cards = statusCards.querySelectorAll('.card');
  cards.forEach(card => {
    const title = card.querySelector('h2')?.textContent.trim();
    if (title && statusComponents[title]) {
      const component = statusComponents[title];

      // The icon is drawn as a CSS mask, so switching icons means switching the mask image.
      const iconSvg = card.querySelector('.h-6.w-6 svg');
      if (iconSvg) {
        iconSvg.style.maskImage = `url("https://mintlify.b-cdn.net/v6.6.0/solid/${component.icon}.svg")`;
        iconSvg.style.maskRepeat = 'no-repeat';
        iconSvg.style.maskPosition = 'center center';
      }

      // Update status text and card status
      const statusIndicator = card.querySelector('.status-indicator');
      if (statusIndicator) {
        statusIndicator.textContent = component.text;

        // Operational cards carry no data-status attribute at all.
        if (component.status === 'operational') {
          card.removeAttribute('data-status');
        } else {
          card.setAttribute('data-status', component.status);
        }
      }
    }
  });

  // Update last refreshed time
  const lastRefreshed = document.getElementById('last-refreshed');
  if (lastRefreshed) {
    lastRefreshed.textContent = `Last updated: ${new Date().toLocaleTimeString()}`;
  }
}

// Status page functionality
// After the window has loaded: initialize the status page if its markup is
// present, then start watching for later page changes.
window.addEventListener("load", () => {
  initStatusPageIfNeeded();
  setupTitleObserver();
});

// Monitor title changes to detect page navigation
/**
 * Watches for page changes and initializes the status page when one is shown.
 *
 * Two triggers are installed:
 *  - a MutationObserver on the document's <title> element (if there is one);
 *    when it fires and the URL path differs from the last one seen, the new
 *    path is checked;
 *  - a `popstate` listener for browser back/forward navigation.
 * In both cases initialization is scheduled 100 ms later, and only when the
 * path contains '/status/status'.
 */
function setupTitleObserver() {
  const STATUS_PATH_FRAGMENT = '/status/status';

  // Schedules initialization if the current URL is the status page.
  const scheduleInitIfOnStatusPage = () => {
    if (window.location.pathname.includes(STATUS_PATH_FRAGMENT)) {
      setTimeout(initStatusPageIfNeeded, 100);
    }
  };

  // Last path handled, so repeated title mutations on one page are ignored.
  let lastSeenPath = window.location.pathname;

  const titleObserver = new MutationObserver(() => {
    if (window.location.pathname === lastSeenPath) return;
    lastSeenPath = window.location.pathname;
    scheduleInitIfOnStatusPage();
  });

  const titleElement = document.querySelector('title');
  if (titleElement) {
    titleObserver.observe(titleElement, { subtree: true, characterData: true, childList: true });
  }

  window.addEventListener('popstate', scheduleInitIfOnStatusPage);
}

// Check if we're on the status page and initialize if needed
/**
 * Starts the status page once per #status-cards element.
 *
 * Returns without doing anything when the element is missing or already
 * carries the `data-initialized` attribute. Otherwise it sets that attribute
 * and then calls initStatusPage().
 */
function initStatusPageIfNeeded() {
  const statusCards = document.getElementById('status-cards');
  if (!statusCards) return;
  if (statusCards.hasAttribute('data-initialized')) return;

  // Set the marker before initializing so a second call cannot start it twice.
  statusCards.setAttribute('data-initialized', 'true');
  initStatusPage();
}

// Main function to initialize status page
/**
 * Loads incident data, renders the status cards, and starts a periodic refresh.
 *
 * At most one refresh timer exists at a time. This function can run again
 * whenever a fresh #status-cards element appears, so any timer left over from
 * an earlier run is cleared before the new one is started. The timer id is
 * kept on `initStatusPage.refreshTimerId`.
 *
 * @returns {Promise<void>} Settles after the first render and timer setup.
 */
async function initStatusPage() {
  console.log('Initializing status page');

  // Fetch incidents data using cache if possible
  const incidents = await fetchStatusData();

  // Update status cards with the fetched data
  updateStatusCards(incidents);

  // Drop the timer from a previous initialization so refreshes do not stack up.
  if (initStatusPage.refreshTimerId !== undefined) {
    clearInterval(initStatusPage.refreshTimerId);
  }

  // Refresh every STATUS_CONFIG.refreshInterval milliseconds.
  initStatusPage.refreshTimerId = setInterval(async () => {
    // Zeroing the timestamp makes the cache look expired and forces a real request.
    apiCache.timestamp = 0;
    const refreshedIncidents = await fetchStatusData();
    updateStatusCards(refreshedIncidents);
  }, STATUS_CONFIG.refreshInterval);
}

// Function to fetch status data from StatusPage.io
/**
 * Resolves with the list of incidents to display. Never rejects and never
 * throws synchronously.
 *
 * Sources, in order:
 *  1. test mode: the incidents of the configured TEST_SCENARIOS entry, or an
 *     empty list (with a warning) if that scenario name does not exist;
 *  2. the cache, while it is still valid;
 *  3. the StatusPage.io incidents endpoint. Its result is cached on success.
 * If the request fails, or the response body is not an array, the previously
 * cached data is returned, or an empty array when nothing was ever cached.
 *
 * @returns {Promise<Array<Object>>} Incident objects.
 */
function fetchStatusData() {
  // If in test mode, return test data
  if (STATUS_CONFIG.testMode) {
    console.log('Using test scenario:', STATUS_CONFIG.testScenario);
    const scenario = TEST_SCENARIOS[STATUS_CONFIG.testScenario];
    if (!scenario) {
      // A mistyped scenario name must not turn into a synchronous TypeError.
      console.warn('Unknown test scenario, showing no incidents:', STATUS_CONFIG.testScenario);
    }
    return Promise.resolve(scenario?.incidents ?? []);
  }

  // Check if we have a valid cache
  if (apiCache.isCacheValid()) {
    console.log('Using cached status data');
    return Promise.resolve(apiCache.getData());
  }

  console.log('Fetching fresh status data from API');
  return fetch(`https://api.statuspage.io/v1/pages/${STATUS_CONFIG.statusPageId}/incidents`, {
    headers: {
      'Authorization': `OAuth ${STATUS_CONFIG.apiKey}`,
      'Content-Type': 'application/json'
    }
  })
  .then(response => {
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    return response.json();
  })
  .then(data => {
    // Callers iterate the result, so only an array may be cached and returned.
    if (!Array.isArray(data)) {
      throw new Error('Unexpected response body: expected an array of incidents');
    }
    return apiCache.updateCache(data);
  })
  .catch(error => {
    console.error('Error fetching status data:', error);
    // If there's an error, use cached data if available, or return empty array
    return apiCache.getData() || [];
  });
}


import { OpenAI } from "openai";

// Sends a single chat completion request through the OpenAI client and prints
// the assistant's reply. A failed call is logged, not rethrown.
async function main() {

    // Initialize the client
    const client = new OpenAI({
        baseURL: "https://inference.baseten.co/v1",
        apiKey: process.env.BASETEN_API_KEY,
    });

    // Conversation sent to the model
    const messages = [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: "Hello, how are you?" },
    ];

    // Use the client
    try {
        const { choices } = await client.chat.completions.create({
            model: "deepseek-ai/DeepSeek-V3-0324",
            messages,
        });
        const [firstChoice] = choices;
        console.log(firstChoice.message.content);
    } catch (error) {
        console.error("Error making API call:", error);
    }
}

main();

#!/usr/bin/env node
import fs from 'fs';
import path from 'path';
import { OpenAI } from 'openai';
import { fileURLToPath } from 'url';
import { execSync } from 'child_process';

// The script stops here with exit code 1 unless BASETEN_API_KEY is set to a
// non-empty value.
const { BASETEN_API_KEY: apiKey } = process.env;
if (!apiKey) {
    console.error("Error: BASETEN_API_KEY environment variable is not set.");
    process.exit(1);
}

/**
 * Returns the part of a test file worth showing in documentation.
 *
 * If the file contains `const client = new OpenAI(`, the result starts there.
 * Everything from the last `main();` call onwards is dropped, and the text is
 * then cut right after the last `});` (inclusive), which discards the closing
 * braces of the enclosing function. If no client initialization is found, the
 * whole file content is returned.
 *
 * On a read error the error is printed and the process exits with code 1.
 *
 * @param {string} filePath - Path of the test file to read.
 * @returns {string} The extracted snippet, or the full file content.
 */
function extractCode(filePath) {
    try {
        const content = fs.readFileSync(filePath, 'utf8');
        
        // Find the client initialization
        const clientMatch = content.match(/const\s+client\s*=\s*new\s+OpenAI\(/);
        if (clientMatch) {
            const startIdx = clientMatch.index;
            let code = content.substring(startIdx);
            
            // Remove main() function call if present
            const mainCallIdx = code.lastIndexOf('main();');
            if (mainCallIdx > 0) {
                code = code.substring(0, mainCallIdx).trim();
            }
            
            // Cut after the last statement terminator. The full marker is kept
            // (including its ';') so the snippet ends on a complete statement.
            const statementEnd = '});';
            const lastBraceIdx = code.lastIndexOf(statementEnd);
            if (lastBraceIdx > 0) {
                code = code.substring(0, lastBraceIdx + statementEnd.length).trim();
            }
            
            return code;
        }
        
        return content;
    } catch (error) {
        console.error(`Error reading file: ${error}`);
        process.exit(1);
    }
}

// Require at least one argument after the script path: the test file to use.
const [nodeBinary, scriptPath, testFilePath] = process.argv;
if (process.argv.length < 3) {
    console.error(`Usage: ${nodeBinary} ${scriptPath} <test_file_path> [extract]`);
    process.exit(1);
}

// Two modes:
//   <script> <test_file> extract  -> print the documentation snippet of the test file
//   <script> <test_file>          -> run the test file through a generated ESM wrapper
if (process.argv.length > 3 && process.argv[3] === "extract") {
    console.log(extractCode(testFilePath));
} else {
    // The wrapper is written into this script's own directory.
    const tempDir = path.dirname(fileURLToPath(import.meta.url));
    const tempFile = path.join(tempDir, '_temp_runner.mjs');
    let failed = false;

    try {
        // Read test file content
        const testContent = fs.readFileSync(testFilePath, 'utf8');

        // Prepare the test source for inlining:
        //  - its own `openai` import is replaced, because the wrapper imports it;
        //  - its stand-alone `main();` call is removed, because the wrapper
        //    awaits main() itself. Leaving it in would run the test twice.
        const testBody = testContent
            .replace(/import[\s\S]*?openai";/, '// Imports handled by wrapper')
            .replace(/^[ \t]*main\(\);?[ \t]*$/gm, '// main() is invoked by the wrapper');

        // Create a standalone ESM file that runs the test code
        const wrapperCode = `
// ESM Wrapper for test file
import { OpenAI } from 'openai';
import fs from 'fs';
import path from 'path';

// Set up the environment
const apiKey = process.env.BASETEN_API_KEY;
if (!apiKey) {
    console.error("Error: BASETEN_API_KEY environment variable is not set.");
    process.exit(1);
}

// Define a safer async main function
async function runTest() {
    try {
        // Extract the main function from the test
        ${testBody}
        
        // Execute the test
        if (typeof main === 'function') {
            await main();
        }
    } catch (error) {
        console.error('Error executing test:', error);
        process.exit(1);
    }
}

// Run the test
runTest();
`;

        fs.writeFileSync(tempFile, wrapperCode);

        // Execute the wrapper with explicit ESM flag. The path is quoted so a
        // directory name containing spaces does not split the command.
        execSync(`node --experimental-modules "${tempFile}"`, { stdio: 'inherit' });
    } catch (error) {
        console.error(`Error running the test file: ${error}`);
        failed = true;
    } finally {
        // Remove the wrapper whether or not the run succeeded.
        if (fs.existsSync(tempFile)) {
            fs.unlinkSync(tempFile);
        }
    }

    // Exit only after cleanup; calling process.exit() inside the catch block
    // would skip the finally block.
    if (failed) {
        process.exit(1);
    }
}

import { OpenAI } from "openai";


// Asks the model to extract a name and an email address from free text and
// prints the result as formatted JSON. The output shape is enforced with a
// JSON schema passed in `response_format`; a failed call or unparsable reply
// is logged, not rethrown.
async function main() {
    
    // Initialize the client
    const client = new OpenAI({
        baseURL: "https://inference.baseten.co/v1",
        apiKey: process.env.BASETEN_API_KEY,
    });

    // Use the client for structured output
    try {
        const response = await client.chat.completions.create({
            model: "deepseek-ai/DeepSeek-V3-0324",
            messages: [
                { role: "system", content: "You are an expert at extracting information." },
                { role: "user", content: "My name is Jane Doe and my email is jane.doe@example.com. I'd like to know more about your services." },
            ],
            response_format: {
                // A `json_schema` payload belongs to the "json_schema" type;
                // "json_object" only asks for valid JSON with no schema.
                type: "json_schema",
                json_schema: {
                    name: "user_details",
                    description: "User contact information",
                    schema: {
                        type: "object",
                        properties: {
                            name: { 
                                type: "string", 
                                description: "The user's full name" 
                            },
                            email: { 
                                type: "string", 
                                description: "The user's email address" 
                            }
                        },
                        required: ["name", "email"],
                        // Strict mode requires objects to forbid extra properties.
                        additionalProperties: false
                    },
                    strict: true
                }
            }
        });

        // Parse and print the JSON output
        const content = response.choices[0].message.content;
        const parsedContent = JSON.parse(content);
        console.log("Structured Output:");
        console.log(JSON.stringify(parsedContent, null, 2));
    } catch (error) {
        console.error("Error making API call:", error);
    }
}

main();

import { OpenAI } from "openai";

// Stand-in weather lookup (would call a real API in production): always
// reports 72 and sunny for whatever location is passed, as a JSON string.
function getWeather(location) {
    const report = {
        location: location,
        temperature: "72",
        forecast: "sunny",
    };
    return JSON.stringify(report);
}

// Function-calling round trip:
//   1. ask the model a question while offering it the `get_weather` tool;
//   2. if it requests tool calls, run getWeather for EVERY requested call and
//      send all results back (one tool message per call, matched by id);
//   3. print the model's final answer.
// If the model answers directly without using the tool, that answer is printed.
async function main() {
    // Initialize the client
    const client = new OpenAI({
        baseURL: "https://inference.baseten.co/v1",
        apiKey: process.env.BASETEN_API_KEY,
    });

    const userMessage = { role: "user", content: "What's the weather like in Boston?" };

    // Make initial request with tools
    const response = await client.chat.completions.create({
        model: "deepseek-ai/DeepSeek-V3-0324",
        messages: [userMessage],
        tools: [{
            type: "function",
            function: {
                name: "get_weather",
                description: "Get the current weather",
                parameters: {
                    type: "object",
                    properties: {
                        location: { type: "string", description: "City name" }
                    },
                    required: ["location"]
                }
            }
        }]
    });

    const assistantMessage = response.choices[0].message;

    // `tool_calls` may be absent, null, or an empty array when no tool is used.
    const toolCalls = assistantMessage.tool_calls || [];
    if (toolCalls.length === 0) {
        console.log(assistantMessage.content);
        return;
    }

    // Call the function once per tool call and build the matching tool messages.
    const toolMessages = toolCalls.map(toolCall => {
        const args = JSON.parse(toolCall.function.arguments);
        return {
            tool_call_id: toolCall.id,
            role: "tool",
            name: toolCall.function.name,
            content: getWeather(args.location)
        };
    });

    // Submit function results back to model
    const finalResponse = await client.chat.completions.create({
        model: "deepseek-ai/DeepSeek-V3-0324",
        messages: [userMessage, assistantMessage, ...toolMessages]
    });

    console.log(finalResponse.choices[0].message.content);
}

main();

1. Bundling model weights in Truss

Step 1: Define AWS secrets in config.yaml

Step 2: Authenticate with AWS in model.py

Step 3: Deploy

2. Loading private model weights from S3

Load model weights without Hugging Face or S3

Data and storage

Baseten

Baseten is a platform for deploying and serving AI models performantly, scalably, and cost-efficiently.

Overview

Documentation

Quick start

Baseten delivers fast, scalable AI/ML inference with enterprise-grade security and reliability—whether in our cloud or yours.

Why Baseten

Baseten is a platform for building, serving, and scaling AI models in production.

How Baseten works

Concepts

Deploy, manage, and scale machine learning models with Baseten

Deployments

Manage your model’s release cycles with environments.

Environments

Resources

Autoscaling dynamically adjusts the number of active replicas to **handle variable traffic** while minimizing idle compute costs.

Autoscaling

Call your model

How to call a model that has a streaming-capable endpoint.

Streaming

Run asynchronous inference on deployed models

Async inference

Integrate your models with tools like LangChain, LiteLLM, and more.

Integrations

An introduction to Baseten Training for streamlining and managing the model training lifecycle.

Your first steps to creating and running training jobs on Baseten.

Getting started

Understanding the conceptual framework of Baseten Training for effective model development.

How to monitor, manage, and interact with your Baseten Training projects and jobs.

Management

How to deploy checkpoints from Baseten Training jobs as usable models.

Deploying checkpoints

Understand the load and performance of your model

Metrics

Every model deployment in your Baseten workspace has a status to represent its activity and health.

Status and health

Investigate the prediction flow in detail

Tracing

Manage payments and track overall Baseten usage

Billing and usage

Troubleshoot common problems during model deployment

Troubleshoot common problems during model inference

Inference

Building with Baseten

Deploy your first model

Optimize LLMs for low latency and high throughput

Fast LLMs with TensorRT-LLM

Run any LLM with vLLM

Deploy LLMs with SGLang

Build a RAG (retrieval-augmented generation) pipeline with Chains

RAG pipeline with Chains

Process hours of audio in seconds using efficient chunking, distributed inference, and optimized GPU resources.

Transcribe audio with Chains

Building a text-to-image model with Flux Schnell

Image generation

Deploy your ComfyUI workflow as an API endpoint

Deploy a ComfyUI project

Serve embedding, reranking, and classification models

Embeddings with BEI

Deploy any model in a pre-built Docker container

Dockerized model

LLM with Streaming

Building a text-to-speech model with Kokoro

Text to speech

Browse our library of open source models that are ready to deploy behind an API endpoint in seconds.

For deploying, managing, and interacting with machine learning models on Baseten.

Reference documentation

Set your model resources, dependencies, and more

Configure Truss

The inference API is used to call deployed models and chains.

The management API is used to manage models and deployments. It supports monitoring, CI/CD, and automation at both the model and workspace levels.

Deploy, manage, and develop Chains using the Truss CLI.

Chains

Chains CLI reference

Deploy, manage, and monitor training jobs using the Truss CLI.

Training

Training CLI reference

Python SDK for deploying and managing models with Truss.

Truss

Truss SDK Reference

Chains SDK Reference

Reference documentation for Baseten's Training SDK classes and configuration.

Current operational status of Baseten's services.

Baseten platform status

Fast, scalable inference in our cloud or yours

Welcome to Baseten!

Truss: Package and deploy AI models on Baseten

Getting Started

Text-to-image

Deploy a language model, with the model weights cached at build time

Fast Cold Starts with Cached Weights

Load a model that requires authentication with Hugging Face

Private Hugging Face Model

Deploy a model with both Python and system dependencies

Model with system packages

A guide to configuring a base image for your truss

Base Docker images

Deploy Custom Server from Docker image

Model weights

Deploy a model that makes use of pre- and post-processing

Pre/post-processing

Streaming output with an LLM

A guide on configuring your truss to use external packages

External (source) packages

How to run your own docker commands during the build stage

Running custom docker commands

Private Hugging Face model

Enable fast cold starts for a model with private Hugging Face weights

Deploy Llama 2 with Caching

A guide to setting concurrency for your model

Request concurrency

Accelerate cold starts by caching your weights

Caching model weights

A guide to using secrets securely in your ML models

Storing secrets in Baseten

Get more control by directly using the request object.

Using request objects / Cancellation

Get more control by directly creating the response object.

Returning response objects and SSEs

A guide to leveraging environments in your models

Access model environments

Customize the health of your deployments.

🆕 Custom health checks

Use code-first development tools to streamline model production.

🆕 Python driven configuration for models

Deployments and environments

Serve your model on the right instance type

Setting GPU resources

Scale from internal testing to the top of Hacker News

Fixing common problems during model deployment

Troubleshooting

How to call your model

How to stream model output

Async inference user guide

Secure the asynchronous inference results sent to your webhook

Securing async inference

Enforce an output schema on LLM inference

Structured output (JSON mode)

Use an LLM to select amongst provided tools

Function calling (tool use)

How to parse base64 output

How to do model I/O in binary

How to do model I/O with files

Use your Baseten models with tools like LangChain

Baseten model integrations

Fixing common problems during model inference

Chains: A new DX for deploying multi-component ML workflows

Build your first Chain

Glossary of Chains concepts and terminology

Architecture & Design

Local Development

Deploy

Invocation

Watch

Modularize and re-use Chainlet implementations

Subclassing

Streaming outputs, reducing latency, SSEs

Binary IO

Error Handling

Integrate deployed Truss models with stubs

Truss Integration

RAG Chain

Transcribe hours of audio to text in a few seconds

Audio Transcription Chain

Model performance overview

Deploy optimized model inference servers in minutes

Engine Builder overview

Automatically build and deploy a TensorRT-LLM model serving engine

Build your first LLM engine

Use `model.py` to customize engine behavior

Engine control in Python

Configure your TensorRT-LLM inference engine

Engine Builder configuration

Engineering your Truss and application for faster cold starts

How to get faster cold starts

Handle variable throughput with this autoscaling parameter

Setting concurrency

Specs and recommendations for every instance type on Baseten

Instance type reference

Reading model metrics

Export metrics from Baseten to your observability stack

Metrics export overview

Export metrics from Baseten to Prometheus

Export metrics to Prometheus

Export metrics to Datadog

Export metrics from Baseten to Grafana Cloud

Get started

Concepts

Development

Deployment

Inference

Training

Observability

Troubleshooting

Data and storage

1. Bundling model weights in Truss

2. Loading private model weights from S3

Step 1: Define AWS secrets in `config.yaml`

Step 2: Authenticate with AWS in `model.py`

Step 3: Deploy

Get started

Concepts

Development

Deployment

Inference

Training

Observability

Troubleshooting

1. Bundling model weights in Truss

2. Loading private model weights from S3

Step 1: Define AWS secrets in config.yaml

Step 2: Authenticate with AWS in model.py

Step 3: Deploy

1. Bundling model weights in Truss

2. Loading private model weights from S3

Step 1: Define AWS secrets in `config.yaml`

Step 2: Authenticate with AWS in `model.py`

Step 3: Deploy