> ## Documentation Index
> Fetch the complete documentation index at: https://docs.baseten.co/llms.txt
> Use this file to discover all available pages before exploring further.

# Rolling deployments

> Gradually shift traffic to a new deployment with replica-based rolling deployments.

export const RollingDeployConfig = () => {
  const ref = React.useRef(null);
  const init = React.useRef(false);
  React.useEffect(() => {
    if (!ref.current || init.current) return;
    init.current = true;
    const r = ref.current;
    const fs = "display:flex;justify-content:space-between;align-items:center;padding:6px 0;";
    function styles() {
      const dark = document.documentElement.classList.contains("dark");
      return {
        is: dark ? "width:56px;padding:3px 6px;border:1px solid #203026;border-radius:5px;font-family:SF Mono,ui-monospace,monospace;font-size:12px;text-align:right;color:#FFFFFF;background:#0C1D13;" : "width:56px;padding:3px 6px;border:1px solid #e5e7eb;border-radius:5px;font-family:SF Mono,ui-monospace,monospace;font-size:12px;text-align:right;color:#111827;",
        cs: dark ? "font-size:11px;font-family:SF Mono,ui-monospace,monospace;color:#B2BAB3;background:#0C1D13;padding:1px 4px;border-radius:3px;" : "font-size:11px;font-family:SF Mono,ui-monospace,monospace;color:#111827;background:#f9fafb;padding:1px 4px;border-radius:3px;",
        btn: "margin-top:6px;padding:5px 20px;background:#00B86B;color:#fff;border:none;border-radius:6px;font-size:11px;font-weight:600;cursor:pointer;font-family:inherit;"
      };
    }
    let s0 = styles();
    const fields = [["max_surge_percent", "s", 0, 50, 10], ["max_unavailable_percent", "u", 0, 50, 0], ["stabilization_time_seconds", "st", 0, 3600, 0], ["replica_overhead_percent", "oh", 0, 60, 0]];
    const grid = document.createElement("div");
    grid.style.cssText = "display:grid;grid-template-columns:1fr 1fr;gap:2px 16px;";
    const inputs = {};
    const codes = [];
    fields.forEach(function (f) {
      const row = document.createElement("div");
      row.style.cssText = fs;
      const code = document.createElement("code");
      code.textContent = f[0];
      code.style.cssText = s0.cs;
      const inp = document.createElement("input");
      inp.type = "number";
      inp.min = f[2];
      inp.max = f[3];
      inp.value = f[4];
      inp.style.cssText = s0.is;
      inp.id = "rd-" + f[1];
      inputs[f[1]] = inp;
      codes.push(code);
      row.appendChild(code);
      row.appendChild(inp);
      grid.appendChild(row);
    });
    r.appendChild(grid);
    const btn = document.createElement("button");
    btn.textContent = "Apply";
    btn.style.cssText = s0.btn;
    btn.onclick = function () {
      var s = parseInt(inputs.s.value) || 0, u = parseInt(inputs.u.value) || 0;
      if (s > 0 && u > 0) {
        u = 0;
        inputs.u.value = 0;
      }
      if (s === 0 && u === 0) {
        s = 10;
        inputs.s.value = 10;
      }
      var st = parseInt(inputs.st.value) || 0, oh = parseInt(inputs.oh.value) || 0;
      if (window._rdvApply) window._rdvApply(s, u, st, oh); else window._rdvPending = [s, u, st, oh];
    };
    r.appendChild(btn);
    const mo = new MutationObserver(function () {
      const ns = styles();
      Object.keys(inputs).forEach(function (k) {
        inputs[k].style.cssText = ns.is;
      });
      codes.forEach(function (c) {
        c.style.cssText = ns.cs;
      });
      btn.style.cssText = ns.btn;
    });
    mo.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ["class"]
    });
    return function () {
      mo.disconnect();
      init.current = false;
    };
  }, []);
  return <div ref={ref} />;
};

export const RollingDeployViz = () => {
  const ref = React.useRef(null);
  const initialized = React.useRef(false);
  React.useEffect(() => {
    if (!ref.current || initialized.current) return;
    initialized.current = true;
    const root = ref.current;
    let cleanup = null, destroyed = false, timer = null, retries = 0;
    const tryMount = () => {
      if (destroyed || !ref.current) return;
      if (window._rdEngine) {
        cleanup = mount();
      } else if (retries++ < 60) {
        timer = setTimeout(tryMount, 30);
      }
    };
    function mount() {
      const E = window._rdEngine;
      const W = E.W, H = E.H, N = E.N, STAGES = E.STAGES;
      let cfg = {
        s: 10,
        u: 0,
        st: 0,
        oh: 0
      }, cTT = 0, frozen = null;
      function cP() {
        const f = N + Math.round(N * cfg.oh / 100);
        const m = cfg.s > 0 ? "s" : "u";
        const sp = m === "s" ? cfg.s : cfg.u;
        const rs = Math.max(1, Math.round(sp * f / 100));
        return {
          f: f,
          m: m,
          rs: rs,
          ts: Math.ceil(100 * rs / f),
          sb: cfg.st > 0
        };
      }
      let P = cP();
      window._rdvApply = function (s, u, st, oh) {
        cfg = {
          s: s,
          u: u,
          st: st,
          oh: oh
        };
        P = cP();
        cTT = 0;
        curActive = P.f;
        candActive = 0;
        trafficPct = 0;
        targetTraffic = 0;
        phase = "ready";
        phaseStart = simT;
        scalingServer = {
          pool: null,
          idx: -1
        };
        requests = [];
        frozen = null;
        rebuildTabs();
        applyTheme();
      };
      let COL = E.getColors();
      const card = document.createElement("div");
      const strip = document.createElement("div");
      const stripL = document.createElement("div");
      stripL.style.cssText = "display:flex;flex-direction:column;gap:6px;flex:1;min-width:280px";
      const stripLabel = document.createElement("span");
      stripLabel.style.cssText = "font:500 10px ui-monospace,Menlo,monospace;text-transform:lowercase;color:#869089";
      stripLabel.textContent = "lifecycle status";
      const tabRow = document.createElement("div");
      tabRow.style.cssText = "display:flex;gap:6px;flex-wrap:wrap";
      const stripDesc = document.createElement("span");
      stripDesc.style.cssText = "font:400 12px system-ui,-apple-system,sans-serif;color:#869089";
      stripL.appendChild(stripLabel);
      stripL.appendChild(tabRow);
      stripL.appendChild(stripDesc);
      strip.appendChild(stripL);
      card.appendChild(strip);
      const canvas = document.createElement("canvas");
      canvas.style.cssText = "display:block;width:100%;max-width:" + W + "px;touch-action:pan-y";
      card.appendChild(canvas);
      root.appendChild(card);
      const ctx = canvas.getContext("2d");
      const dpr = window.devicePixelRatio || 1;
      canvas.width = W * dpr;
      canvas.height = H * dpr;
      canvas.style.height = H + "px";
      ctx.scale(dpr, dpr);
      function rebuildTabs() {
        tabRow.replaceChildren();
        STAGES.forEach(s => {
          const b = document.createElement("button");
          b.style.cssText = "padding:5px 12px;border-radius:6px;cursor:pointer;font:500 11px ui-monospace,Menlo,monospace";
          b.textContent = s;
          b.onclick = () => {
            if (frozen === s) {
              frozen = null;
              phaseStart = simT;
            } else {
              frozen = s;
              applyStage(s);
            }
            applyTheme();
          };
          tabRow.appendChild(b);
        });
      }
      function applyTheme() {
        COL = E.getColors();
        const d = E.isDark();
        card.style.cssText = "border:1px solid " + COL.brd + ";border-radius:10px;padding:18px 20px;margin:14px 0;background:" + COL.bg + ";max-width:" + W + "px";
        strip.style.cssText = "display:flex;flex-wrap:wrap;align-items:center;gap:14px;padding:12px 14px;border:1px solid " + COL.brdM + ";background:" + COL.surf + ";border-radius:8px;margin:0 0 12px";
        const cur = currentStage();
        Array.from(tabRow.children).forEach((b, i) => {
          const s = STAGES[i], active = s === cur, sc = E.stageColor(COL, s);
          b.style.background = active ? sc : d ? "#0C1D13" : "#fff";
          b.style.color = active ? "#fff" : COL.text;
          b.style.border = "1px solid " + (active ? sc : COL.brd);
        });
        stripDesc.textContent = frozen != null ? "Frozen on " + frozen + ". Click again to resume the cycle." : "Click any status to freeze the simulation on that stage.";
      }
      const LB_X = 95, LB_Y = H / 2, POOL_X = 230, CUR_Y = 60, CAND_Y = 170, LB_W = 96, LB_H = 28;
      function srvPos(i) {
        return POOL_X + i * (E.SRV_W + E.SRV_GAP);
      }
      let curActive = P.f, candActive = 0, trafficPct = 0, targetTraffic = 0;
      let phase = "ready", phaseStart = 0;
      let scalingServer = {
        pool: null,
        idx: -1
      };
      let requests = [], nextReq = 0, now = 0, simT = 0, visible = true;
      const obs = new IntersectionObserver(e => visible = e[0].isIntersecting, {
        threshold: 0.15
      });
      obs.observe(canvas);
      function pP() {
        if (candActive === 0) return "current";
        if (curActive === 0) return "candidate";
        return Math.random() * 100 < trafficPct ? "candidate" : "current";
      }
      function rS(p) {
        const c = p === "current" ? curActive : candActive;
        if (c === 0) return null;
        return {
          x: srvPos(Math.floor(Math.random() * c)) + E.SRV_W / 2,
          y: p === "current" ? CUR_Y : CAND_Y
        };
      }
      function spawn() {
        const p = pP(), t = rS(p);
        if (!t) return;
        requests.push({
          x: LB_X,
          y: LB_Y,
          tx: t.x,
          ty: t.y,
          pool: p,
          state: "fly",
          life: 0,
          maxLife: 600 + Math.random() * 500,
          alpha: 1
        });
      }
      function uR(dt) {
        for (let i = requests.length - 1; i >= 0; i--) {
          const r = requests[i];
          if (r.state === "fly") {
            const dx = r.tx - r.x, dy = r.ty - r.y, d = Math.sqrt(dx * dx + dy * dy);
            if (d < E.REQ_SPEED * dt) {
              r.x = r.tx;
              r.y = r.ty;
              r.state = "proc";
              r.life = 0;
            } else {
              r.x += dx / d * E.REQ_SPEED * dt;
              r.y += dy / d * E.REQ_SPEED * dt;
            }
          } else if (r.state === "proc") {
            r.life += dt;
            if (r.life > r.maxLife) r.state = "fade";
          } else {
            r.x += E.REQ_SPEED * dt * .08;
            r.alpha -= .02;
            if (r.alpha <= 0) requests.splice(i, 1);
          }
        }
      }
      function currentStage() {
        return frozen != null ? frozen : phase === "ready" ? "RELEASING" : phase === "done" ? "SUCCEEDED" : "RAMPING_UP";
      }
      function applyStage(s) {
        const half = Math.max(1, Math.floor(P.f / 2));
        if (s === "RELEASING") {
          curActive = P.f;
          candActive = 0;
          trafficPct = 0;
          targetTraffic = 0;
          cTT = 0;
          phase = "ready";
        } else if (s === "RAMPING_UP" || s === "PAUSED") {
          curActive = P.f - half;
          candActive = half;
          trafficPct = 40;
          targetTraffic = 40;
          cTT = 40;
          phase = "shift";
        } else if (s === "SUCCEEDED") {
          curActive = 0;
          candActive = P.f;
          trafficPct = 100;
          targetTraffic = 100;
          cTT = 100;
          phase = "done";
        }
        scalingServer = {
          pool: null,
          idx: -1
        };
        phaseStart = simT;
      }
      let prevStage = currentStage();
      function draw() {
        ctx.clearRect(0, 0, W, H);
        trafficPct = E.lerp(trafficPct, targetTraffic, 0.04);
        const cur = currentStage();
        if (cur !== prevStage) {
          prevStage = cur;
          applyTheme();
        }
        E.txt(ctx, COL, "Requests", 18, LB_Y - 16, {
          size: 10,
          color: COL.muted,
          align: "center"
        });
        for (let i = 0; i < 3; i++) E.circ(ctx, 8 + i * 10, LB_Y + 2, 3, COL.green, 0.25 + Math.sin(now / 350 + i * 1.2) * 0.2);
        E.rr(ctx, LB_X - LB_W / 2, LB_Y - LB_H / 2, LB_W, LB_H, 8, COL.text);
        E.txt(ctx, COL, "Load balancer", LB_X, LB_Y, {
          size: 10,
          weight: "700",
          color: COL.white,
          align: "center"
        });
        const as = LB_X + LB_W / 2 + 4, ae = POOL_X - 10;
        const cHT = curActive > 0 && trafficPct < 99, dHT = candActive > 0 && trafficPct > 1;
        E.dA(ctx, COL, as, LB_Y, ae, CUR_Y, cHT, Math.max(1.5, (1 - trafficPct / 100) * 3.5));
        E.dA(ctx, COL, as, LB_Y, ae, CAND_Y, dHT, Math.max(1.5, trafficPct / 100 * 3.5));
        const mx = (as + ae) / 2;
        if (cHT) E.txt(ctx, COL, Math.round(100 - trafficPct) + "%", mx, CUR_Y - 12, {
          size: 10,
          color: COL.muted,
          align: "center"
        });
        if (dHT) E.txt(ctx, COL, Math.round(trafficPct) + "%", mx, CAND_Y + 14, {
          size: 10,
          color: COL.muted,
          align: "center"
        });
        const pcx = POOL_X + (P.f * (E.SRV_W + E.SRV_GAP) - E.SRV_GAP) / 2;
        E.txt(ctx, COL, "Current deployment", pcx, CUR_Y - E.SRV_H / 2 - 12, {
          size: 10,
          color: COL.muted,
          align: "center"
        });
        for (let i = 0; i < P.f; i++) E.dS(ctx, COL, srvPos(i), CUR_Y - E.SRV_H / 2, i < curActive, COL.green, scalingServer.pool === "current" && scalingServer.idx === i, i, now);
        E.txt(ctx, COL, "Candidate deployment", pcx, CAND_Y + E.SRV_H / 2 + 14, {
          size: 10,
          color: COL.muted,
          align: "center"
        });
        for (let i = 0; i < P.f; i++) E.dS(ctx, COL, srvPos(i), CAND_Y - E.SRV_H / 2, i < candActive, COL.bright, scalingServer.pool === "candidate" && scalingServer.idx === i, i, now);
        for (let i = 0; i < requests.length; i++) {
          const r = requests[i];
          const col = r.pool === "current" ? COL.green : COL.bright;
          let rad = E.REQ_R;
          if (r.state === "proc") rad = E.REQ_R * (1 - r.life / r.maxLife * 0.7);
          E.circ(ctx, r.x, r.y, Math.max(1.5, rad), col, r.alpha);
        }
        if (frozen === "PAUSED") {
          const px = W - 32, py = 18;
          ctx.fillStyle = COL.phaseStab;
          ctx.fillRect(px, py - 7, 4, 14);
          ctx.fillRect(px + 7, py - 7, 4, 14);
          E.txt(ctx, COL, "paused", px - 6, py, {
            size: 10,
            weight: "700",
            color: COL.phaseStab,
            align: "right",
            mono: true
          });
        }
        const descY = H - 14, tPct = Math.round(trafficPct);
        ctx.beginPath();
        ctx.moveTo(10, descY - 10);
        ctx.lineTo(W - 10, descY - 10);
        ctx.strokeStyle = COL.grayFaint;
        ctx.lineWidth = 1;
        ctx.stroke();
        E.txt(ctx, COL, curActive + " current · " + candActive + " candidate · " + tPct + "% to candidate", 10, descY, {
          size: 10,
          weight: "500",
          color: COL.faint
        });
        const modeLabel = P.m === "s" ? "max_surge " + cfg.s + "%" : "max_unavailable " + cfg.u + "%";
        E.txt(ctx, COL, modeLabel, W - 10, descY, {
          size: 10,
          weight: "400",
          color: COL.faint,
          align: "right",
          mono: true
        });
      }
      function step() {
        const el = simT - phaseStart;
        switch (phase) {
          case "ready":
            if (el > E.HOLD_MS) {
              phase = P.m === "s" ? "scale_up" : "scale_down";
              phaseStart = simT;
            }
            break;
          case "scale_up":
            scalingServer = candActive < P.f ? {
              pool: "candidate",
              idx: candActive
            } : {
              pool: null,
              idx: -1
            };
            if (el > E.PHASE_MS * 0.6) {
              candActive = Math.min(P.f, candActive + P.rs);
              scalingServer = {
                pool: null,
                idx: -1
              };
              cTT = Math.min(100, cTT + P.ts);
              targetTraffic = cTT;
              phase = "shift";
              phaseStart = simT;
            }
            break;
          case "shift":
            if (el > E.PHASE_MS) {
              if (cTT >= 100 && candActive >= P.f && curActive <= 0) {
                phase = "done";
                targetTraffic = 100;
              } else if (P.m === "s") {
                phase = "scale_down";
              } else if (P.sb) {
                phase = "stab";
              } else {
                phase = "scale_down";
              }
              phaseStart = simT;
            }
            break;
          case "scale_down":
            scalingServer = curActive > 0 ? {
              pool: "current",
              idx: curActive - 1
            } : {
              pool: null,
              idx: -1
            };
            if (el > E.PHASE_MS * 0.6) {
              curActive = Math.max(0, curActive - P.rs);
              scalingServer = {
                pool: null,
                idx: -1
              };
              if (cTT >= 100 && candActive >= P.f && curActive <= 0) {
                phase = "done";
                targetTraffic = 100;
              } else if (P.m === "s") {
                phase = P.sb ? "stab" : "scale_up";
              } else {
                phase = "scale_up";
              }
              phaseStart = simT;
            }
            break;
          case "stab":
            if (el > E.STAB_MS) {
              phase = P.m === "s" ? "scale_up" : "scale_down";
              phaseStart = simT;
            }
            break;
          case "done":
            if (el > E.HOLD_MS * 1.2) {
              curActive = P.f;
              candActive = 0;
              trafficPct = 0;
              targetTraffic = 0;
              cTT = 0;
              phase = "ready";
              phaseStart = simT;
              requests = [];
            }
            break;
        }
      }
      rebuildTabs();
      applyTheme();
      if (window._rdvPending) {
        const p = window._rdvPending;
        window._rdvPending = null;
        window._rdvApply(p[0], p[1], p[2], p[3]);
      }
      let last = 0, raf = 0;
      function loop(ts) {
        raf = requestAnimationFrame(loop);
        if (!visible) {
          last = ts;
          return;
        }
        const dt = Math.min(ts - last, 50);
        last = ts;
        now += dt;
        nextReq -= dt;
        if (nextReq <= 0 && phase !== "done") {
          spawn();
          nextReq = 1000 / E.RPS + (Math.random() - .5) * 200;
        }
        uR(dt);
        if (frozen == null) {
          simT += dt;
          step();
        }
        draw();
      }
      raf = requestAnimationFrame(loop);
      const themeObs = new MutationObserver(applyTheme);
      themeObs.observe(document.documentElement, {
        attributes: true,
        attributeFilter: ["class"]
      });
      return () => {
        cancelAnimationFrame(raf);
        obs.disconnect();
        themeObs.disconnect();
        card.remove();
      };
    }
    tryMount();
    return () => {
      destroyed = true;
      if (timer) clearTimeout(timer);
      if (cleanup) cleanup();
      initialized.current = false;
    };
  }, []);
  return <div ref={ref} />;
};

export const RollingDeployEngine = () => {
  React.useEffect(() => {
    if (window._rdEngine) return;
    const W = 620, H = 260, N = 5, RPS = 2.8, REQ_SPEED = 2.5, REQ_R = 5;
    const SRV_W = 32, SRV_H = 32, SRV_R = 8, SRV_GAP = 10, LB_SIZE = 28;
    const PHASE_MS = 1200, HOLD_MS = 2800, STAB_MS = 800;
    const STAGES = ["RELEASING", "RAMPING_UP", "PAUSED", "SUCCEEDED"];
    const isDark = () => document.documentElement.classList.contains("dark");
    const lerp = (a, b, t) => a + (b - a) * Math.min(1, t);
    function getColors() {
      if (isDark()) return {
        green: "#17D465",
        bright: "#19E76E",
        greenFill: "rgba(23,212,101,0.18)",
        brightFill: "rgba(25,231,110,0.18)",
        gray: "#344339",
        grayFaint: "#0C1D13",
        text: "#B2BAB3",
        muted: "#9CA59E",
        faint: "#869089",
        white: "#fff",
        phaseRelease: "#4A90FF",
        phaseStab: "#F7C42F",
        phaseShift: "#19E76E",
        bg: "#021309",
        brd: "#344339",
        brdM: "#203026",
        surf: "#0C1D13",
        titleC: "#dee4de",
        descC: "#9CA59E"
      };
      return {
        green: "#00B86B",
        bright: "#19E76E",
        greenFill: "rgba(0,184,107,0.12)",
        brightFill: "rgba(25,231,110,0.12)",
        gray: "#d1d5db",
        grayFaint: "#f3f4f6",
        text: "#374151",
        muted: "#6b7280",
        faint: "#9ca3af",
        white: "#fff",
        phaseRelease: "#2176FF",
        phaseStab: "#d97706",
        phaseShift: "#059669",
        bg: "#fff",
        brd: "#dee4de",
        brdM: "#f4f9f3",
        surf: "#f4f9f3",
        titleC: "#0c1d13",
        descC: "#5a675e"
      };
    }
    function stageColor(C, s) {
      return s === "SUCCEEDED" ? C.green : s === "PAUSED" ? C.phaseStab : s === "RELEASING" ? C.phaseRelease : C.phaseShift;
    }
    function setRich(el, s) {
      el.replaceChildren();
      const parts = s.split("`");
      for (let i = 0; i < parts.length; i++) {
        if (i % 2 === 0) {
          if (parts[i]) el.appendChild(document.createTextNode(parts[i]));
        } else {
          const c = document.createElement("code");
          c.textContent = parts[i];
          c.style.cssText = "font-family:ui-monospace,Menlo,monospace;font-size:0.92em;background:" + (isDark() ? "rgba(255,255,255,0.08)" : "rgba(0,0,0,0.05)") + ";padding:1px 4px;border-radius:3px";
          el.appendChild(c);
        }
      }
    }
    function rr(ctx, x, y, w, h, r, fill, stroke, lw) {
      ctx.beginPath();
      ctx.roundRect(x, y, w, h, r);
      if (fill) {
        ctx.fillStyle = fill;
        ctx.fill();
      }
      if (stroke) {
        ctx.strokeStyle = stroke;
        ctx.lineWidth = lw || 1.5;
        ctx.stroke();
      }
    }
    function circ(ctx, x, y, r, fill, a) {
      ctx.globalAlpha = a != null ? a : 1;
      ctx.beginPath();
      ctx.arc(x, y, r, 0, Math.PI * 2);
      ctx.fillStyle = fill;
      ctx.fill();
      ctx.globalAlpha = 1;
    }
    function txt(ctx, C, s, x, y, opts) {
      opts = opts || ({});
      ctx.font = (opts.weight || "600") + " " + (opts.size || 11) + "px " + (opts.mono ? "'SF Mono', ui-monospace, monospace" : "Inter, system-ui, sans-serif");
      ctx.fillStyle = opts.color || C.text;
      ctx.textAlign = opts.align || "left";
      ctx.textBaseline = "middle";
      ctx.fillText(s, x, y);
    }
    function dA(ctx, C, x1, y1, x2, y2, ac, th) {
      const a = Math.atan2(y2 - y1, x2 - x1);
      const bx = x2 - 8 * Math.cos(a), by = y2 - 8 * Math.sin(a);
      ctx.beginPath();
      ctx.moveTo(x1, y1);
      ctx.lineTo(bx, by);
      ctx.strokeStyle = ac ? C.green : C.gray;
      ctx.lineWidth = th || 2;
      ctx.lineCap = "round";
      ctx.stroke();
      ctx.beginPath();
      ctx.moveTo(x2, y2);
      ctx.lineTo(x2 - 8 * Math.cos(a - .35), y2 - 8 * Math.sin(a - .35));
      ctx.lineTo(x2 - 8 * Math.cos(a + .35), y2 - 8 * Math.sin(a + .35));
      ctx.closePath();
      ctx.fillStyle = ac ? C.green : C.gray;
      ctx.fill();
    }
    function dS(ctx, C, x, y, on, c, hl, i, now) {
      if (on) {
        rr(ctx, x, y, SRV_W, SRV_H, SRV_R, c === C.green ? C.greenFill : C.brightFill);
        rr(ctx, x, y, SRV_W, SRV_H, SRV_R, null, c, 2);
        txt(ctx, C, String(i + 1), x + SRV_W / 2, y + SRV_H / 2, {
          size: 10,
          weight: "700",
          color: c,
          align: "center"
        });
        if (hl) {
          ctx.globalAlpha = .3 + Math.sin(now / 150) * .2;
          rr(ctx, x - 3, y - 3, SRV_W + 6, SRV_H + 6, SRV_R + 2, null, c, 2);
          ctx.globalAlpha = 1;
        }
      } else {
        ctx.setLineDash([4, 3]);
        rr(ctx, x, y, SRV_W, SRV_H, SRV_R, null, C.grayFaint, 1);
        ctx.setLineDash([]);
      }
    }
    window._rdEngine = {
      W,
      H,
      N,
      RPS,
      REQ_SPEED,
      REQ_R,
      SRV_W,
      SRV_H,
      SRV_R,
      SRV_GAP,
      LB_SIZE,
      PHASE_MS,
      HOLD_MS,
      STAB_MS,
      STAGES,
      isDark,
      getColors,
      lerp,
      stageColor,
      setRich,
      rr,
      circ,
      txt,
      dA,
      dS
    };
  }, []);
  return <span />;
};

<RollingDeployEngine />

Rolling deployments replace replicas incrementally when promoting a deployment to an environment.
Instead of swapping all traffic at once, rolling deployments scale up the candidate deployment, shift traffic proportionally, and scale down the previous deployment in controlled steps.
Autoscaling continues throughout the rollout for environments where `min_replica < max_replica`, so both deployments scale up to meet traffic demand as it shifts between them.

Use rolling deployments when you need zero-downtime updates with the ability to pause, cancel, or force-complete the deployment at any point.

<Note>
  Rolling deployments are not supported for [Chains](/chains/overview). This feature is available for individual model deployments only.
</Note>

## How rolling deployments work

A rolling deployment follows a repeating three-step cycle:

1. **Scale up** candidate deployment replicas by the configured percentage.
2. **Shift traffic** proportionally to match the new replica ratio.
3. **Scale down** the previous deployment replicas by the same percentage.

This cycle repeats until all traffic and replicas run on the candidate deployment, at which point it becomes the active deployment in the environment.

The diagram below shows this cycle in action. The tab strip mirrors the promotion lifecycle: a promotion enters `RELEASING` when it starts, sits in `RAMPING_UP` while replicas scale and traffic shifts, can pause as `PAUSED`, and lands at `SUCCEEDED` once the candidate serves all traffic. Click any status to freeze the simulation on that stage, then click it again to resume.

<RollingDeployViz />

<Accordion title="Configure rolling_deploy_config">
  Adjust the values and click **Apply** to restart the simulation with your configuration.

  <RollingDeployConfig />
</Accordion>

### Provisioning modes

Rolling deployments support two mutually exclusive provisioning modes.
You must configure exactly one:

* `max_surge_percent`: Scales up candidate replicas before scaling down previous replicas.
* `max_unavailable_percent`: Scales down previous replicas before scaling up candidate replicas.

The two values can't both be non-zero, and they can't both be zero.

## Enabling rolling deployments

Enable rolling deployments on any environment by updating the environment's promotion settings.
Rolling deployments are disabled by default.

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X PATCH \
      https://api.baseten.co/v1/models/{model_id}/environments/production \
      -H "Authorization: Api-Key $BASETEN_API_KEY" \
      -H "Content-Type: application/json" \
      -d '{
        "promotion_settings": {
          "rolling_deploy": true,
          "rolling_deploy_config": {
            "max_surge_percent": 10,
            "max_unavailable_percent": 0,
            "stabilization_time_seconds": 60,
            "replica_overhead_percent": 0
          }
        }
      }'
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    import requests
    import os

    API_KEY = os.environ.get("BASETEN_API_KEY")

    response = requests.patch(
        "https://api.baseten.co/v1/models/{model_id}/environments/production",
        headers={"Authorization": f"Api-Key {API_KEY}"},
        json={
            "promotion_settings": {
                "rolling_deploy": True,
                "rolling_deploy_config": {
                    "max_surge_percent": 10,
                    "max_unavailable_percent": 0,
                    "stabilization_time_seconds": 60,
                    "replica_overhead_percent": 0,
                },
            }
        },
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

Once rolling deployments are enabled, any subsequent [promotion to the environment](/reference/management-api/deployments/promote/promotes-a-deployment-to-an-environment) uses the rolling deployment workflow.

## Configuration reference

Configure rolling deployments through the `rolling_deploy_config` object in the environment's `promotion_settings`.

<ParamField body="max_surge_percent" type="integer" default="10">
  Percentage of additional replicas to provision during each step. Set to `0` to use max unavailable mode instead.

  **Range:** 0–50
</ParamField>

<ParamField body="max_unavailable_percent" type="integer" default="0">
  Percentage of replicas that can be unavailable during each step. Set to `0` to use max surge mode instead.

  **Range:** 0–50
</ParamField>

<ParamField body="stabilization_time_seconds" type="integer" default="0">
  Seconds to wait after each traffic shift before proceeding to the next step. Use this to monitor metrics between steps.

  **Range:** 0–3600
</ParamField>

<ParamField body="replica_overhead_percent" type="integer" default="0">
  Percentage of additional replicas to pre-provision on the current deployment before the rolling deployment starts. Useful for environments without autoscaling (`min_replica == max_replica`) or as a buffer for anticipated traffic spikes during the rollout.

  **Range:** 0–500
</ParamField>

Additional promotion settings configured at the `promotion_settings` level:

<ParamField body="rolling_deploy" type="boolean" default="false">
  Enables rolling deployments for the environment.
</ParamField>

## Deployment statuses

The `in_progress_promotion` field on the [environment detail endpoint](/reference/management-api/environments/get-an-environments-details) tracks the current state of a rolling deployment.

| Status         | Description                                                                                                                                        |
| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| `RELEASING`    | Candidate deployment is building and initializing replicas.                                                                                        |
| `RAMPING_UP`   | Scaling up candidate replicas and shifting traffic.                                                                                                |
| `PAUSED`       | Rolling deployment is paused at its current traffic split. No further promotion steps run, but in-flight replica changes and autoscaling continue. |
| `RAMPING_DOWN` | Graceful cancel in progress. Traffic is shifting back to the previous deployment.                                                                  |
| `SUCCEEDED`    | Rolling deployment completed. The candidate is now the active deployment.                                                                          |
| `FAILED`       | Rolling deployment failed. Traffic remains on the previous deployment.                                                                             |
| `CANCELED`     | Rolling deployment was canceled. Traffic returned to the previous deployment.                                                                      |

The `in_progress_promotion` object also includes `percent_traffic_to_new_version`, which reports the current percentage of traffic routed to the candidate deployment.

## Deployment control actions

Pause, resume, and force roll forward act on the rolling deployment between steps, not immediately. Replica changes already in progress finish before the action takes effect, so the rolling deployment can keep scaling for a short time after you trigger the action.

For example, if the candidate deployment is at 20% traffic and has just been told to scale from 2 to 4 replicas, clicking pause lets the candidate finish scaling to 4 replicas. The traffic split stays pinned at 20% until you resume.

### Pause

Pauses the rolling deployment. Use this to inspect metrics or logs before proceeding.

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X POST \
      https://api.baseten.co/v1/models/{model_id}/environments/production/pause_promotion \
      -H "Authorization: Api-Key $BASETEN_API_KEY"
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.post(
        "https://api.baseten.co/v1/models/{model_id}/environments/production/pause_promotion",
        headers={"Authorization": f"Api-Key {API_KEY}"},
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

### Resume

Resumes a paused rolling deployment from where it left off.

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X POST \
      https://api.baseten.co/v1/models/{model_id}/environments/production/resume_promotion \
      -H "Authorization: Api-Key $BASETEN_API_KEY"
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.post(
        "https://api.baseten.co/v1/models/{model_id}/environments/production/resume_promotion",
        headers={"Authorization": f"Api-Key {API_KEY}"},
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

### Cancel

Gracefully cancels the rolling deployment. Traffic ramps back to the previous deployment and candidate replicas scale down.

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X POST \
      https://api.baseten.co/v1/models/{model_id}/environments/production/cancel_promotion \
      -H "Authorization: Api-Key $BASETEN_API_KEY"
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.post(
        "https://api.baseten.co/v1/models/{model_id}/environments/production/cancel_promotion",
        headers={"Authorization": f"Api-Key {API_KEY}"},
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

The response includes a `status` of either `CANCELED` (instant cancel for non-rolling deployments) or `RAMPING_DOWN` (graceful rollback for rolling deployments).

### Force cancel

Immediately cancels the rolling deployment and returns all traffic to the previous deployment. Use this when you need to roll back without waiting for the graceful ramp-down.

<Warning>
  Force canceling may cause brief service disruption if the previous deployment
  is under-provisioned.
</Warning>

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X POST \
      https://api.baseten.co/v1/models/{model_id}/environments/production/force_cancel_promotion \
      -H "Authorization: Api-Key $BASETEN_API_KEY"
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.post(
        "https://api.baseten.co/v1/models/{model_id}/environments/production/force_cancel_promotion",
        headers={"Authorization": f"Api-Key {API_KEY}"},
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

### Force roll forward

Completes the rolling deployment without running the remaining steps, shifting all traffic to the candidate deployment. Replica changes already in progress finish before the action takes effect. This works even if the deployment is in the process of rolling back.

<Warning>
  Force rolling forward may promote an under-provisioned deployment if the
  candidate has not finished scaling up.
</Warning>

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X POST \
      https://api.baseten.co/v1/models/{model_id}/environments/production/force_roll_forward_promotion \
      -H "Authorization: Api-Key $BASETEN_API_KEY"
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.post(
        "https://api.baseten.co/v1/models/{model_id}/environments/production/force_roll_forward_promotion",
        headers={"Authorization": f"Api-Key {API_KEY}"},
    )

    print(response.json())
    ```
  </Tab>
</Tabs>

## Autoscaling during rolling deployments

For environments configured with autoscaling (`min_replica < max_replica`), Baseten continues to scale your deployment during a rolling deployment to meet traffic demand. Both the previous and candidate deployments scale up based on combined demand, and new capacity is distributed proportionally to the current traffic split.

For example, with traffic split 60/40 between the previous and candidate deployments, a demand increase of 10 replicas adds 6 replicas to the previous deployment and 4 to the candidate.

A few constraints apply during the rolling deployment:

* Autoscaling only adds replicas during a rolling deployment. Replicas are not removed in response to falling demand until the rolling deployment completes.
* Capacity management continues during a `PAUSED` rolling deployment. Pausing stops the traffic shift, not capacity management. If traffic increases while paused, both deployments still scale up.
* The combined replica count is capped at the environment's `max_replica`.

### Environments without autoscaling

Environments where `min_replica == max_replica` have no autoscaling configured, so replica counts stay pinned during the rolling deployment. To pre-provision additional headroom for traffic spikes, set `replica_overhead_percent` to add replicas to the previous deployment before any traffic shifts. Use `stabilization_time_seconds` to wait between steps and monitor metrics before the next traffic shift.

## Deployment cleanup

After a rolling deployment completes, the `promotion_cleanup_strategy` setting controls what happens to the previous deployment.

* `SCALE_TO_ZERO`: Scales the previous deployment to zero replicas. It remains available for reactivation. This is the default.
* `KEEP`: Leaves the previous deployment running at its current replica count.
* `DEACTIVATE`: Deactivates the previous deployment. It stops serving traffic and releases all resources.

Set it alongside your other promotion settings:

<Tabs>
  <Tab title="cURL">
    ```bash theme={"system"}
    curl -X PATCH \
      https://api.baseten.co/v1/models/{model_id}/environments/production \
      -H "Authorization: Api-Key $BASETEN_API_KEY" \
      -H "Content-Type: application/json" \
      -d '{
        "promotion_settings": {
          "promotion_cleanup_strategy": "DEACTIVATE"
        }
      }'
    ```
  </Tab>

  <Tab title="Python">
    ```python theme={"system"}
    response = requests.patch(
        "https://api.baseten.co/v1/models/{model_id}/environments/production",
        headers={"Authorization": f"Api-Key {API_KEY}"},
        json={
            "promotion_settings": {
                "promotion_cleanup_strategy": "DEACTIVATE"
            }
        },
    )

    print(response.json())
    ```
  </Tab>
</Tabs>
