; }; export const FrontierGatewayBudgetLedgerEngine = () => { React.useEffect(() => { if (window._fgBudgetMount) return; const W = 620, H = 286, padL = 46, padR = 18, padT = 38, padB = 42, N = 180; const iW = W - padL - padR, iH = H - padT - padB; const isDark = () => document.documentElement.classList.contains("dark"); const C = () => isDark() ? { text: "#dee4de", sub: "#9CA59E", grid: "#203026", chip: "#07160d", brd: "#344339", hl: "rgba(180,186,179,0.10)", acct: "#17D465", acctA: "rgba(23,212,101,0.20)", fin: "#60a5fa", eng: "#2dd4bf", amber: "#f3ba3f", red: "#fb7185", redA: "rgba(251,113,133,0.15)", body: "#dee4de" } : { text: "#021309", sub: "#5a675e", grid: "#dee4de", chip: "#ffffff", brd: "#dee4de", hl: "rgba(200,207,200,0.40)", acct: "#0e863f", acctA: "rgba(178,247,207,0.52)", fin: "#1960d3", eng: "#0f766e", amber: "#9c7400", red: "#be123c", redA: "rgba(244,63,94,0.10)", body: "#021309" }; const clamp = (v, a, b) => Math.max(a, Math.min(b, v)); function setRich(el, s) { el.replaceChildren(); const parts = s.split("`"); for (let i = 0; i < parts.length; i++) { if (i % 2 === 0) { if (parts[i]) el.appendChild(document.createTextNode(parts[i])); } else { const code = document.createElement("code"); code.textContent = parts[i]; code.style.cssText = "font-family:ui-monospace,Menlo,monospace;font-size:0.92em;background:" + (isDark() ? 
"rgba(255,255,255,0.08)" : "rgba(0,0,0,0.05)") + ";padding:1px 4px;border-radius:3px"; el.appendChild(code); } } } function tx(ctx, s, x, y, c, a, sz, w) { ctx.font = (w || 500) + " " + (sz || 9) + "px ui-monospace,Menlo,monospace"; ctx.fillStyle = c; ctx.textAlign = a || "left"; ctx.textBaseline = "middle"; ctx.fillText(s, x, y); } function label(ctx, s, x, y, c, sz) { ctx.font = "650 " + (sz || 11) + "px system-ui,-apple-system,sans-serif"; ctx.fillStyle = c; ctx.textAlign = "left"; ctx.textBaseline = "middle"; ctx.fillText(s, x, y); } function build() { const acct = [], fin = [], eng = [], deny = [], ticks = []; let a = 100, f = 70, e = 70; for (let i = 0; i < N; i++) { let fs = 0, es = 0, blocked = false, fOk = false, eOk = false; if (i >= 10 && i < 80) fs = 1.0; else if (i >= 80 && i < 160) es = 1.0; if (fs && a > 0 && f > 0) { f -= fs; a -= fs; fOk = true; } if (es && a > 0 && e > 0) { e -= es; a -= es; eOk = true; } if (es && !eOk && a <= 0 && e > 0) blocked = true; a = clamp(a, 0, 100); f = clamp(f, 0, 70); e = clamp(e, 0, 70); acct.push(a); fin.push(f); eng.push(e); deny.push(blocked); if ((fOk || eOk) && i % 5 === 0) ticks.push([i, eOk ? "e" : "f"]); } return { acct, fin, eng, deny, ticks }; } const data = build(); const xFor = i => padL + i / (N - 1) * iW; const yFor = v => padT + iH - v / 100 * iH; function pLine(ctx, arr, yFn) { ctx.beginPath(); for (let i = 0; i < arr.length; i++) { const x = xFor(i), y = yFn(arr[i]); if (i === 0) ctx.moveTo(x, y); else ctx.lineTo(x, y); } } function pArea(ctx, arr, yFn) { ctx.beginPath(); ctx.moveTo(xFor(0), padT + iH); for (let i = 0; i < arr.length; i++) ctx.lineTo(xFor(i), yFn(arr[i])); ctx.lineTo(xFor(N - 1), padT + iH); ctx.closePath(); } function dot(ctx, x, y, c, r) { ctx.fillStyle = c; ctx.beginPath(); ctx.arc(x, y, r || 3.5, 0, Math.PI * 2); ctx.fill(); ctx.strokeStyle = isDark() ? 
"#021309" : "#fff"; ctx.lineWidth = 1.5; ctx.stroke(); } window._fgBudgetMount = function (root, opts) { const cv = document.createElement("canvas"); cv.style.cssText = "display:block;width:100%;max-width:" + W + "px;height:auto;touch-action:pan-y"; cv.setAttribute("role", "img"); cv.setAttribute("aria-label", "Frontier Gateway budget over requests with shared account pool, team budgets, rejection window, and usage event ticks"); root.appendChild(cv); const ctx = cv.getContext("2d"), dpr = window.devicePixelRatio || 1; cv.width = W * dpr; cv.height = H * dpr; cv.style.height = H + "px"; ctx.scale(dpr, dpr); const strip = document.createElement("div"), sDot = document.createElement("span"), sTxt = document.createElement("div"), sTit = document.createElement("div"), sBod = document.createElement("div"); sDot.style.cssText = "flex:0 0 auto;width:8px;height:8px;border-radius:50%;margin-top:6px;background:#9CA59E"; sTxt.style.cssText = "flex:1;min-width:0"; sTit.style.cssText = "font:500 11px ui-monospace,Menlo,monospace;color:#869089;margin:0 0 2px;letter-spacing:0"; sBod.style.cssText = "font:400 13px/1.4 system-ui,-apple-system,sans-serif;margin:0"; sTxt.appendChild(sTit); sTxt.appendChild(sBod); strip.appendChild(sDot); strip.appendChild(sTxt); root.appendChild(strip); let hover = null, visible = true, raf = 0, t0 = performance.now(), dirty = true; const obs = new IntersectionObserver(e => visible = e[0].isIntersecting, { threshold: 0.15 }); obs.observe(cv); const themeObs = new MutationObserver(() => { dirty = true; }); themeObs.observe(document.documentElement, { attributes: true, attributeFilter: ["class"] }); function setHover(clientX) { const r = cv.getBoundingClientRect(); const x = (clientX - r.left) / r.width * W; hover = clamp(Math.round((x - padL) / iW * (N - 1)), 0, N - 1); dirty = true; } cv.addEventListener("mousemove", e => setHover(e.clientX)); cv.addEventListener("mouseleave", () => { hover = null; dirty = true; }); cv.addEventListener("touchstart", 
e => { if (e.touches[0]) setHover(e.touches[0].clientX); }, { passive: true }); cv.addEventListener("touchmove", e => { if (e.touches[0]) setHover(e.touches[0].clientX); }, { passive: true }); cv.addEventListener("touchend", () => { hover = null; dirty = true; }); function annAt(i) { return (opts.ann || []).find(a => i >= a[0] && i <= a[1]); } function drawLegend(col) { const items = [["org shared pool", col.acct], ["finance team budget", col.fin], ["engineering team budget", col.eng]]; let x = padL; for (const it of items) { ctx.strokeStyle = it[1]; ctx.lineWidth = 2; ctx.beginPath(); ctx.moveTo(x, 18); ctx.lineTo(x + 18, 18); ctx.stroke(); tx(ctx, it[0], x + 24, 18, col.sub, "left", 9); x += it[0].length * 6.6 + 42; } } function draw() { const col = C(); const active = hover == null ? Math.floor((performance.now() - t0) / 36 % N) : hover; const aA = annAt(active); ctx.clearRect(0, 0, W, H); ctx.strokeStyle = col.grid; ctx.lineWidth = 1; for (let i = 0; i <= 4; i++) { const y = yFor(i * 25); ctx.setLineDash(i ? [2, 3] : []); ctx.beginPath(); ctx.moveTo(padL, y); ctx.lineTo(padL + iW, y); ctx.stroke(); tx(ctx, i * 25 + "M", padL - 6, y, col.sub, "end", 9); } ctx.setLineDash([]); let start = null; for (let i = 0; i < N; i++) { if (data.deny[i] && start == null) start = i; if ((!data.deny[i] || i === N - 1) && start != null) { const end = data.deny[i] ? 
i : i - 1; ctx.fillStyle = col.redA; ctx.fillRect(xFor(start), padT, xFor(end) - xFor(start), iH); start = null; } } if (aA) { ctx.fillStyle = col.hl; ctx.fillRect(xFor(aA[0]), padT, xFor(aA[1]) - xFor(aA[0]), iH); } pArea(ctx, data.acct, yFor); ctx.fillStyle = col.acctA; ctx.fill(); pLine(ctx, data.acct, yFor); ctx.strokeStyle = col.acct; ctx.lineWidth = 2.2; ctx.stroke(); pLine(ctx, data.fin, yFor); ctx.strokeStyle = col.fin; ctx.lineWidth = 1.8; ctx.stroke(); pLine(ctx, data.eng, yFor); ctx.strokeStyle = col.eng; ctx.lineWidth = 1.8; ctx.stroke(); ctx.strokeStyle = col.brd; ctx.lineWidth = 1; ctx.beginPath(); ctx.moveTo(padL, padT + iH); ctx.lineTo(padL + iW, padT + iH); ctx.stroke(); for (const t of data.ticks) { const x = xFor(t[0]); ctx.strokeStyle = col.amber; ctx.globalAlpha = t[1] === "f" ? 0.9 : 0.55; ctx.beginPath(); ctx.moveTo(x, padT + iH + 8); ctx.lineTo(x, padT + iH + 19); ctx.stroke(); } ctx.globalAlpha = 1; tx(ctx, "TPM remaining over one minute", padL + iW, padT + iH + 30, col.sub, "right", 9); tx(ctx, "usage event ticks", padL, padT + iH + 30, col.amber, "left", 9); label(ctx, "429 rejection window", xFor(132), padT + 18, col.red, 11); drawLegend(col); const px = xFor(active); ctx.strokeStyle = col.sub; ctx.globalAlpha = 0.55; ctx.beginPath(); ctx.moveTo(px, padT); ctx.lineTo(px, padT + iH); ctx.stroke(); ctx.globalAlpha = 1; dot(ctx, px, yFor(data.acct[active]), col.acct, 4); dot(ctx, px, yFor(data.fin[active]), col.fin, 3.4); dot(ctx, px, yFor(data.eng[active]), col.eng, 3.4); const cx = Math.min(W - 134, Math.max(padL, px + 8)); ctx.fillStyle = col.chip; ctx.strokeStyle = col.brd; ctx.lineWidth = 1; ctx.beginPath(); ctx.roundRect(cx, padT + 6, 126, 48, 4); ctx.fill(); ctx.stroke(); tx(ctx, "org " + Math.round(data.acct[active]) + "M", cx + 8, padT + 18, col.acct, "left", 10); tx(ctx, "finance " + Math.round(data.fin[active]) + "M", cx + 8, padT + 32, col.fin, "left", 10); tx(ctx, "engineering " + Math.round(data.eng[active]) + "M", cx + 8, 
padT + 46, col.eng, "left", 10); const dotColor = aA ? aA[4] : "#9CA59E"; sDot.style.background = dotColor; setRich(sTit, aA ? aA[2] : "Hover to inspect"); setRich(sBod, aA ? aA[3] : "Move across the chart to see how one account-level pool gates each team's requests."); sBod.style.color = col.body; strip.style.cssText = "display:flex;align-items:flex-start;gap:10px;padding:10px 14px;margin:8px 0 0;border-radius:6px;min-height:56px;" + (isDark() ? "background:#0C1D13;border:1px solid #203026" : "background:#f4f9f3;border:1px solid #dee4de"); dirty = false; } function loop() { raf = requestAnimationFrame(loop); if (!visible) return; if (hover == null || dirty) draw(); } raf = requestAnimationFrame(loop); return () => { cancelAnimationFrame(raf); obs.disconnect(); themeObs.disconnect(); cv.remove(); strip.remove(); }; }; return () => { delete window._fgBudgetMount; }; }, []); return ; }; In Frontier Gateway, rate and usage limits live on the **group**, not on individual API keys. Every key minted under a group inherits the group's effective limits, so rotating a customer's credentials doesn't change what they can spend. Rate limits cap short-window throughput (per second or per minute), and usage limits cap total consumption per daily window. Both are scoped to a single (group, model slug) pair, so a group can carry separate limits for every model its keys are allowed to call. You configure both kinds of limit by passing them inside `models[].rate_limits` and `models[].usage_limits` when you call [`POST /v1/gateway/groups`](/reference/gateway/groups/create-a-group) or [`PATCH /v1/gateway/groups/{group_id}`](/reference/gateway/groups/update-a-group). Workspace API keys and the shared Model APIs product use a different limit model; for the comparison, see [Frontier Gateway versus Model APIs limits](#frontier-gateway-versus-model-apis-limits). ## Rate limits A rate limit caps short-window throughput. You attach one or more rate limits to each model slug on a group. 
| Field | Values | Description | | ----------- | ------------------ | --------------------------------------------------------------------- | | `type` | `TOKEN`, `REQUEST` | Whether the limit counts tokens (prompt plus completion) or requests. | | `unit` | `SECOND`, `MINUTE` | The window the threshold applies to. | | `threshold` | Integer `>= 1` | The maximum count allowed per window. | You can set both a `TOKEN` and a `REQUEST` rate limit on the same model slug, but you can't set two rate limits with the same `type`. ```json theme={"system"} { "metadata": { "external_entity_id": "cust_42" }, "models": [ { "slug": "your-org/your-model", "rate_limits": [ { "type": "TOKEN", "unit": "MINUTE", "threshold": 1000000 }, { "type": "REQUEST", "unit": "MINUTE", "threshold": 100 } ] } ], "hierarchy": { "limit_enforcement": "INDEPENDENT", "parent_group_id": null } } ``` In this example, the group can spend up to one million prompt-plus-completion tokens per minute on `your-org/your-model`, and up to 100 requests per minute against the same model. Both ceilings are enforced; whichever the caller hits first triggers a `429 Too Many Requests` response. ## Usage limits A usage limit caps how much a group can spend in a daily window. Usage limits are optional. You can attach a usage limit to any model slug the group is allowed to call. | Field | Values | Description | | ----------- | ------------------ | ------------------------------------------------------------------------ | | `type` | `TOKEN`, `REQUEST` | Whether the limit counts tokens or requests. | | `unit` | `DAY` | The window the threshold applies to. Daily is the only supported window. | | `threshold` | Integer `>= 1` | The maximum count allowed per daily window. 
| Both `TOKEN` and `REQUEST` are supported as the `type` for a usage limit: ```json theme={"system"} { "models": [ { "slug": "your-org/your-model", "usage_limits": [ { "type": "TOKEN", "unit": "DAY", "threshold": 10000000 }, { "type": "REQUEST", "unit": "DAY", "threshold": 5000 } ] } ] } ``` In this example, the group can spend up to ten million tokens per day and up to 5,000 requests per day on `your-org/your-model`. Whichever ceiling the caller hits first triggers a `429 Too Many Requests` response for the rest of the daily window. ## Per-model scope Limits are scoped per (group, model slug) pair. A group can be authorized for multiple model slugs, and each slug carries its own independent rate-limit and usage-limit buckets. Spending tokens against one model doesn't draw down another model's budget on the same group. ```json theme={"system"} { "models": [ { "slug": "your-org/your-model", "rate_limits": [ { "type": "TOKEN", "unit": "MINUTE", "threshold": 1000000 } ], "usage_limits": [ { "type": "TOKEN", "unit": "DAY", "threshold": 10000000 } ] }, { "slug": "your-org/your-other-model", "rate_limits": [ { "type": "REQUEST", "unit": "SECOND", "threshold": 20 } ] } ] } ``` In this example, `your-org/your-model` carries a per-minute token rate limit and a daily token usage limit, while `your-org/your-other-model` carries only a per-second request rate limit. The two slugs are independent. ## Inheritance modes Every group declares an enforcement mode at creation by setting `hierarchy.limit_enforcement` to one of two values: `INDEPENDENT` or `CASCADING`. The mode controls how a child group's usage interacts with its ancestors. The mode is fixed for the whole hierarchy: children must declare the same mode as their parent, and the field is immutable after creation. Hierarchies are capped at five levels deep. 
### Independent mode In an independent hierarchy, a child group inherits any limit its ancestors set when the child omits it, but the child's usage is metered separately from its ancestors. A child can override an inherited threshold upward or downward. A sibling's traffic never draws down another sibling's budget. Think of an independent hierarchy as a template. The parent group establishes default limits, and children opt out of them by declaring their own. Consumption is bucketed per group, with no cross-group accounting. Worked example. A root group `free-tier` has: ```json theme={"system"} { "type": "TOKEN", "unit": "MINUTE", "threshold": 100000000 } ``` A child group `john` under `free-tier` declares no limits. The runtime enforces 100M TPM on `john`, sourced from `free-tier`. If you later raise `free-tier`'s threshold to 150M TPM, `john` automatically gets 150M TPM too. A sibling child group `sally` under `free-tier` declares its own ceiling: ```json theme={"system"} { "type": "TOKEN", "unit": "MINUTE", "threshold": 120000000 } ``` The runtime enforces 120M TPM on `sally`, sourced from `sally`. `john`'s traffic doesn't draw down `sally`'s budget, and `sally`'s traffic doesn't draw down `john`'s. ### Cascading mode In a cascading hierarchy, a child group's usage counts against every ancestor at the same time. A request that fits the child's own limit can still be rejected if an ancestor is exhausted. Think of a cascading hierarchy as a shared pool. An ancestor establishes a hard cap on the subtree's total consumption, and children divide it. Siblings can compete for the same pool: one sibling spending heavily reduces what's available to the others. Children in a cascading hierarchy can't declare a threshold higher than any ancestor's threshold for the same (slug, type, unit) tuple. Frontier Gateway enforces this at write time, on both create and update. 
Any of the following requests fails with `400 Bad Request: "Child group exceeds parent group limit."`: * Creating a child whose declared threshold exceeds an ancestor's threshold for the same (slug, type, unit). * Raising a descendant's threshold past an ancestor's with `PATCH`. * Lowering an ancestor's threshold with `PATCH` below the highest existing descendant threshold. To raise a subtree's ceiling, raise the ancestor first, then the descendants. To lower an ancestor below a descendant, lower the descendant first. Each direction is rejected if you do it out of order. Worked example. A root group `org` has: ```json theme={"system"} { "type": "TOKEN", "unit": "MINUTE", "threshold": 100000000 } ``` Two children `finance` and `engineering` under `org` each declare: ```json theme={"system"} { "type": "TOKEN", "unit": "MINUTE", "threshold": 70000000 } ``` Each child's `effective_models` shows 70M TPM sourced from itself, but the runtime also enforces the 100M TPM ceiling sourced from `org` against the **combined** traffic of `finance` and `engineering`. If `finance` consumes 70M in a given minute, `engineering` has only 30M of headroom left in that minute, regardless of its own declared 70M ceiling. The 70M + 70M over-provisioning is allowed at create time because each individual child threshold (70M) stays at or below the parent's (100M); only a single child threshold that exceeded the parent's would be rejected. The following chart traces that same minute. `finance` consumes its full 70M ceiling, dropping the `org` pool from 100M to 30M, then `engineering` hits `429` after 30M of accepted traffic with 40M of its own 70M ceiling still untouched. ### Effective limits and inheritance Every group response carries two parallel blocks: * **`models`**: the configuration you wrote on this specific group, as if you were reading the row alone. * **`effective_models`**: the limits the runtime enforces on this group after walking the hierarchy. 
Each limit carries a `source_group` field pointing to the group (this one or an ancestor) the limit is anchored to. In an independent hierarchy, `effective_models` resolves each (slug, type, unit) tuple by taking the closest ancestor (including self) that declared it. In a cascading hierarchy, `effective_models` lists every distinct ancestor limit the request is subject to. Read it as the full set of ceilings that gate this group's traffic. `effective_models` is read-only. To change what a group enforces, update the `models` block on the group itself (or on an ancestor) with `PATCH /v1/gateway/groups/{group_id}`. ## Enforcement and reset When a request from one of a group's keys exceeds any limit in the group's `effective_models` for the requested model slug, the platform rejects the request with `429 Too Many Requests`. The 429 fires for the first limit hit: if a group has a `TOKEN/MINUTE` rate limit and a `REQUEST/DAY` usage limit, either can trigger rejection. In a cascading hierarchy, the limit hit can be one anchored on an ancestor rather than the calling group's own configuration. Daily usage windows reset at midnight UTC. After reset, a group's consumption for each `DAY` limit returns to zero and the group can spend up to the threshold again over the next 24 hours. Rate-limit windows (per second, per minute) are short rolling windows enforced inline on every request and don't have a reset timestamp you need to track. ## Current consumption To inspect a group's usage against its configured `usage_limits` without waiting for a 429, call `GET /v1/gateway/groups/{group_id}/usage`. The response returns one entry per `(model slug, type, unit)` tuple the group has a usage limit on, with the configured `threshold`, the `current_usage` in the active daily window, and the `reset_at` timestamp for that window. 
```bash Request theme={"system"} curl --request GET \ --url https://api.baseten.co/v1/gateway/groups/abc123hash/usage \ --header "Authorization: Api-Key $BASETEN_API_KEY" ``` ```json Output theme={"system"} { "customer_id": "cust_42", "usage": { "your-org/your-model": [ { "type": "TOKEN", "unit": "DAY", "threshold": 10000000, "current_usage": 4231899, "reset_at": "2026-05-21T00:00:00Z" } ] } } ``` Only models that have `usage_limits` configured on the group's effective configuration appear in the response. Rate-limit consumption isn't surfaced through this endpoint; rate limits are short rolling windows and don't carry a stored counter. For the full response shape, see [Get group usage](/reference/gateway/groups/get-group-usage). ## Frontier Gateway versus Model APIs limits Frontier Gateway and the shared Model APIs product use different limit models: * **Frontier Gateway** limits are **per group, per model slug**, with an inheritance mode picked at the root. You configure `TOKEN`/`REQUEST` rate limits (`SECOND` or `MINUTE`) and optional `TOKEN`/`REQUEST` usage limits (`DAY`) on the group, and every key minted under the group inherits the group's effective config. * **Model APIs** limits are **account-tier RPM/TPM** ceilings that apply to your workspace API key as a whole, regardless of which Model APIs model you're calling. For more information on Model APIs limits, see [Rate limits and budgets](/inference/model-apis/rate-limits-and-budgets). ## Next steps * **[Manage groups and API keys](/frontier-gateway/api-keys)**: Configure limits when you create or update a group, and rotate keys without changing those limits.