refactor(billing): derive agent credit schema from tokenlens (DAT-100)

izadoesdev · izadoesdev · commit c6f1e43d0867 · 2026-04-08T22:17:28.000+03:00
The credit rates in autumn.config.ts and the agent-cost-probe script
were hand-picked magic numbers duplicated across two files, with only
a cross-reference comment to keep them in sync. This commit factors the
pricing into a single shared module that derives credits from real
provider rates, and adds the missing web_search credit to the probe.

Formula: credit_per_token = usd_per_token × MARKUP × CREDITS_PER_USD
  - usd_per_token comes from tokenlens' Vercel AI Gateway catalog
    (anthropic/claude-4-sonnet)
  - MARKUP = 1.20 (20% markup on provider cost, i.e. ~16.7% margin on price)
  - CREDITS_PER_USD = 200 (tuning knob for plan-tier runway)

Derived values exactly match the prior hand-picked schema:
  input       0.000_72   output      0.0036
  cacheRead   0.000_072  cacheWrite  0.001_44
  webSearch   5 (flat)

cacheWrite override: tokenlens exposes Anthropic's 5-minute cache rate
($3.75/M), but the agent uses ttl: "1h" in prompt-cache.ts, which bills
at the 1-hour rate ($6/M). The override lives alongside a comment
explaining why — if the TTL ever changes, delete the constant and let
tokenlens drive the rate directly.

Changes:
- packages/shared/src/billing/credit-schema.ts: new module exporting
  AGENT_CREDIT_SCHEMA, CREDITS_PER_USD, MARKUP, BASELINE_MODEL_ID,
  WEB_SEARCH_CREDIT_COST
- packages/shared/package.json: add tokenlens dep + export path
- apps/dashboard/autumn.config.ts: import AGENT_CREDIT_SCHEMA and
  feed its fields into the Autumn creditSchema array
- apps/api/scripts/agent-cost-probe.ts: drop CURRENT_SCHEMA, import
  AGENT_CREDIT_SCHEMA, count web_search tool calls per turn and
  include them in the credit total (was being undercounted before)
diff --git a/apps/api/scripts/agent-cost-probe.ts b/apps/api/scripts/agent-cost-probe.ts
@@ -18,6 +18,7 @@
  * cache hits/misses are realistic.
  */
 
+import { AGENT_CREDIT_SCHEMA } from "@databuddy/shared/billing/credit-schema";
 import { randomUUIDv7 } from "bun";
 import { convertToModelMessages, ToolLoopAgent, type UIMessage } from "ai";
 import { createAgentConfig } from "../src/ai/agents";
@@ -58,24 +59,16 @@ if (!(websiteId && userId)) {
 	process.exit(1);
 }
 
-// Matches creditSchema in apps/dashboard/autumn.config.ts.
-// Keep in sync — if you change rates in one place, change them in the other.
-const CURRENT_SCHEMA = {
-	input: 0.000_72,
-	output: 0.0036,
-	cacheRead: 0.000_072,
-	cacheWrite: 0.001_44,
-};
-
 function computeCredits(
-	schema: typeof CURRENT_SCHEMA,
-	s: ReturnType<typeof summarizeAgentUsage>
+	s: ReturnType<typeof summarizeAgentUsage>,
+	webSearchCalls: number
 ): number {
 	return (
-		s.fresh_input_tokens * schema.input +
-		s.output_tokens * schema.output +
-		s.cache_read_tokens * schema.cacheRead +
-		s.cache_write_tokens * schema.cacheWrite
+		s.fresh_input_tokens * AGENT_CREDIT_SCHEMA.input +
+		s.output_tokens * AGENT_CREDIT_SCHEMA.output +
+		s.cache_read_tokens * AGENT_CREDIT_SCHEMA.cacheRead +
+		s.cache_write_tokens * AGENT_CREDIT_SCHEMA.cacheWrite +
+		webSearchCalls * AGENT_CREDIT_SCHEMA.webSearch
 	);
 }
 
@@ -117,6 +110,7 @@ async function main() {
 		read: 0,
 		write: 0,
 		output: 0,
+		webSearch: 0,
 	};
 
 	for (const [idx, message] of messages.entries()) {
@@ -143,11 +137,15 @@ async function main() {
 
 		let assistantText = "";
 		let toolCalls = 0;
+		let webSearchCalls = 0;
 		for await (const part of result.fullStream) {
 			if (part.type === "text-delta") {
 				assistantText += part.text ?? "";
 			} else if (part.type === "tool-call") {
 				toolCalls++;
+				if (part.toolName === "web_search") {
+					webSearchCalls++;
+				}
 				console.log(`  → tool call: ${part.toolName}`);
 			}
 		}
@@ -162,16 +160,18 @@ async function main() {
 		const steps = (await result.steps).length;
 		const elapsed = ((Date.now() - t0) / 1000).toFixed(2);
 		const summary = summarizeAgentUsage(modelNames.analytics, usage);
-		const credits = computeCredits(CURRENT_SCHEMA, summary);
+		const credits = computeCredits(summary, webSearchCalls);
 
 		totals.credits += credits;
 		totals.fresh += summary.fresh_input_tokens;
 		totals.read += summary.cache_read_tokens;
 		totals.write += summary.cache_write_tokens;
 		totals.output += summary.output_tokens;
+		totals.webSearch += webSearchCalls;
 
+		const webSearchNote = webSearchCalls > 0 ? ` · web ${webSearchCalls}` : "";
 		console.log(
-			`  ${elapsed}s · ${steps} steps · ${toolCalls} tools · fresh ${summary.fresh_input_tokens} · read ${summary.cache_read_tokens} · write ${summary.cache_write_tokens} · out ${summary.output_tokens}${
+			`  ${elapsed}s · ${steps} steps · ${toolCalls} tools${webSearchNote} · fresh ${summary.fresh_input_tokens} · read ${summary.cache_read_tokens} · write ${summary.cache_write_tokens} · out ${summary.output_tokens}${
 				summary.reasoning_tokens > 0
 					? ` (${summary.reasoning_tokens} reasoning)`
 					: ""
@@ -189,6 +189,9 @@ async function main() {
 	console.log(`  cache read   ${totals.read.toLocaleString().padStart(10)}`);
 	console.log(`  cache write  ${totals.write.toLocaleString().padStart(10)}`);
 	console.log(`  output       ${totals.output.toLocaleString().padStart(10)}`);
+	console.log(
+		`  web search   ${totals.webSearch.toLocaleString().padStart(10)}`
+	);
 	console.log(`  credits      ${totals.credits.toFixed(2).padStart(10)}`);
 	console.log();
 	console.log(
diff --git a/apps/dashboard/autumn.config.ts b/apps/dashboard/autumn.config.ts
@@ -1,3 +1,4 @@
+import { AGENT_CREDIT_SCHEMA } from "@databuddy/shared/billing/credit-schema";
 import { feature, item, plan } from "atmn";
 
 /*
@@ -104,11 +105,26 @@ export const agent_credits = feature({
 	name: "Agent Credits",
 	type: "credit_system",
 	creditSchema: [
-		{ meteredFeatureId: "agent_input_tokens", creditCost: 0.000_72 },
-		{ meteredFeatureId: "agent_output_tokens", creditCost: 0.0036 },
-		{ meteredFeatureId: "agent_cache_read_tokens", creditCost: 0.000_072 },
-		{ meteredFeatureId: "agent_cache_write_tokens", creditCost: 0.001_44 },
-		{ meteredFeatureId: "agent_web_search_calls", creditCost: 5 },
+		{
+			meteredFeatureId: "agent_input_tokens",
+			creditCost: AGENT_CREDIT_SCHEMA.input,
+		},
+		{
+			meteredFeatureId: "agent_output_tokens",
+			creditCost: AGENT_CREDIT_SCHEMA.output,
+		},
+		{
+			meteredFeatureId: "agent_cache_read_tokens",
+			creditCost: AGENT_CREDIT_SCHEMA.cacheRead,
+		},
+		{
+			meteredFeatureId: "agent_cache_write_tokens",
+			creditCost: AGENT_CREDIT_SCHEMA.cacheWrite,
+		},
+		{
+			meteredFeatureId: "agent_web_search_calls",
+			creditCost: AGENT_CREDIT_SCHEMA.webSearch,
+		},
 	],
 });
 
diff --git a/bun.lock b/bun.lock
diff --git a/packages/shared/package.json b/packages/shared/package.json
@@ -9,6 +9,7 @@
     "./country-codes": "./src/country-codes.ts",
     "./crypto-utils": "./src/crypto-utils.ts",
 
+    "./billing/credit-schema": "./src/billing/credit-schema.ts",
     "./lists/filters": "./src/lists/filters.ts",
     "./lists/referrers": "./src/lists/referrers.ts",
     "./lists/timezones": "./src/lists/timezones.ts",
@@ -47,6 +48,7 @@
     "drizzle-orm": "catalog:",
     "evlog": "^2.8.0",
     "nanoid": "catalog:",
+    "tokenlens": "^1.3.1",
     "ua-parser-js": "catalog:",
     "zod": "catalog:"
   }
diff --git a/packages/shared/src/billing/credit-schema.ts b/packages/shared/src/billing/credit-schema.ts
@@ -0,0 +1,112 @@
+/**
+ * Single source of truth for agent credit rates.
+ *
+ * Derives per-token credit costs from tokenlens' Vercel AI Gateway catalog
+ * plus a business markup, so the Autumn creditSchema in autumn.config.ts
+ * and the agent-cost-probe script stay in sync with provider prices
+ * automatically. The raw USD rates come from tokenlens; the markup and
+ * credit-to-USD ratio are the pricing knobs we tune per plan tier.
+ *
+ * Credit formula: credits_per_token = usd_per_token × MARKUP × CREDITS_PER_USD
+ */
+
+import { vercelModels } from "tokenlens/providers/vercel";
+
+/**
+ * Credits charged per USD of marked-up provider cost (raw provider USD ×
+ * MARKUP). Tunes plan-tier runway (free 500 / hobby 2500 / pro 25000).
+ * Raising this makes the same dollar of provider usage burn more credits.
+ */
+export const CREDITS_PER_USD = 200;
+
+/** Business markup on top of provider cost. 1.2 = cost + 20% (≈16.7% margin on price). */
+export const MARKUP = 1.2;
+
+/**
+ * Model whose provider rates back the credit schema. If the agent uses
+ * multiple models with materially different prices, pick the most
+ * expensive as the ceiling — we'd rather slightly over-charge than
+ * lose margin on the pricier model.
+ */
+export const BASELINE_MODEL_ID = "anthropic/claude-4-sonnet" as const;
+
+/**
+ * Anthropic's 1-hour prompt cache write rate (USD per 1M tokens).
+ *
+ * The Vercel AI Gateway catalog in tokenlens exposes Anthropic's
+ * 5-minute cache rate ($3.75/M), but our agent is configured with
+ * `ttl: "1h"` in apps/api/src/ai/config/prompt-cache.ts so Anthropic
+ * bills the 1-hour rate ($6/M). We override cacheWrite here to match
+ * production billing. If the agent switches back to 5-minute TTL,
+ * delete this constant and let tokenlens drive the rate directly.
+ */
+const CACHE_WRITE_1H_USD_PER_M_TOKENS = 6;
+
+/**
+ * Flat credit cost per web search call (the agent_web_search_calls metered
+ * feature). 5 credits ≈ $0.025 at CREDITS_PER_USD=200 — priced separately
+ * from token burn because the Perplexity call is a fixed-cost API hit
+ * regardless of tokens returned.
+ */
+export const WEB_SEARCH_CREDIT_COST = 5;
+
+const TOKENS_PER_MILLION = 1_000_000;
+
+interface ModelCostsPerMillion {
+	cache_read: number;
+	cache_write: number;
+	input: number;
+	output: number;
+}
+
+function getBaselineUsdPerMillion(): ModelCostsPerMillion {
+	const model = vercelModels.models[BASELINE_MODEL_ID];
+	if (!model?.cost) {
+		throw new Error(
+			`tokenlens vercelModels is missing cost for ${BASELINE_MODEL_ID}`
+		);
+	}
+	return {
+		input: model.cost.input,
+		output: model.cost.output,
+		cache_read: model.cost.cache_read,
+		// Override with the 1-hour rate — see CACHE_WRITE_1H_USD_PER_M_TOKENS.
+		cache_write: CACHE_WRITE_1H_USD_PER_M_TOKENS,
+	};
+}
+
+/**
+ * Converts a per-million-tokens USD rate into a per-token credit rate.
+ * Rounds to 12 significant digits to avoid floating-point noise like
+ * `0.0007199999999999999` leaking into the Autumn creditSchema payload.
+ */
+function toCredits(usdPerMillion: number): number {
+	const raw = (usdPerMillion / TOKENS_PER_MILLION) * MARKUP * CREDITS_PER_USD;
+	return Number.parseFloat(raw.toPrecision(12));
+}
+
+export interface AgentCreditSchema {
+	/** Credits per cache-read input token. */
+	cacheRead: number;
+	/** Credits per cache-write input token. */
+	cacheWrite: number;
+	/** Credits per fresh (non-cached) input token. */
+	input: number;
+	/** Credits per output token. */
+	output: number;
+	/** Flat credits per web search call. */
+	webSearch: number;
+}
+
+const baselineUsd = getBaselineUsdPerMillion();
+
+/**
+ * Canonical agent credit schema. Import this from autumn.config.ts and
+ * from any cost probe so both stay in lockstep.
+ */
+export const AGENT_CREDIT_SCHEMA: AgentCreditSchema = {
+	input: toCredits(baselineUsd.input),
+	output: toCredits(baselineUsd.output),
+	cacheRead: toCredits(baselineUsd.cache_read),
+	cacheWrite: toCredits(baselineUsd.cache_write),
+	webSearch: WEB_SEARCH_CREDIT_COST,
+};