Skip to content

Commit eafbf3d

Browse files
committed
feat: add cache input token tracking in AIGateway
1 parent 8816c8e commit eafbf3d

File tree

10 files changed

+66
-6
lines changed

10 files changed

+66
-6
lines changed

src/client/components/aiGateway/AIGatewayAnalytics.tsx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ export const AIGatewayAnalytics: React.FC<AIGatewayAnalyticsProps> = React.memo(
304304
<CardTitle>{t('Token Usage by User')}</CardTitle>
305305
<p className="text-muted-foreground text-sm">
306306
{t(
307-
'Total token consumption (input + output) distribution across different users'
307+
'Total token consumption (input + output + cache input) distribution across different users'
308308
)}
309309
</p>
310310
</CardHeader>
@@ -317,6 +317,11 @@ export const AIGatewayAnalytics: React.FC<AIGatewayAnalyticsProps> = React.memo(
317317
metrics={[
318318
{ name: 'inputToken', math: 'events', alias: 'inputToken' },
319319
{ name: 'outputToken', math: 'events', alias: 'outputToken' },
320+
{
321+
name: 'cacheInputToken',
322+
math: 'events',
323+
alias: 'cacheInputToken',
324+
},
320325
]}
321326
filters={[]}
322327
groups={[{ value: 'userId', type: 'string' }]}

src/client/components/aiGateway/AIGatewayLogTable.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ export const AIGatewayLogTable: React.FC<AIGatewayLogTableProps> = React.memo(
148148

149149
<SheetDataSection label="Tokens">
150150
{selectedItem.inputToken}↑ | {selectedItem.outputToken}
151+
{selectedItem.cacheInputToken > 0 && (
152+
<> | {selectedItem.cacheInputToken} cached</>
153+
)}
151154
</SheetDataSection>
152155

153156
<SheetDataSection label={t('Request Payload')}>

src/client/components/aiGateway/AIGatewayOverview.tsx

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ export const AIGatewayOverview: React.FC<AIGatewayOverviewProps> = React.memo(
3434

3535
const { startDate, endDate, unit, refresh } = useGlobalRangeDate();
3636
const [type, setType] = useState<
37-
'$all_event' | 'inputToken' | 'outputToken' | 'price'
37+
| '$all_event'
38+
| 'inputToken'
39+
| 'outputToken'
40+
| 'cacheInputToken'
41+
| 'price'
3842
>('price');
3943
const [isQuotaModalOpen, setIsQuotaModalOpen] = useState(false);
4044

@@ -88,6 +92,13 @@ export const AIGatewayOverview: React.FC<AIGatewayOverviewProps> = React.memo(
8892
color: colors.blue[500],
8993
},
9094
};
95+
} else if (type === 'cacheInputToken') {
96+
return {
97+
value: {
98+
label: t('AIGateway Cache Input Token'),
99+
color: colors.blue[500],
100+
},
101+
};
91102
} else if (type === 'price') {
92103
return {
93104
value: {
@@ -163,6 +174,9 @@ export const AIGatewayOverview: React.FC<AIGatewayOverviewProps> = React.memo(
163174
<SelectItem value="outputToken">
164175
{t('Output Token')}
165176
</SelectItem>
177+
<SelectItem value="cacheInputToken">
178+
{t('Cache Input Token')}
179+
</SelectItem>
166180
<SelectItem value="price">{t('Price')}</SelectItem>
167181
</SelectContent>
168182
</Select>

src/client/components/aiGateway/AIGatewaySummaryStats.tsx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export const AIGatewaySummaryStats: React.FC<AIGatewaySummaryStatsProps> =
2626
{ name: '$all_event', math: 'events' },
2727
{ name: 'inputToken', math: 'events' },
2828
{ name: 'outputToken', math: 'events' },
29+
{ name: 'cacheInputToken', math: 'events' },
2930
{ name: 'price', math: 'events' },
3031
],
3132
filters: [],
@@ -48,6 +49,7 @@ export const AIGatewaySummaryStats: React.FC<AIGatewaySummaryStatsProps> =
4849
'$all_event',
4950
'inputToken',
5051
'outputToken',
52+
'cacheInputToken',
5153
'price',
5254
];
5355
const counts = metric?.data || [];
@@ -65,7 +67,7 @@ export const AIGatewaySummaryStats: React.FC<AIGatewaySummaryStatsProps> =
6567
return (
6668
<LoadingView isLoading={isLoading}>
6769
<div className="mb-6">
68-
<div className="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-4">
70+
<div className="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-5">
6971
{summaryData.map((metric) => {
7072
let label = '';
7173
let formattedValue = '';
@@ -83,6 +85,10 @@ export const AIGatewaySummaryStats: React.FC<AIGatewaySummaryStatsProps> =
8385
label = t('Total Output Tokens');
8486
formattedValue = metric.total.toLocaleString();
8587
break;
88+
case 'cacheInputToken':
89+
label = t('Total Cache Input Tokens');
90+
formattedValue = metric.total.toLocaleString();
91+
break;
8692
case 'price':
8793
label = t('Total Price');
8894
formattedValue = `$${metric.total.toFixed(4)}`;

src/client/components/aiGateway/useAIGatewayLogColumns.tsx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ export function useAIGatewayLogColumns(onRowSelect?: (index: number) => void) {
7575
size: 120,
7676
cell: (props) => renderNullableValue(props.getValue()),
7777
}),
78+
columnHelper.accessor('cacheInputToken', {
79+
header: t('Cache Input Tokens'),
80+
size: 140,
81+
cell: (props) => renderNullableValue(props.getValue()),
82+
}),
7883
columnHelper.accessor('price', {
7984
header: t('Price ($)'),
8085
size: 120,

src/server/model/aiGateway.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ export function buildOpenAIHandler(
9595
stream,
9696
inputToken: 0,
9797
outputToken: 0,
98+
cacheInputToken: 0,
9899
duration: 0,
99100
ttft: 0,
100101
requestPayload: payload,
@@ -180,6 +181,9 @@ export function buildOpenAIHandler(
180181
calcOpenAIToken(outputContent, modelName),
181182
]);
182183

184+
const cacheInputToken =
185+
get(usage, ['prompt_tokens_details', 'cached_tokens']) ?? 0;
186+
183187
const customInputPrice = gatewayInfo?.customModelInputPrice;
184188
const customOutputPrice = gatewayInfo?.customModelOutputPrice;
185189

@@ -219,6 +223,7 @@ export function buildOpenAIHandler(
219223
modelName: responseModelName,
220224
inputToken,
221225
outputToken,
226+
cacheInputToken,
222227
duration,
223228
ttft,
224229
price,
@@ -262,6 +267,9 @@ export function buildOpenAIHandler(
262267
: Promise.resolve(0)),
263268
]);
264269

270+
const cacheInputToken =
271+
response.usage?.prompt_tokens_details?.cached_tokens ?? 0;
272+
265273
const customInputPrice = gatewayInfo?.customModelInputPrice;
266274
const customOutputPrice = gatewayInfo?.customModelOutputPrice;
267275

@@ -300,6 +308,7 @@ export function buildOpenAIHandler(
300308
status: AIGatewayLogsStatus.Success,
301309
inputToken,
302310
outputToken,
311+
cacheInputToken,
303312
duration,
304313
modelName: responseModelName,
305314
price,
@@ -409,6 +418,7 @@ export function buildAnthropicHandler(
409418
stream,
410419
inputToken: 0,
411420
outputToken: 0,
421+
cacheInputToken: 0,
412422
duration: 0,
413423
ttft: 0,
414424
requestPayload: payload,
@@ -482,6 +492,7 @@ export function buildAnthropicHandler(
482492

483493
let inputTokens = 0;
484494
let outputTokens = 0;
495+
let cacheInputTokens = 0;
485496
let outputContent = '';
486497
let ttft = -1;
487498
let responseModelName = modelName;
@@ -523,6 +534,9 @@ export function buildAnthropicHandler(
523534
const data = JSON.parse(line.slice(6));
524535
if (currentEventType === 'message_start' && data.message) {
525536
responseModelName = data.message.model || responseModelName;
537+
cacheInputTokens =
538+
data.message.usage?.cache_read_input_tokens ||
539+
cacheInputTokens;
526540
} else if (currentEventType === 'content_block_delta') {
527541
if (ttft === -1) {
528542
ttft = Date.now() - start;
@@ -578,6 +592,7 @@ export function buildAnthropicHandler(
578592
modelName: responseModelName,
579593
inputToken: inputTokens,
580594
outputToken: outputTokens,
595+
cacheInputToken: cacheInputTokens,
581596
duration,
582597
ttft,
583598
price,
@@ -607,6 +622,7 @@ export function buildAnthropicHandler(
607622
const usage = responseBody.usage;
608623
const inputTokens = usage?.input_tokens || 0;
609624
const outputTokens = usage?.output_tokens || 0;
625+
const cacheInputTokens = usage?.cache_read_input_tokens || 0;
610626
const responseCost = usage?.cost;
611627

612628
const contentBlocks = responseBody.content || [];
@@ -642,6 +658,7 @@ export function buildAnthropicHandler(
642658
modelName: responseModelName,
643659
inputToken: inputTokens,
644660
outputToken: outputTokens,
661+
cacheInputToken: cacheInputTokens,
645662
duration,
646663
price,
647664
responsePayload: {

src/server/model/insights/aiGateway.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,14 @@ export class AIGatewayInsightsSqlBuilder extends InsightsSqlBuilder {
2424

2525
// For standard fields, directly count
2626
if (
27-
['inputToken', 'outputToken', 'price', 'duration', 'ttft'].includes(
28-
item.name
29-
)
27+
[
28+
'inputToken',
29+
'outputToken',
30+
'cacheInputToken',
31+
'price',
32+
'duration',
33+
'ttft',
34+
].includes(item.name)
3035
) {
3136
return sql`sum("AIGatewayLogs"."${raw(item.name)}") as ${raw(`"${alias}"`)}`;
3237
}
@@ -119,6 +124,7 @@ export class AIGatewayInsightsSqlBuilder extends InsightsSqlBuilder {
119124
'stream',
120125
'inputToken',
121126
'outputToken',
127+
'cacheInputToken',
122128
'duration',
123129
'ttft',
124130
'price',
src/server/prisma/migrations/&lt;timestamp&gt;_add_cache_input_token/migration.sql (filename header missing in this capture — inferred from the ALTER TABLE statement below)

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- AlterTable
2+
ALTER TABLE "AIGatewayLogs" ADD COLUMN "cacheInputToken" INTEGER NOT NULL DEFAULT 0;

src/server/prisma/schema.prisma

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,7 @@ model AIGatewayLogs {
977977
gatewayId String @db.VarChar(30)
978978
inputToken Int @default(0) @db.Integer
979979
outputToken Int @default(0) @db.Integer
980+
cacheInputToken Int @default(0) @db.Integer
980981
stream Boolean @default(false) @db.Boolean // stream response or not
981982
modelName String
982983
status AIGatewayLogsStatus

src/server/prisma/zod/aigatewaylogs.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export const AIGatewayLogsModelSchema = z.object({
3030
gatewayId: z.string(),
3131
inputToken: z.number().int(),
3232
outputToken: z.number().int(),
33+
cacheInputToken: z.number().int().default(0),
3334
stream: z.boolean(),
3435
modelName: z.string(),
3536
status: z.nativeEnum(AIGatewayLogsStatus),

0 commit comments

Comments (0)