Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/lib/v3/agent/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ export function createAgentTools(v3: V3, options?: V3AgentToolOptions) {
fillForm: fillFormTool(v3, executionModel, variables, toolTimeout),
fillFormVision: fillFormVisionTool(v3, provider, variables),
goto: gotoTool(v3),
keys: keysTool(v3),
keys: keysTool(v3, variables),
navback: navBackTool(v3),
screenshot: screenshotTool(v3),
scroll: mode === "hybrid" ? scrollVisionTool(v3, provider) : scrollTool(v3),
Expand Down
21 changes: 13 additions & 8 deletions packages/core/lib/v3/agent/tools/keys.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
import { tool } from "ai";
import { z } from "zod";
import type { Variables } from "../../types/public/agent.js";
import type { V3 } from "../../v3.js";
import { substituteVariables } from "../utils/variables.js";

export const keysTool = (v3: V3) =>
tool({
export const keysTool = (v3: V3, variables?: Variables) => {
const hasVariables = variables && Object.keys(variables).length > 0;
const valueDescription = hasVariables
? `The text to type, or the key/combo to press (Enter, Tab, Cmd+A). Use %variableName% to substitute a variable value when method="type". Available: ${Object.keys(variables).join(", ")}`
: "The text to type, or the key/combo to press (Enter, Tab, Cmd+A)";

return tool({
description: `Send keyboard input to the page without targeting a specific element. Unlike the type tool which clicks then types into coordinates, this sends keystrokes directly to wherever focus currently is.

Use method="type" to enter text into the currently focused element. Preferred when: input is already focused, text needs to flow across multiple fields (e.g., verification codes)

Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) and keyboard shortcuts (Cmd+A, Ctrl+C, Shift+Tab).`,
inputSchema: z.object({
method: z.enum(["press", "type"]),
value: z
.string()
.describe(
"The text to type, or the key/combo to press (Enter, Tab, Cmd+A)",
),
value: z.string().describe(valueDescription),
repeat: z.number().optional(),
}),
execute: async ({ method, value, repeat }) => {
Expand All @@ -36,8 +39,9 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
const times = Math.max(1, repeat ?? 1);

if (method === "type") {
const actualValue = substituteVariables(value, variables);
for (let i = 0; i < times; i++) {
await page.type(value, { delay: 100 });
await page.type(actualValue, { delay: 100 });
}
v3.recordAgentReplayStep({
type: "keys",
Expand Down Expand Up @@ -65,3 +69,4 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
}
},
});
};
37 changes: 36 additions & 1 deletion packages/core/tests/unit/agent-execution-model.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from "vitest";
import { actTool } from "../../lib/v3/agent/tools/act.js";
import { extractTool } from "../../lib/v3/agent/tools/extract.js";
import { fillFormTool } from "../../lib/v3/agent/tools/fillform.js";
import { keysTool } from "../../lib/v3/agent/tools/keys.js";
import type { V3 } from "../../lib/v3/v3.js";

/**
Expand All @@ -10,10 +11,17 @@ import type { V3 } from "../../lib/v3/v3.js";
*/
function createMockV3() {
const calls: { method: string; model: unknown; variables?: unknown }[] = [];
const mockPage = {
type: vi.fn(async () => undefined),
keyPress: vi.fn(async () => undefined),
};

const mock = {
logger: vi.fn(),
recordAgentReplayStep: vi.fn(),
context: {
awaitActivePage: vi.fn(async () => mockPage),
},
act: vi.fn(async (_instruction: unknown, options?: { model?: unknown }) => {
calls.push({ method: "act", model: options?.model });
return {
Expand Down Expand Up @@ -47,9 +55,13 @@ function createMockV3() {
},
),
calls,
mockPage,
};

return mock as unknown as V3 & { calls: typeof calls };
return mock as unknown as V3 & {
calls: typeof calls;
mockPage: typeof mockPage;
};
}

describe("agent tools pass full executionModel config to v3 methods", () => {
Expand Down Expand Up @@ -133,6 +145,29 @@ describe("agent tools pass full executionModel config to v3 methods", () => {
expect(v3.calls[0].variables).toBe(variables);
});

it("keysTool substitutes variables before typing", async () => {
  // Arrange: the mock V3 hands back a page whose type() is a spy, and the
  // tool is built with a single variable it can resolve from "%token%".
  const mockV3 = createMockV3();
  const secrets = {
    token: {
      value: "my-secret-value",
      description: "The token to type",
    },
  };
  const keys = keysTool(mockV3, secrets);
  const callOptions = {
    toolCallId: "t3-keys-variables",
    messages: [],
    abortSignal: new AbortController().signal,
  };

  // Act: ask the tool to type the placeholder rather than the raw value.
  await keys.execute!({ method: "type", value: "%token%" }, callOptions);

  // Assert: the page received the substituted value, not the placeholder.
  const expectedTypeOptions = { delay: 100 };
  expect(mockV3.mockPage.type).toHaveBeenCalledWith(
    "my-secret-value",
    expectedTypeOptions,
  );
});

it("actTool passes undefined when no executionModel is set", async () => {
const v3 = createMockV3();
const tool = actTool(v3, undefined);
Expand Down
Loading