diff --git a/packages/core/lib/v3/agent/tools/index.ts b/packages/core/lib/v3/agent/tools/index.ts index ecaba3fad..2c644e06c 100644 --- a/packages/core/lib/v3/agent/tools/index.ts +++ b/packages/core/lib/v3/agent/tools/index.ts @@ -168,7 +168,7 @@ export function createAgentTools(v3: V3, options?: V3AgentToolOptions) { fillForm: fillFormTool(v3, executionModel, variables, toolTimeout), fillFormVision: fillFormVisionTool(v3, provider, variables), goto: gotoTool(v3), - keys: keysTool(v3), + keys: keysTool(v3, variables), navback: navBackTool(v3), screenshot: screenshotTool(v3), scroll: mode === "hybrid" ? scrollVisionTool(v3, provider) : scrollTool(v3), diff --git a/packages/core/lib/v3/agent/tools/keys.ts b/packages/core/lib/v3/agent/tools/keys.ts index b8e581472..dfe7078b0 100644 --- a/packages/core/lib/v3/agent/tools/keys.ts +++ b/packages/core/lib/v3/agent/tools/keys.ts @@ -1,9 +1,16 @@ import { tool } from "ai"; import { z } from "zod"; +import type { Variables } from "../../types/public/agent.js"; import type { V3 } from "../../v3.js"; +import { substituteVariables } from "../utils/variables.js"; -export const keysTool = (v3: V3) => - tool({ +export const keysTool = (v3: V3, variables?: Variables) => { + const hasVariables = variables && Object.keys(variables).length > 0; + const valueDescription = hasVariables + ? `The text to type, or the key/combo to press (Enter, Tab, Cmd+A). Use %variableName% to substitute a variable value when method="type". Available: ${Object.keys(variables).join(", ")}` + : "The text to type, or the key/combo to press (Enter, Tab, Cmd+A)"; + + return tool({ description: `Send keyboard input to the page without targeting a specific element. Unlike the type tool which clicks then types into coordinates, this sends keystrokes directly to wherever focus currently is. Use method="type" to enter text into the currently focused element. Preferred when: input is already focused, text needs to flow across multiple fields (e.g., verification codes) @@ -11,11 +18,7 @@ Use method="type" to enter text into the currently focused element. Preferred wh Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) and keyboard shortcuts (Cmd+A, Ctrl+C, Shift+Tab).`, inputSchema: z.object({ method: z.enum(["press", "type"]), - value: z - .string() - .describe( - "The text to type, or the key/combo to press (Enter, Tab, Cmd+A)", - ), + value: z.string().describe(valueDescription), repeat: z.number().optional(), }), execute: async ({ method, value, repeat }) => { @@ -36,8 +39,9 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a const times = Math.max(1, repeat ?? 1); if (method === "type") { + const actualValue = substituteVariables(value, variables); for (let i = 0; i < times; i++) { - await page.type(value, { delay: 100 }); + await page.type(actualValue, { delay: 100 }); } v3.recordAgentReplayStep({ type: "keys", @@ -65,3 +69,4 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a } }, }); +}; diff --git a/packages/core/tests/unit/agent-execution-model.test.ts b/packages/core/tests/unit/agent-execution-model.test.ts index b1e4dfd65..5852dc723 100644 --- a/packages/core/tests/unit/agent-execution-model.test.ts +++ b/packages/core/tests/unit/agent-execution-model.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from "vitest"; import { actTool } from "../../lib/v3/agent/tools/act.js"; import { extractTool } from "../../lib/v3/agent/tools/extract.js"; import { fillFormTool } from "../../lib/v3/agent/tools/fillform.js"; +import { keysTool } from "../../lib/v3/agent/tools/keys.js"; import type { V3 } from "../../lib/v3/v3.js"; /** @@ -10,10 +11,17 @@ import type { V3 } from "../../lib/v3/v3.js"; */ function createMockV3() { const calls: { method: string; model: unknown; variables?: unknown }[] = []; + const mockPage = { + type: vi.fn(async () => undefined), + keyPress: vi.fn(async () => undefined), + }; const mock = { logger: vi.fn(), recordAgentReplayStep: vi.fn(), + context: { + awaitActivePage: vi.fn(async () => mockPage), + }, act: vi.fn(async (_instruction: unknown, options?: { model?: unknown }) => { calls.push({ method: "act", model: options?.model }); return { @@ -47,9 +55,13 @@ function createMockV3() { }, ), calls, + mockPage, }; - return mock as unknown as V3 & { calls: typeof calls }; + return mock as unknown as V3 & { + calls: typeof calls; + mockPage: typeof mockPage; + }; } describe("agent tools pass full executionModel config to v3 methods", () => { @@ -133,6 +145,29 @@ describe("agent tools pass full executionModel config to v3 methods", () => { expect(v3.calls[0].variables).toBe(variables); }); + it("keysTool substitutes variables before typing", async () => { + const v3 = createMockV3(); + const variables = { + token: { + value: "my-secret-value", + description: "The token to type", + }, + }; + const tool = keysTool(v3, variables); + await tool.execute!( + { method: "type", value: "%token%" }, + { + toolCallId: "t3-keys-variables", + messages: [], + abortSignal: new AbortController().signal, + }, + ); + + expect(v3.mockPage.type).toHaveBeenCalledWith("my-secret-value", { + delay: 100, + }); + }); + it("actTool passes undefined when no executionModel is set", async () => { const v3 = createMockV3(); const tool = actTool(v3, undefined);