-
Notifications
You must be signed in to change notification settings - Fork 152
41 lines (34 loc) · 1.32 KB
/
_evaluations.yaml
File metadata and controls
41 lines (34 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Reusable workflow for MCP tool calling evaluations.
# Evaluates AI models' ability to correctly identify and call MCP tools.
# Trigger: this file is a reusable workflow, started only when another
# workflow invokes it through `workflow_call`.
on:
  workflow_call:
    inputs:
      # Forwarded to the evaluation run as GITHUB_PR_NUMBER (see last step).
      pr_number:
        description: Pull request number
        required: true
        type: string

jobs:
  evaluations:
    name: MCP tool calling evaluations
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      # Node version is read from .nvmrc; npm caching is enabled and pointed
      # at package-lock.json as its dependency file.
      - name: Use Node.js
        uses: actions/setup-node@v6
        with:
          node-version-file: '.nvmrc'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'

      # NOTE(review): `--force` tells npm to proceed past checks it would
      # otherwise fail on - confirm it is still required here.
      - name: Install Node dependencies
        run: npm ci --force --include=dev

      - name: Build project
        run: npm run build

      # NOTE(review): the secrets below are not declared under
      # `on.workflow_call.secrets`, so callers presumably pass them with
      # `secrets: inherit` - confirm against the calling workflow.
      - name: Run evaluations
        run: npm run evals:run
        env:
          GITHUB_PR_NUMBER: ${{ inputs.pr_number }}
          PHOENIX_API_KEY: ${{ secrets.PHOENIX_API_KEY }}
          PHOENIX_BASE_URL: ${{ secrets.PHOENIX_BASE_URL }}
          OPENROUTER_BASE_URL: ${{ secrets.OPENROUTER_BASE_URL }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}