Skip to content

Commit a92b395

Browse files
committed
Auto-commit
1 parent 82fa652 commit a92b395

File tree

3 files changed

+851
-8
lines changed

3 files changed

+851
-8
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
#!/bin/bash
#
# debate_tool_triggering_challenge.sh - AI Debate Tool Triggering Challenge
#
# Tests that the AI Debate system properly collects and executes tool calls.
# Ensures tool_calls are not discarded and action indicators are present.
#
# Environment:
#   HELIXAGENT_HOST  host name used for HOST / BASE_URL (default: localhost)
#   HELIXAGENT_PORT  port used for PORT / BASE_URL     (default: 7061)

# Strict mode: abort on command failure (-e), on expansion of an unset
# variable (-u, so a misspelled path variable cannot silently become ""),
# and on a failure in any stage of a pipeline (pipefail).
set -euo pipefail

# Colors. Stored as literal backslash sequences; they are turned into real
# escape codes by `echo -e` at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
# shellcheck disable=SC2034  # YELLOW is not referenced elsewhere in this script
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Directory containing this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Default configuration (overridable through the environment)
HOST="${HELIXAGENT_HOST:-localhost}"
PORT="${HELIXAGENT_PORT:-7061}"
# shellcheck disable=SC2034  # BASE_URL is not referenced elsewhere in this script
BASE_URL="http://${HOST}:${PORT}"
# Each run writes into its own timestamped directory.
RESULTS_DIR="${PROJECT_ROOT}/challenges/results/debate_tool_triggering/$(date +%Y%m%d_%H%M%S)"
25+
26+
# ---------------------------------------------------------------------------
# Banner: describe what the challenge checks and echo the effective settings.
# ---------------------------------------------------------------------------
printf '\n'
printf '%s\n' \
  "======================================================================" \
  " HELIXAGENT AI DEBATE TOOL TRIGGERING CHALLENGE" \
  "======================================================================" \
  ""
printf '%b\n' "${CYAN}This challenge verifies that the AI Debate system:${NC}"
printf '%s\n' \
  " 1. Collects tool_calls from debate positions" \
  " 2. Uses collected tool_calls in ACTION PHASE" \
  " 3. Shows action indicators (<---, --->)" \
  " 4. Does NOT discard tool_calls from LLM responses" \
  "" \
  "Host: $HOST" \
  "Port: $PORT" \
  "Results: $RESULTS_DIR" \
  ""

# Per-test logs and failures.txt are written under this directory.
mkdir -p "${RESULTS_DIR}/results"

# Running totals, updated by record_result.
TOTAL_TESTS=0
PASSED_TESTS=0
FAILED_TESTS=0
49+
50+
#######################################
# Record the outcome of one check and print a PASS/FAIL line for it.
# Globals:
#   TOTAL_TESTS, PASSED_TESTS, FAILED_TESTS (incremented)
#   RESULTS_DIR (read) - failures are appended to
#                        $RESULTS_DIR/results/failures.txt
#   GREEN, RED, NC (read) - color escape sequences for the console line
# Arguments:
#   $1 - human-readable test name
#   $2 - "pass" for success; any other value is counted as a failure
#   $3 - details (e.g. captured command output) logged on failure; optional
# Outputs:
#   One status line on stdout; on failure, a named entry in failures.txt.
#######################################
record_result() {
  local test_name="$1"
  local status="$2"
  local details="${3:-}"

  TOTAL_TESTS=$((TOTAL_TESTS + 1))

  if [[ "$status" == "pass" ]]; then
    PASSED_TESTS=$((PASSED_TESTS + 1))
    # %b expands the \033 sequences held in the color variables; the test
    # name goes through %s so it is always printed verbatim.
    printf ' %b[PASS]%b %s\n' "$GREEN" "$NC" "$test_name"
  else
    FAILED_TESTS=$((FAILED_TESTS + 1))
    printf ' %b[FAIL]%b %s\n' "$RED" "$NC" "$test_name"
    # Head every entry with the test name so each chunk of output in
    # failures.txt can be attributed. printf (not echo) keeps details that
    # look like echo options, such as "-n", intact.
    {
      printf '===== FAILED: %s =====\n' "$test_name"
      printf '%s\n\n' "$details"
    } >> "$RESULTS_DIR/results/failures.txt"
  fi
}
66+
67+
echo "----------------------------------------------------------------------"
echo "Phase 1: DebatePositionResponse Structure Tests"
echo "----------------------------------------------------------------------"

# The go package paths used from here on are relative to the project root.
cd "$PROJECT_ROOT"

# NOTE(review): `go test -run <pattern>` exits 0 when the pattern matches no
# test, so a renamed or deleted test would be recorded as a pass. Consider
# also checking the log for "=== RUN" — confirm before relying on it.
#
# Each log path is kept in a quoted variable so that a RESULTS_DIR containing
# whitespace is both written and read back correctly.

# Test DebatePositionResponse struct
echo -e "${BLUE}[RUN]${NC} Testing DebatePositionResponse struct..."
log_file="$RESULTS_DIR/results/struct_test.txt"
if go test -v -run "TestDebatePositionResponse_Struct" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "DebatePositionResponse struct holds content and tool_calls" "pass" ""
else
  record_result "DebatePositionResponse struct holds content and tool_calls" "fail" "$(cat "$log_file")"
fi

# Test empty tool calls handling
echo -e "${BLUE}[RUN]${NC} Testing empty tool calls handling..."
log_file="$RESULTS_DIR/results/empty_test.txt"
if go test -v -run "TestDebatePositionResponse_EmptyToolCalls" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Empty tool_calls handled correctly" "pass" ""
else
  record_result "Empty tool_calls handled correctly" "fail" "$(cat "$log_file")"
fi

# Test multiple tool calls
echo -e "${BLUE}[RUN]${NC} Testing multiple tool calls handling..."
log_file="$RESULTS_DIR/results/multiple_test.txt"
if go test -v -run "TestDebatePositionResponse_MultipleToolCalls" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Multiple tool_calls preserved correctly" "pass" ""
else
  record_result "Multiple tool_calls preserved correctly" "fail" "$(cat "$log_file")"
fi

# Test JSON serialization
echo -e "${BLUE}[RUN]${NC} Testing JSON serialization..."
log_file="$RESULTS_DIR/results/json_test.txt"
if go test -v -run "TestDebatePositionResponse_JSONSerialization" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "JSON serialization preserves tool_calls" "pass" ""
else
  record_result "JSON serialization preserves tool_calls" "fail" "$(cat "$log_file")"
fi
104+
105+
echo ""
echo "----------------------------------------------------------------------"
echo "Phase 2: Tool Calls Collection Tests"
echo "----------------------------------------------------------------------"

# Each log path is kept in a quoted variable so that a RESULTS_DIR containing
# whitespace is both written and read back correctly.

# Test tool calls collection from debate
echo -e "${BLUE}[RUN]${NC} Testing tool calls collection from debate positions..."
log_file="$RESULTS_DIR/results/collection_test.txt"
if go test -v -run "TestToolCallsCollectionFromDebate" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Tool calls collected from all debate positions" "pass" ""
else
  record_result "Tool calls collected from all debate positions" "fail" "$(cat "$log_file")"
fi

# Test debate tool calls integration
echo -e "${BLUE}[RUN]${NC} Testing full debate tool calls integration..."
log_file="$RESULTS_DIR/results/integration_test.txt"
if go test -v -run "TestDebateToolCallsIntegration" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Full debate-to-action tool calls flow" "pass" ""
else
  record_result "Full debate-to-action tool calls flow" "fail" "$(cat "$log_file")"
fi

# Test tool calls not discarded
echo -e "${BLUE}[RUN]${NC} Testing tool calls preservation..."
log_file="$RESULTS_DIR/results/preservation_test.txt"
if go test -v -run "TestToolCallsNotDiscarded" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Tool calls are NOT discarded" "pass" ""
else
  record_result "Tool calls are NOT discarded" "fail" "$(cat "$log_file")"
fi
133+
134+
echo ""
echo "----------------------------------------------------------------------"
echo "Phase 3: Action Indicator Tests"
echo "----------------------------------------------------------------------"

# Each log path is kept in a quoted variable so that a RESULTS_DIR containing
# whitespace is both written and read back correctly.

# Test action indicator generation
echo -e "${BLUE}[RUN]${NC} Testing action indicator generation..."
log_file="$RESULTS_DIR/results/indicator_gen_test.txt"
if go test -v -run "TestActionIndicatorGeneration" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Action indicators (<---, --->) generated correctly" "pass" ""
else
  record_result "Action indicators (<---, --->) generated correctly" "fail" "$(cat "$log_file")"
fi

# Test action indicator visibility
echo -e "${BLUE}[RUN]${NC} Testing action indicator visibility..."
log_file="$RESULTS_DIR/results/indicator_vis_test.txt"
if go test -v -run "TestActionIndicatorVisibility" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "All action indicators visible in output" "pass" ""
else
  record_result "All action indicators visible in output" "fail" "$(cat "$log_file")"
fi
154+
155+
echo ""
echo "----------------------------------------------------------------------"
echo "Phase 4: Tool Call Generation Tests"
echo "----------------------------------------------------------------------"

# Each log path is kept in a quoted variable so that a RESULTS_DIR containing
# whitespace is both written and read back correctly.

# Test tool call generation with tools
echo -e "${BLUE}[RUN]${NC} Testing tool call generation with available tools..."
log_file="$RESULTS_DIR/results/gen_with_tools_test.txt"
if go test -v -run "TestGenerateActionToolCalls_WithTools" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Tool calls generated when tools available" "pass" ""
else
  record_result "Tool calls generated when tools available" "fail" "$(cat "$log_file")"
fi

# Test tool call generation without tools
echo -e "${BLUE}[RUN]${NC} Testing tool call generation without tools..."
log_file="$RESULTS_DIR/results/gen_no_tools_test.txt"
if go test -v -run "TestGenerateActionToolCalls_NoTools" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "No tool calls generated when no tools available" "pass" ""
else
  record_result "No tool calls generated when no tools available" "fail" "$(cat "$log_file")"
fi

# Test search query tool call generation
echo -e "${BLUE}[RUN]${NC} Testing search query tool call generation..."
log_file="$RESULTS_DIR/results/gen_search_test.txt"
if go test -v -run "TestGenerateActionToolCalls_SearchQuery" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Grep tool calls generated for search queries" "pass" ""
else
  record_result "Grep tool calls generated for search queries" "fail" "$(cat "$log_file")"
fi
183+
184+
echo ""
echo "----------------------------------------------------------------------"
echo "Phase 5: Tool Call Structure Validation Tests"
echo "----------------------------------------------------------------------"

# Each log path is kept in a quoted variable so that a RESULTS_DIR containing
# whitespace is both written and read back correctly.

# Test streaming tool call field validation
echo -e "${BLUE}[RUN]${NC} Testing streaming tool call field validation..."
log_file="$RESULTS_DIR/results/field_validation_test.txt"
if go test -v -run "TestStreamingToolCall_FieldValidation" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "StreamingToolCall fields validated correctly" "pass" ""
else
  record_result "StreamingToolCall fields validated correctly" "fail" "$(cat "$log_file")"
fi

# Test tool call arguments validation
echo -e "${BLUE}[RUN]${NC} Testing tool call arguments validation..."
log_file="$RESULTS_DIR/results/args_validation_test.txt"
if go test -v -run "TestToolCallArgumentsValidation" ./internal/handlers/... > "$log_file" 2>&1; then
  record_result "Tool call arguments follow correct format (snake_case)" "pass" ""
else
  record_result "Tool call arguments follow correct format (snake_case)" "fail" "$(cat "$log_file")"
fi
204+
205+
echo ""
echo "----------------------------------------------------------------------"
echo "Phase 6: Code Compilation and Build Test"
echo "----------------------------------------------------------------------"

# Build output is discarded (-o /dev/null); only the exit status and the
# captured compiler messages matter. Each log path is kept in a quoted
# variable so that a RESULTS_DIR containing whitespace is both written and
# read back correctly.

# Test that the code compiles
echo -e "${BLUE}[RUN]${NC} Testing code compilation..."
log_file="$RESULTS_DIR/results/build_test.txt"
if go build -o /dev/null ./cmd/helixagent/ > "$log_file" 2>&1; then
  record_result "HelixAgent compiles successfully with tool triggering changes" "pass" ""
else
  record_result "HelixAgent compiles successfully with tool triggering changes" "fail" "$(cat "$log_file")"
fi

# Test handlers package compiles
echo -e "${BLUE}[RUN]${NC} Testing handlers package compilation..."
log_file="$RESULTS_DIR/results/handlers_build_test.txt"
if go build -o /dev/null ./internal/handlers/ > "$log_file" 2>&1; then
  record_result "Handlers package compiles with DebatePositionResponse" "pass" ""
else
  record_result "Handlers package compiles with DebatePositionResponse" "fail" "$(cat "$log_file")"
fi
225+
226+
# ---------------------------------------------------------------------------
# Final report: print the counters, then either write CHALLENGE_PASSED.txt and
# exit 0 (no failures) or point at failures.txt and exit 1.
# ---------------------------------------------------------------------------
printf '\n'
printf '%s\n' \
  "======================================================================" \
  " CHALLENGE RESULTS" \
  "======================================================================" \
  ""
printf '%b\n' \
  "Total Tests: ${BLUE}$TOTAL_TESTS${NC}" \
  "Passed: ${GREEN}$PASSED_TESTS${NC}" \
  "Failed: ${RED}$FAILED_TESTS${NC}"
printf '\n'

if [ "$FAILED_TESTS" -eq 0 ]; then
  printf '%b\n' \
    "${GREEN}============================================${NC}" \
    "${GREEN} ALL TESTS PASSED - CHALLENGE COMPLETE! ${NC}" \
    "${GREEN}============================================${NC}"
  printf '\n'
  printf '%b\n' "${CYAN}Tool triggering in AI Debate is working correctly:${NC}"
  printf '%s\n' \
    " - DebatePositionResponse returns content AND tool_calls" \
    " - Tool calls collected from all 5 debate positions" \
    " - ACTION PHASE uses collected tool_calls" \
    " - Action indicators (<---, --->) are visible" \
    " - Tool_calls are NOT discarded" \
    ""
  # Save success summary
  printf '%s\n' \
    "AI Debate Tool Triggering Challenge PASSED" \
    "==========================================" \
    "" \
    "Date: $(date)" \
    "Total Tests: $TOTAL_TESTS" \
    "Passed: $PASSED_TESTS" \
    "Failed: $FAILED_TESTS" \
    "" \
    "The AI Debate system correctly:" \
    "1. Returns tool_calls from generateRealDebateResponse()" \
    "2. Collects tool_calls from all debate positions" \
    "3. Uses collected tool_calls in ACTION PHASE" \
    "4. Shows action indicators (<---, --->)" \
    "5. Does NOT discard tool_calls from LLM responses" \
    > "$RESULTS_DIR/CHALLENGE_PASSED.txt"
  exit 0
else
  printf '%b\n' \
    "${RED}============================================${NC}" \
    "${RED} CHALLENGE FAILED - $FAILED_TESTS TESTS FAILED ${NC}" \
    "${RED}============================================${NC}"
  printf '\n'
  printf '%s\n' "See $RESULTS_DIR/results/failures.txt for details"
  exit 1
fi

0 commit comments

Comments
 (0)