Skip to content

Commit 82fa652

Browse files
committed
Auto-commit
1 parent 26246be commit 82fa652

15 files changed

+3380
-0
lines changed

Makefile

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,75 @@ help:
755755
@echo " setup-prod Setup production environment"
756756
@echo " help Show this help message"
757757

758+
# =============================================================================
759+
# CI/CD VALIDATION TARGETS (Prevention Measures)
760+
# =============================================================================
761+
762+
# ci-validate-fallback: run the reliable-fallback challenge script and stop
# the build with a clear message when it reports a failure.
# Declared phony so a stray file with this name cannot make the target look
# up to date and silently skip the validation.
.PHONY: ci-validate-fallback
ci-validate-fallback:
	@echo "🔍 CI/CD: Validating reliable fallback mechanism..."
	@./challenges/scripts/reliable_fallback_challenge.sh || { echo "❌ Fallback validation failed!"; exit 1; }
	@echo "✅ Fallback mechanism validated"
766+
767+
# ci-validate-monitoring: run the monitoring-related Go tests (circuit breaker,
# OAuth token, provider health and fallback-chain validator) under
# ./internal/services/... and fail with a clear message if any of them fail.
# Declared phony because the target never produces a file of this name.
.PHONY: ci-validate-monitoring
ci-validate-monitoring:
	@echo "🔍 CI/CD: Validating monitoring systems..."
	@go test -v -run "TestCircuitBreakerMonitor|TestOAuthTokenMonitor|TestProviderHealthMonitor|TestFallbackChainValidator" ./internal/services/... || { echo "❌ Monitoring validation failed!"; exit 1; }
	@echo "✅ Monitoring systems validated"
771+
772+
# ci-validate-all: run every CI validation in a fixed order (fallback first,
# then monitoring). The sub-targets are invoked through $(MAKE) rather than
# listed as prerequisites so the order is kept even under `make -j`.
# Declared phony because the target never produces a file of this name.
.PHONY: ci-validate-all
ci-validate-all:
	@echo "🔍 CI/CD: Running all validation checks..."
	@$(MAKE) ci-validate-fallback
	@$(MAKE) ci-validate-monitoring
	@echo "✅ All CI/CD validations passed"
777+
778+
# ci-pre-commit: checks intended to run before a commit. Runs the fmt and vet
# targets (defined elsewhere in this Makefile, not shown in this section), the
# fallback validation, and the fallback-related Go tests, in that order.
# Declared phony because the target never produces a file of this name.
.PHONY: ci-pre-commit
ci-pre-commit:
	@echo "🔍 Pre-commit validation..."
	@$(MAKE) fmt
	@$(MAKE) vet
	@$(MAKE) ci-validate-fallback
	@go test -run "TestReliableAPIProvidersCollection|TestFallbackChainIncludesWorkingProviders" ./internal/services/... || { echo "❌ Pre-commit tests failed!"; exit 1; }
	@echo "✅ Pre-commit validation passed"
785+
786+
# ci-pre-push: checks intended to run before a push. Runs the pre-commit
# checks, then the test-unit target (defined elsewhere in this Makefile, not
# shown in this section), then the monitoring validation, in that order.
# Declared phony because the target never produces a file of this name.
.PHONY: ci-pre-push
ci-pre-push:
	@echo "🔍 Pre-push validation..."
	@$(MAKE) ci-pre-commit
	@$(MAKE) test-unit
	@$(MAKE) ci-validate-monitoring
	@echo "✅ Pre-push validation passed"
792+
793+
# Monitoring endpoints
#
# Each target below calls the monitoring API of a running server and
# pretty-prints the JSON reply with jq. The server address defaults to
# http://localhost:7061 and can be overridden from the environment or the
# command line, e.g. `make monitoring-status HELIX_URL=http://host:7061`.
HELIX_URL ?= http://localhost:7061
MONITORING_URL = $(HELIX_URL)/v1/monitoring

# $(call monitoring_request,<extra curl flags>,<endpoint path>)
# The reply is fetched first and only then handed to jq: a plain
# `curl | jq` pipeline reports jq's exit status, so an unreachable server
# would go unnoticed. -S keeps curl's own error message visible despite -s.
monitoring_request = reply=$$(curl -sS $(1) "$(MONITORING_URL)/$(2)") && printf '%s\n' "$$reply" | jq .

.PHONY: monitoring-status monitoring-circuit-breakers monitoring-oauth-tokens \
	monitoring-provider-health monitoring-fallback-chain \
	monitoring-reset-circuits monitoring-validate-fallback \
	monitoring-force-health-check

monitoring-status:
	@echo "📊 Checking monitoring status..."
	@$(call monitoring_request,,status)

monitoring-circuit-breakers:
	@echo "📊 Checking circuit breakers..."
	@$(call monitoring_request,,circuit-breakers)

monitoring-oauth-tokens:
	@echo "📊 Checking OAuth tokens..."
	@$(call monitoring_request,,oauth-tokens)

monitoring-provider-health:
	@echo "📊 Checking provider health..."
	@$(call monitoring_request,,provider-health)

monitoring-fallback-chain:
	@echo "📊 Checking fallback chain..."
	@$(call monitoring_request,,fallback-chain)

# -f makes curl fail on an HTTP error status, so the success message below is
# printed only when the server actually accepted the reset request.
monitoring-reset-circuits:
	@echo "🔄 Resetting all circuit breakers..."
	@$(call monitoring_request,-f -X POST,circuit-breakers/reset-all)
	@echo "✅ Circuit breakers reset"

monitoring-validate-fallback:
	@echo "🔍 Validating fallback chain..."
	@$(call monitoring_request,-X POST,fallback-chain/validate)

monitoring-force-health-check:
	@echo "🔍 Forcing provider health check..."
	@$(call monitoring_request,-X POST,provider-health/check)
826+
758827
# =============================================================================
759828
# LLMSVERIFIER INTEGRATION TARGETS
760829
# =============================================================================
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/bin/bash
2+
# reliable_fallback_challenge.sh
3+
#
4+
# CRITICAL: This challenge validates that the AI Debate Team has working fallback providers
5+
#
6+
# PROBLEM SOLVED: When OAuth providers (Claude, Qwen) fail due to token restrictions,
7+
# the system MUST fall back to reliable API providers (Cerebras, Mistral, DeepSeek, Gemini)
8+
# instead of failing completely.
9+
#
10+
# ISSUE HISTORY:
11+
# - Original fallback chain was: Claude -> Zen -> Zen (all failing)
12+
# - Claude OAuth tokens are restricted to Claude Code product only
13+
# - Zen provider had 401 errors causing circuit breaker to open
14+
# - Result: All debate positions showed "Unable to provide analysis at this time"
15+
#
16+
# FIX: Added collectReliableAPIProviders() which ensures Cerebras, Mistral, DeepSeek,
17+
# and Gemini are ALWAYS included as fallbacks before free models.
18+
19+
# Don't use set -e as it causes issues with counter increments and grep patterns
20+
# set -e
21+
22+
# ANSI colour escapes; they are interpreted later by `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m"

# Server base URL: keep a caller-supplied HELIX_URL, otherwise use the default.
: "${HELIX_URL:=http://localhost:7061}"

# Absolute directory of this script, and the project root two levels above it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)

# Result counters, updated by the log_pass / log_fail helpers.
PASSED=0 FAILED=0 TOTAL=0
38+
39+
# Logging helpers. Each prints one colour-tagged line; log_pass and log_fail
# additionally bump their own counter and the total. log_info and log_warn
# leave the counters untouched.
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_pass() {
    echo -e "${GREEN}[PASS]${NC} $1"
    PASSED=$((PASSED + 1))
    TOTAL=$((TOTAL + 1))
}

log_fail() {
    echo -e "${RED}[FAIL]${NC} $1"
    FAILED=$((FAILED + 1))
    TOTAL=$((TOTAL + 1))
}

log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}
44+
45+
# check_result STATUS MESSAGE
# Record MESSAGE as a pass when STATUS is the integer 0, otherwise as a fail.
# A missing, empty or non-numeric STATUS counts as a failure instead of
# producing a `[` syntax error, which the unquoted original did.
check_result() {
    local status="${1:-1}"
    # A non-numeric status makes `[` error out; silence that message and
    # fall through to the failure branch.
    if [ "$status" -eq 0 ] 2>/dev/null; then
        log_pass "$2"
    else
        log_fail "$2"
    fi
}
52+
53+
# Opening banner: blank line, framed two-line title, blank line.
cat <<'BANNER'

═══════════════════════════════════════════════════════════════════════════
 RELIABLE FALLBACK CHALLENGE
 Validates that working providers are in the fallback chain
═══════════════════════════════════════════════════════════════════════════

BANNER
60+
61+
# Test 1: Server is healthy
# Queries ${HELIX_URL}/health and aborts the whole script with exit code 1
# when the reply does not report a healthy status.
log_info "Test 1: Checking server health..."
# --max-time bounds the whole request so a server that accepts the connection
# but never answers cannot hang the script.
HEALTH=$(curl -s --connect-timeout 10 --max-time 30 "${HELIX_URL}/health" 2>/dev/null || echo "")
# Match the status field itself rather than the exact byte sequence, so extra
# whitespace or additional JSON fields do not cause a false failure.
if printf '%s' "$HEALTH" | grep -Eq '"status"[[:space:]]*:[[:space:]]*"healthy"'; then
    log_pass "Server is healthy"
else
    log_fail "Server is not healthy: $HEALTH"
    exit 1
fi
70+
71+
# Test 2: Unit tests pass
# Runs the fallback-related Go unit tests from the project root. Later steps
# that invoke `go test` with a relative package path rely on this `cd`.
log_info "Test 2: Running unit tests for fallback mechanism..."
if ! cd "${PROJECT_ROOT}"; then
    log_fail "Unit tests failed: cannot enter project root ${PROJECT_ROOT}"
elif ! UNIT_TEST_OUTPUT=$(go test -run "TestReliableAPIProvidersCollection|TestFallbackChainIncludesWorkingProviders|TestDebateTeamMustHaveWorkingFallbacks" ./internal/services/ 2>&1); then
    log_fail "Unit tests failed"
    # Show the end of the output so the failure can be diagnosed.
    printf '%s\n' "$UNIT_TEST_OUTPUT" | tail -20
elif printf '%s' "$UNIT_TEST_OUTPUT" | grep -q "no tests to run"; then
    # go test exits 0 when the -run pattern matches nothing; that must not
    # count as a pass.
    log_fail "Unit tests failed: no matching tests were found"
else
    log_pass "Unit tests pass"
fi
79+
80+
# Test 3: Reliable API providers are defined
# Looks for the Cerebras and Mistral model assignments in
# debate_team_config.go.
log_info "Test 3: Checking reliable provider model definitions..."
DEBATE_CONFIG_SRC="${PROJECT_ROOT}/internal/services/debate_team_config.go"
# Dots are escaped so they match literally, and any amount of whitespace is
# accepted after the colon so column-aligned source still matches.
CEREBRAS_MODEL=$(grep -Eo 'Cerebras:[[:space:]]+"llama-3\.3-70b"' "$DEBATE_CONFIG_SRC" 2>/dev/null || echo "")
MISTRAL_MODEL=$(grep -Eo 'Mistral:[[:space:]]+"mistral-large-latest"' "$DEBATE_CONFIG_SRC" 2>/dev/null || echo "")

if [ -n "$CEREBRAS_MODEL" ] && [ -n "$MISTRAL_MODEL" ]; then
    log_pass "Reliable provider models are defined"
else
    log_fail "Reliable provider models not found in code"
fi
90+
91+
# Test 4: collectReliableAPIProviders method exists
# Searches debate_team_config.go for the method's declaration line.
log_info "Test 4: Checking collectReliableAPIProviders method exists..."
if ! grep -q "func (dtc \*DebateTeamConfig) collectReliableAPIProviders()" "${PROJECT_ROOT}/internal/services/debate_team_config.go"
then
    log_fail "collectReliableAPIProviders method not found"
else
    log_pass "collectReliableAPIProviders method exists"
fi
98+
99+
# Test 5: collectReliableAPIProviders is called before free models
# Compares the source line numbers of the first non-declaration reference to
# the reliable-provider collector and to the Zen model collector in
# debate_team_config.go. Only the Zen ordering is enforced; the previously
# computed but unused OpenRouter line number has been dropped.
log_info "Test 5: Verifying collection order (reliable before free)..."
CALL_ORDER=$(grep -n "collect.*Models\|collect.*Providers" "${PROJECT_ROOT}/internal/services/debate_team_config.go" 2>/dev/null | grep -v "func" || echo "")
RELIABLE_LINE=$(echo "$CALL_ORDER" | grep "ReliableAPI" | head -1 | cut -d: -f1)
ZEN_LINE=$(echo "$CALL_ORDER" | grep "ZenModels" | head -1 | cut -d: -f1)

# Both values must be plain line numbers before they can be compared.
if [[ "$RELIABLE_LINE" =~ ^[0-9]+$ ]] && [[ "$ZEN_LINE" =~ ^[0-9]+$ ]]; then
    if [ "$RELIABLE_LINE" -lt "$ZEN_LINE" ]; then
        log_pass "Reliable providers collected before Zen models"
    else
        log_fail "Reliable providers should be collected BEFORE Zen models"
    fi
else
    # Not verifiable: counted in the total, but neither passed nor failed.
    log_warn "Could not verify collection order"
    ((TOTAL++)) || true
fi
116+
117+
# Test 6: API actually responds with content (not "Unable to provide analysis")
# Sends one chat-completion request to the debate model and inspects the
# first choice's message content.
log_info "Test 6: Testing actual API response..."
# NOTE: Cognee timeouts can slow this down, so we use a longer timeout
RESPONSE=$(curl -s -X POST "${HELIX_URL}/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model":"helixagent-debate","messages":[{"role":"user","content":"What is 1+1?"}],"max_tokens":50}' \
    --connect-timeout 30 --max-time 120 2>/dev/null || echo "")

if echo "$RESPONSE" | grep -q '"content"'; then
    # An empty CONTENT means the JSON could not be parsed (or python3 is
    # unavailable) or the content field itself is empty.
    CONTENT=$(echo "$RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['choices'][0]['message']['content'])" 2>/dev/null || echo "")
    case "$CONTENT" in
        "")
            log_fail "API response content is empty or could not be parsed"
            ;;
        *"Unable to provide analysis at this time"*)
            # Substring match: the fallback phrase counts as a failure even
            # when it is embedded in a longer answer or lacks the final period.
            log_fail "API returns fallback message instead of real content"
            ;;
        *)
            log_pass "API returns actual content: ${CONTENT:0:50}..."
            ;;
    esac
else
    log_fail "API response malformed: $RESPONSE"
fi
135+
136+
# Test 7: Server logs show Cerebras/Mistral being used
# Scans the last 100 lines of the server log for a completed Cerebras or
# Mistral call. The log path defaults to /tmp/helix_new.log and can be
# overridden through the HELIX_LOG environment variable.
log_info "Test 7: Checking if Cerebras/Mistral are being used in requests..."
HELIX_LOG="${HELIX_LOG:-/tmp/helix_new.log}"
if [ ! -r "$HELIX_LOG" ]; then
    # Say explicitly that the log is missing rather than implying it was
    # read and contained no matching line.
    log_warn "Could not verify provider usage: log file not readable: $HELIX_LOG"
    ((TOTAL++)) || true
else
    LOG_CHECK=$(tail -100 "$HELIX_LOG" 2>/dev/null | grep -E "Cerebras API call completed|Mistral API call completed" | head -1 || echo "")
    if [ -n "$LOG_CHECK" ]; then
        log_pass "Working providers are being used: ${LOG_CHECK:0:60}..."
    else
        log_warn "Could not verify provider usage in logs (may need fresh request)"
        ((TOTAL++)) || true
    fi
fi
145+
146+
# Test 8: No circuit breakers blocking all fallbacks
# Counts "circuit breaker is open" messages in the last 50 lines of the
# server log; five or more is treated as a failure. The log path defaults to
# /tmp/helix_new.log and can be overridden through HELIX_LOG.
log_info "Test 8: Checking circuit breaker status..."
HELIX_LOG="${HELIX_LOG:-/tmp/helix_new.log}"
if [ ! -r "$HELIX_LOG" ]; then
    # Without a log nothing was checked, so do not report a pass.
    log_warn "Could not check circuit breakers: log file not readable: $HELIX_LOG"
    ((TOTAL++)) || true
else
    # grep -c always prints a count (0 when nothing matches), so no fallback
    # echo is needed even though its exit status is non-zero in that case.
    CIRCUIT_ERRORS=$(tail -50 "$HELIX_LOG" 2>/dev/null | grep -c "circuit breaker is open")
    if [ "${CIRCUIT_ERRORS:-0}" -lt 5 ] 2>/dev/null; then
        log_pass "Circuit breakers are not blocking all fallbacks"
    else
        log_fail "Too many circuit breaker open errors: $CIRCUIT_ERRORS"
    fi
fi
156+
157+
# Test 9: Environment variables for reliable providers
# Warns for every API key variable that is unset or empty. Missing keys never
# fail the challenge; they are only reported.
log_info "Test 9: Checking required environment variables..."
MISSING_VARS=0
for VAR in CEREBRAS_API_KEY MISTRAL_API_KEY; do
    # ${!VAR} is indirect expansion: the value of the variable named by VAR.
    [ -n "${!VAR}" ] && continue
    log_warn "$VAR not set"
    MISSING_VARS=$((MISSING_VARS + 1))
done

if [ "$MISSING_VARS" -ne 0 ]; then
    log_warn "$MISSING_VARS API keys missing - some fallbacks unavailable"
    TOTAL=$((TOTAL + 1))
else
    log_pass "All reliable provider API keys are set"
fi
173+
174+
# Test 10: getFallbackLLMs prioritizes non-OAuth
# Runs the single priority subtest. It passes only when `go test` exits 0 AND
# a "--- PASS" line for that subtest is present. Searching the output for the
# bare word "PASS" would also succeed when another test failed in the same
# run or when no test matched the -run pattern.
log_info "Test 10: Running getFallbackLLMs priority test..."
# The subshell makes this step independent of the current directory.
PRIORITY_OUTPUT=$(cd "${PROJECT_ROOT}" && go test -v -run "TestFallbackChainIncludesWorkingProviders/getFallbackLLMs_prioritizes" ./internal/services/ 2>&1)
PRIORITY_STATUS=$?
if [ "$PRIORITY_STATUS" -eq 0 ] && printf '%s\n' "$PRIORITY_OUTPUT" | grep -Eq -- "--- PASS: TestFallbackChainIncludesWorkingProviders/.*getFallbackLLMs_prioritizes"; then
    log_pass "getFallbackLLMs correctly prioritizes non-OAuth providers"
else
    log_fail "getFallbackLLMs priority test failed"
fi
181+
182+
# Summary
# Prints the counters, then a coloured verdict banner. The script exits 0
# when no test failed and 1 otherwise.
SUMMARY_RULE="═══════════════════════════════════════════════════════════════════════════"

echo ""
echo "$SUMMARY_RULE"
echo " CHALLENGE SUMMARY"
echo "$SUMMARY_RULE"
echo ""
echo -e " Total Tests: ${TOTAL}"
echo -e " ${GREEN}Passed:${NC} ${PASSED}"
echo -e " ${RED}Failed:${NC} ${FAILED}"
echo ""

if [ "$FAILED" -ne 0 ]; then
    echo -e "${RED}${SUMMARY_RULE}${NC}"
    echo -e "${RED} ❌ CHALLENGE FAILED - ${FAILED} tests failed${NC}"
    echo -e "${RED}${SUMMARY_RULE}${NC}"
    exit 1
fi

echo -e "${GREEN}${SUMMARY_RULE}${NC}"
echo -e "${GREEN} ✅ CHALLENGE PASSED - Reliable fallback mechanism is working!${NC}"
echo -e "${GREEN}${SUMMARY_RULE}${NC}"
exit 0

helixagent

19 MB
Binary file not shown.

0 commit comments

Comments
 (0)