|
| 1 | +#!/bin/bash |
| 2 | +# reliable_fallback_challenge.sh |
| 3 | +# |
| 4 | +# CRITICAL: This challenge validates that the AI Debate Team has working fallback providers |
| 5 | +# |
| 6 | +# PROBLEM SOLVED: When OAuth providers (Claude, Qwen) fail due to token restrictions, |
| 7 | +# the system MUST fall back to reliable API providers (Cerebras, Mistral, DeepSeek, Gemini) |
| 8 | +# instead of failing completely. |
| 9 | +# |
| 10 | +# ISSUE HISTORY: |
| 11 | +# - Original fallback chain was: Claude -> Zen -> Zen (all failing) |
| 12 | +# - Claude OAuth tokens are restricted to Claude Code product only |
| 13 | +# - Zen provider had 401 errors causing circuit breaker to open |
| 14 | +# - Result: All debate positions showed "Unable to provide analysis at this time" |
| 15 | +# |
| 16 | +# FIX: Added collectReliableAPIProviders() which ensures Cerebras, Mistral, DeepSeek, |
| 17 | +# and Gemini are ALWAYS included as fallbacks before free models. |
| 18 | + |
| 19 | +# Don't use set -e as it causes issues with counter increments and grep patterns |
| 20 | +# set -e |
| 21 | + |
# Colors
# ANSI escape sequences kept in their backslash form; they are expanded only
# when printed (e.g. by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # resets the terminal color
# Declared read-only after assignment so a later typo cannot clobber them.
readonly RED GREEN YELLOW BLUE NC

# Configuration
# HELIX_URL can be overridden from the environment; the default targets a
# server on localhost.
HELIX_URL="${HELIX_URL:-http://localhost:7061}"
# Directory containing this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
readonly HELIX_URL SCRIPT_DIR PROJECT_ROOT

# Counters
# Mutable tallies updated as individual checks report their outcome.
PASSED=0
FAILED=0
TOTAL=0
| 38 | + |
# Helper functions
#
# Each helper prints one tagged, colorized line to stdout. The color variables
# (BLUE, GREEN, RED, YELLOW, NC) hold backslash escape sequences, so they are
# expanded with %b; the message itself is printed with %s so that backslashes
# in it (for example inside a JSON response body) are shown verbatim instead
# of being interpreted as escapes.

# Prints an informational message. Does not touch any counter.
#   $1 - message text
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$1"
}

# Prints a success message and records one passed test.
#   $1 - message text
# Globals written: PASSED, TOTAL
log_pass() {
  printf '%b[PASS]%b %s\n' "${GREEN}" "${NC}" "$1"
  PASSED=$((PASSED + 1))
  TOTAL=$((TOTAL + 1))
}

# Prints a failure message and records one failed test.
#   $1 - message text
# Globals written: FAILED, TOTAL
log_fail() {
  printf '%b[FAIL]%b %s\n' "${RED}" "${NC}" "$1"
  FAILED=$((FAILED + 1))
  TOTAL=$((TOTAL + 1))
}

# Prints a warning message. Does not touch any counter.
#   $1 - message text
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"
}
| 44 | + |
# Records a test outcome based on a command exit status.
#   $1 - exit status to evaluate; 0 means success
#   $2 - description passed on to log_pass / log_fail
# A missing, empty, or non-numeric status is treated as a failure. The status
# is quoted and the comparison's stderr is silenced so that such input does not
# produce a shell syntax error message.
check_result() {
  local status="${1:-1}"
  local description="${2:-}"

  if [ "$status" -eq 0 ] 2>/dev/null; then
    log_pass "$description"
  else
    log_fail "$description"
  fi
}
| 52 | + |
# Start tests
# Opening banner, emitted as a single literal heredoc (no expansion).
cat <<'BANNER'

═══════════════════════════════════════════════════════════════════════════
 RELIABLE FALLBACK CHALLENGE
 Validates that working providers are in the fallback chain
═══════════════════════════════════════════════════════════════════════════

BANNER
| 60 | + |
# Test 1: Server is healthy
# Queries the /health endpoint and requires the exact healthy payload. This is
# the only check that aborts the whole script on failure.
log_info "Test 1: Checking server health..."
HEALTH=$(curl -s --connect-timeout 10 "${HELIX_URL}/health" 2>/dev/null || true)
case "$HEALTH" in
  '{"status":"healthy"}')
    log_pass "Server is healthy"
    ;;
  *)
    log_fail "Server is not healthy: $HEALTH"
    exit 1
    ;;
esac
| 70 | + |
# Test 2: Unit tests pass
# Runs the fallback-related Go unit tests from the project root.
log_info "Test 2: Running unit tests for fallback mechanism..."
# The directory change happens in the main shell (not a subshell) so that later
# commands using paths relative to the project root keep working. It is checked
# explicitly: without the check, a failed cd would let `go test` run in
# whatever directory the script was started from.
if [ -z "${PROJECT_ROOT}" ] || ! cd "${PROJECT_ROOT}"; then
  log_fail "Unit tests could not run: cannot enter project root '${PROJECT_ROOT}'"
elif go test -run "TestReliableAPIProvidersCollection|TestFallbackChainIncludesWorkingProviders|TestDebateTeamMustHaveWorkingFallbacks" ./internal/services/ > /dev/null 2>&1; then
  log_pass "Unit tests pass"
else
  log_fail "Unit tests failed"
fi
| 79 | + |
# Test 3: Reliable API providers are defined
# Looks for the Cerebras and Mistral model assignments in the debate team
# configuration source.
log_info "Test 3: Checking reliable provider model definitions..."
DEBATE_CONFIG_FILE="${PROJECT_ROOT}/internal/services/debate_team_config.go"

# -F matches the strings literally; as regular expressions the dots in
# "llama-3.3-70b" would match any character.
if grep -qF 'Cerebras: "llama-3.3-70b"' "$DEBATE_CONFIG_FILE" \
  && grep -qF 'Mistral: "mistral-large-latest"' "$DEBATE_CONFIG_FILE"; then
  log_pass "Reliable provider models are defined"
else
  log_fail "Reliable provider models not found in code"
fi
| 90 | + |
# Test 4: collectReliableAPIProviders method exists
# Searches the debate team configuration source for the method's declaration.
log_info "Test 4: Checking collectReliableAPIProviders method exists..."
CONFIG_SOURCE="${PROJECT_ROOT}/internal/services/debate_team_config.go"
# Basic regular expression: the parentheses are literal and \* is a literal '*'.
METHOD_SIGNATURE='func (dtc \*DebateTeamConfig) collectReliableAPIProviders()'
if ! grep -q "$METHOD_SIGNATURE" "$CONFIG_SOURCE"; then
  log_fail "collectReliableAPIProviders method not found"
else
  log_pass "collectReliableAPIProviders method exists"
fi
| 98 | + |
# Test 5: collectReliableAPIProviders is called before free models
# Compares source line numbers of the collect* calls: the first reference to
# the reliable-provider collection must appear before the first Zen reference.
log_info "Test 5: Verifying collection order (reliable before free)..."
# Extended regex alternation is used instead of the GNU-only "\|" form.
# Lines containing "func" are dropped so that declarations are not counted.
CALL_ORDER=$(grep -nE "collect.*(Models|Providers)" "${PROJECT_ROOT}/internal/services/debate_team_config.go" | grep -v "func" || echo "")
# grep -n prefixes each match with "<line>:", so field 1 is the line number.
RELIABLE_LINE=$(grep "ReliableAPI" <<<"$CALL_ORDER" | head -1 | cut -d: -f1)
ZEN_LINE=$(grep "ZenModels" <<<"$CALL_ORDER" | head -1 | cut -d: -f1)
OPENROUTER_LINE=$(grep "OpenRouter" <<<"$CALL_ORDER" | head -1 | cut -d: -f1)

if [ -n "$RELIABLE_LINE" ] && [ -n "$ZEN_LINE" ]; then
  if [ "$RELIABLE_LINE" -lt "$ZEN_LINE" ]; then
    log_pass "Reliable providers collected before Zen models"
  else
    log_fail "Reliable providers should be collected BEFORE Zen models"
  fi
  # The OpenRouter position was previously computed but never inspected.
  # It is reported as a warning only, so the pass/fail tally and the script's
  # exit status are unaffected.
  if [ -n "$OPENROUTER_LINE" ] && [ "$RELIABLE_LINE" -ge "$OPENROUTER_LINE" ]; then
    log_warn "Reliable providers should also be collected BEFORE OpenRouter models"
  fi
else
  # Not verifiable: counted in TOTAL but neither passed nor failed.
  log_warn "Could not verify collection order"
  TOTAL=$((TOTAL + 1))
fi
| 116 | + |
# Test 6: API actually responds with content (not "Unable to provide analysis")
# Sends one chat completion request to the debate model and inspects the text
# of the first choice.
log_info "Test 6: Testing actual API response..."
# Text emitted when no provider could answer. It is matched as a substring
# because an exact-equality check misses the message when it is embedded in a
# longer response.
FALLBACK_MESSAGE="Unable to provide analysis at this time"
# NOTE: Cognee timeouts can slow this down, so we use a longer timeout
RESPONSE=$(curl -s -X POST "${HELIX_URL}/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"model":"helixagent-debate","messages":[{"role":"user","content":"What is 1+1?"}],"max_tokens":50}' \
  --connect-timeout 30 --max-time 120 2>/dev/null || echo "")

if grep -q '"content"' <<<"$RESPONSE"; then
  # Extract choices[0].message.content; any parse error yields an empty string.
  CONTENT=$(python3 -c "import sys,json; d=json.load(sys.stdin); print(d['choices'][0]['message']['content'])" <<<"$RESPONSE" 2>/dev/null || echo "")
  if [ -n "$CONTENT" ] && [[ "$CONTENT" != *"$FALLBACK_MESSAGE"* ]]; then
    log_pass "API returns actual content: ${CONTENT:0:50}..."
  else
    log_fail "API returns fallback message instead of real content"
  fi
else
  log_fail "API response malformed: $RESPONSE"
fi
| 135 | + |
# Test 7: Server logs show Cerebras/Mistral being used
# Scans the last 100 lines of the server log for a completed Cerebras or
# Mistral call.
log_info "Test 7: Checking if Cerebras/Mistral are being used in requests..."
# The log location can be overridden through HELIX_LOG_FILE; the default is the
# path this check has always read.
HELIX_LOG_FILE="${HELIX_LOG_FILE:-/tmp/helix_new.log}"
# -m 1 stops at the first matching line. A missing or unreadable log produces
# an empty result, which is reported as a warning rather than a failure.
LOG_CHECK=$(tail -n 100 "$HELIX_LOG_FILE" 2>/dev/null | grep -m 1 -E "Cerebras API call completed|Mistral API call completed" || echo "")
if [ -n "$LOG_CHECK" ]; then
  log_pass "Working providers are being used: ${LOG_CHECK:0:60}..."
else
  # Not verifiable: counted in TOTAL but neither passed nor failed.
  log_warn "Could not verify provider usage in logs (may need fresh request)"
  TOTAL=$((TOTAL + 1))
fi
| 145 | + |
# Test 8: No circuit breakers blocking all fallbacks
# Counts "circuit breaker is open" messages in the last 50 lines of the server
# log; fewer than 5 is accepted.
log_info "Test 8: Checking circuit breaker status..."
# The log location can be overridden through HELIX_LOG_FILE; the default is the
# path this check has always read.
HELIX_LOG_FILE="${HELIX_LOG_FILE:-/tmp/helix_new.log}"
if [ ! -r "$HELIX_LOG_FILE" ]; then
  # Without a readable log the count would always be 0 and the check would pass
  # without verifying anything, so report it as unverifiable instead
  # (counted in TOTAL but neither passed nor failed).
  log_warn "Could not read ${HELIX_LOG_FILE}; circuit breaker status not verified"
  TOTAL=$((TOTAL + 1))
else
  # grep -c always prints a number (0 when nothing matches), so no fallback
  # value is needed beyond the defensive default below.
  CIRCUIT_ERRORS=$(tail -n 50 "$HELIX_LOG_FILE" 2>/dev/null | grep -c "circuit breaker is open")
  CIRCUIT_ERRORS="${CIRCUIT_ERRORS:-0}"
  if [ "$CIRCUIT_ERRORS" -lt 5 ]; then
    log_pass "Circuit breakers are not blocking all fallbacks"
  else
    log_fail "Too many circuit breaker open errors: $CIRCUIT_ERRORS"
  fi
fi
| 156 | + |
# Test 9: Environment variables for reliable providers
# Warns about each API key variable that is unset or empty. Missing keys never
# fail the challenge; they only turn the result into a warning.
log_info "Test 9: Checking required environment variables..."
REQUIRED_KEYS=(CEREBRAS_API_KEY MISTRAL_API_KEY)
MISSING_VARS=0
for key_name in "${REQUIRED_KEYS[@]}"; do
  # ${!key_name} reads the variable whose name is stored in key_name.
  [ -n "${!key_name}" ] && continue
  log_warn "$key_name not set"
  MISSING_VARS=$((MISSING_VARS + 1))
done

if [ "$MISSING_VARS" -ne 0 ]; then
  log_warn "$MISSING_VARS API keys missing - some fallbacks unavailable"
  # Counted in TOTAL but neither passed nor failed.
  TOTAL=$((TOTAL + 1))
else
  log_pass "All reliable provider API keys are set"
fi
| 173 | + |
# Test 10: getFallbackLLMs prioritizes non-OAuth
# Runs the matching Go subtest(s) and requires both a zero exit status and at
# least one reported subtest pass.
log_info "Test 10: Running getFallbackLLMs priority test..."
# Run from the project root inside the command substitution's subshell, so the
# check does not depend on the working directory left by earlier steps.
PRIORITY_TEST_OUTPUT=$(cd "${PROJECT_ROOT}" && go test -v -run "TestFallbackChainIncludesWorkingProviders/getFallbackLLMs_prioritizes" ./internal/services/ 2>&1)
PRIORITY_TEST_STATUS=$?
# Searching the output for a bare "PASS" is not sufficient: `go test` prints
# "PASS" even when the -run pattern matches no test at all. Requiring a
# "--- PASS:" line for a subtest of the target test, together with exit status
# 0, rules out both the no-tests-ran case and a run in which any matched test
# failed.
if [ "$PRIORITY_TEST_STATUS" -eq 0 ] \
  && grep -qF -- '--- PASS: TestFallbackChainIncludesWorkingProviders/' <<<"$PRIORITY_TEST_OUTPUT"; then
  log_pass "getFallbackLLMs correctly prioritizes non-OAuth providers"
else
  log_fail "getFallbackLLMs priority test failed"
fi
| 181 | + |
# Summary
# Prints the tallies, then a colored verdict box, and exits 0 only when no
# test was recorded as failed.
SUMMARY_RULE="═══════════════════════════════════════════════════════════════════════════"

echo ""
echo "$SUMMARY_RULE"
echo " CHALLENGE SUMMARY"
echo "$SUMMARY_RULE"
echo ""
echo -e " Total Tests: ${TOTAL}"
echo -e " ${GREEN}Passed:${NC} ${PASSED}"
echo -e " ${RED}Failed:${NC} ${FAILED}"
echo ""

# Select the color, message, and exit status of the verdict first, then print
# the three-line box once.
if [ "$FAILED" -eq 0 ]; then
  VERDICT_COLOR="${GREEN}"
  VERDICT_TEXT=" ✅ CHALLENGE PASSED - Reliable fallback mechanism is working!"
  VERDICT_STATUS=0
else
  VERDICT_COLOR="${RED}"
  VERDICT_TEXT=" ❌ CHALLENGE FAILED - ${FAILED} tests failed"
  VERDICT_STATUS=1
fi

for verdict_line in "$SUMMARY_RULE" "$VERDICT_TEXT" "$SUMMARY_RULE"; do
  echo -e "${VERDICT_COLOR}${verdict_line}${NC}"
done
exit "$VERDICT_STATUS"
0 commit comments