claude-failover/scripts/test-and-migrate.sh

311 lines
12 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# test-and-migrate.sh — Full check of the claude-failover daemon, optionally
# followed by the migration away from the cron-based orchestrator.
#
# Usage: bash test-and-migrate.sh [--migrate]
#   (no flag)   run the checks only
#   --migrate   run the checks, then disable the crons if every check passed
set -euo pipefail

# ANSI colour sequences, stored unexpanded; they are expanded at print time
# by printf %b (here) or echo -e (elsewhere in the script).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Result counters, incremented by ok / fail / warn.
PASS=0
FAIL=0
WARN=0

# ok MESSAGE — print a green PASS line and count it.
# The message goes through %s, so backslashes it contains (paths, raw HTTP
# bodies, ...) are printed verbatim instead of being interpreted as escapes.
ok() {
  printf '%b✅ PASS%b: %s\n' "$GREEN" "$NC" "$1"
  # Plain assignment (not ((PASS++))) so the function always returns 0.
  PASS=$((PASS + 1))
}

# fail MESSAGE — print a red FAIL line and count it.
fail() {
  printf '%b❌ FAIL%b: %s\n' "$RED" "$NC" "$1"
  FAIL=$((FAIL + 1))
}

# warn MESSAGE — print a yellow WARN line and count it.
warn() {
  printf '%b⚠️ WARN%b: %s\n' "$YELLOW" "$NC" "$1"
  WARN=$((WARN + 1))
}

# Only the first argument is inspected; anything other than --migrate means
# "tests only".
MIGRATE=false
if [[ "${1:-}" == "--migrate" ]]; then
  MIGRATE=true
fi
# Opening banner (three lines followed by one blank line).
cat <<'BANNER'
══════════════════════════════════════════════════
 claude-failover — Test complet
══════════════════════════════════════════════════

BANNER
# ─── 1. systemd service ───
# The unit must be both running now and enabled for start at boot.
printf '%s\n' "── 1. Service systemd ──"
if ! systemctl is-active --quiet claude-failover; then
  fail "Service claude-failover inactif"
else
  ok "Service claude-failover actif"
fi
if ! systemctl is-enabled --quiet claude-failover; then
  fail "Service claude-failover non enabled"
else
  ok "Service claude-failover enabled (démarrage auto)"
fi
# ─── 2. HTTP API ───
# Probe the daemon's local HTTP endpoints. A failed request yields an empty
# body, which then fails the corresponding check below.
echo ""
echo "── 2. API HTTP ──"
# --max-time bounds each probe so a daemon that accepts the connection but
# never answers cannot stall the whole run.
HEALTH=$(curl -sf --max-time 5 http://127.0.0.1:9090/health 2>/dev/null || echo "")
# Substring match on the exact compact form "status":"ok" (same criterion as
# the previous grep, without the extra pipeline).
if [[ "$HEALTH" == *'"status":"ok"'* ]]; then
  ok "GET /health → OK"
else
  fail "GET /health ne répond pas ($HEALTH)"
fi
STATUS=$(curl -sf --max-time 5 http://127.0.0.1:9090/status 2>/dev/null || echo "")
# The body must parse as JSON and contain a 'sessions' entry.
if python3 -c "import json,sys; d=json.load(sys.stdin); assert 'sessions' in d" <<<"$STATUS" 2>/dev/null; then
  ok "GET /status → JSON avec sessions"
else
  fail "GET /status format invalide"
fi
# ─── 3. Binary ───
# The daemon binary must exist and be executable.
printf '\n%s\n' "── 3. Binaire ──"
if [[ ! -x /usr/local/bin/claude-failover ]]; then
  fail "Binaire manquant"
else
  ok "Binaire /usr/local/bin/claude-failover existe et exécutable"
fi
# ─── 4. Configuration ───
# Both the YAML config and the env file must exist; the env file is expected
# to have mode 600 (any other mode is only a warning).
printf '\n%s\n' "── 4. Configuration ──"
if [[ ! -f /etc/claude-failover/config.yaml ]]; then
  fail "Config manquante"
else
  ok "Config /etc/claude-failover/config.yaml existe"
fi
if [[ ! -f /etc/claude-failover/env ]]; then
  fail "Env file /etc/claude-failover/env manquant"
else
  # Octal permission bits as printed by stat, e.g. "600".
  env_mode=$(stat -c %a /etc/claude-failover/env)
  case "$env_mode" in
    600) ok "Env file permissions 600" ;;
    *) warn "Env file permissions $env_mode (attendu 600)" ;;
  esac
fi
# ─── 5. ~/.claude symlink ───
# ~/.claude must be a symlink whose target names one of the two account
# directories (substring match on claude-compte1 / claude-compte2).
printf '\n%s\n' "── 5. Symlink ~/.claude ──"
claude_link=~/.claude
if [[ ! -L "$claude_link" ]]; then
  fail "~/.claude n'est pas un symlink"
else
  link_target=$(readlink "$claude_link")
  case "$link_target" in
    *claude-compte1* | *claude-compte2*)
      ok "~/.claude → $link_target"
      ;;
    *)
      fail "~/.claude pointe vers $link_target (attendu compte1 ou compte2)"
      ;;
  esac
fi
# ─── 6. Shared projects symlink ───
# Each account's projects directory should be a symlink that resolves to a
# path containing "claude-projects-shared"; deviations are warnings only.
printf '\n%s\n' "── 6. Projects partagé ──"
for account in 1 2; do
  projects_dir=~/.claude-compte${account}/projects
  if [[ ! -L "$projects_dir" ]]; then
    warn "$projects_dir n'est pas un symlink"
    continue
  fi
  # -f resolves the whole chain to its final canonical path.
  resolved=$(readlink -f "$projects_dir")
  case "$resolved" in
    *claude-projects-shared*) ok "$projects_dir → shared" ;;
    *) warn "$projects_dir pointe vers $resolved" ;;
  esac
done
# ─── 7. tmux sessions ───
# Count tmux sessions: total, pool (name contains "ccl-auto") and interactive
# (name starts with "ccl-" followed by a digit).
echo ""
echo "── 7. Sessions tmux ──"

# count_lines_matching ERE TEXT — print the number of lines of TEXT that match
# the extended regex ERE (an empty ERE matches every line).
# Prints a single integer when grep runs normally, including "0" when nothing
# matches or TEXT is empty, and never returns non-zero, so it is safe under
# `set -e` / `pipefail`.
count_lines_matching() {
  local pattern=$1 text=$2
  if [[ -z "$text" ]]; then
    # A here-string of "" would still feed grep one empty line.
    echo 0
    return 0
  fi
  # grep -c already prints "0" on no match but exits 1; swallow the status
  # without printing a second number.
  grep -cE -- "$pattern" <<<"$text" || true
}

# List sessions once. `tmux ls` fails when no server is running; treat that
# as "no sessions" instead of aborting the script.
tmux_listing=$(tmux ls 2>/dev/null || true)
total_sessions=$(count_lines_matching '' "$tmux_listing")
pool_sessions=$(count_lines_matching 'ccl-auto' "$tmux_listing")
interactive_sessions=$(count_lines_matching '^ccl-[0-9]' "$tmux_listing")

if [[ $total_sessions -ge 20 ]]; then
  ok "20+ sessions tmux ($total_sessions total)"
else
  warn "$total_sessions sessions (attendu 20)"
fi
if [[ $pool_sessions -ge 10 ]]; then
  ok "$pool_sessions sessions pool (ccl-auto-*)"
else
  fail "Seulement $pool_sessions sessions pool (attendu 10)"
fi
if [[ $interactive_sessions -ge 10 ]]; then
  ok "$interactive_sessions sessions interactives (ccl-0..9)"
else
  warn "$interactive_sessions sessions interactives (attendu 10)"
fi
# ─── 8. Credentials ───
# Each account directory must hold a .credentials.json; report the
# subscription type found under claudeAiOauth.subscriptionType.
echo ""
echo "── 8. Credentials ──"
for compte in ~/.claude-compte1 ~/.claude-compte2; do
  cred_file="$compte/.credentials.json"
  compte_name=$(basename -- "$compte")
  if [[ -f "$cred_file" ]]; then
    # The path is passed as an argument rather than pasted into the Python
    # source, so quotes in the path cannot break the snippet.
    sub_type=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(d.get("claudeAiOauth",{}).get("subscriptionType","unknown"))' "$cred_file" 2>/dev/null || echo "unknown")
    ok "$compte_name → $sub_type"
  else
    fail "$compte_name → pas de credentials"
  fi
done

# token_prefix FILE — print the first 20 characters of
# claudeAiOauth.accessToken from FILE. Returns non-zero (printing nothing
# useful) when the file cannot be parsed or the token is not a string.
token_prefix() {
  python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(d.get("claudeAiOauth",{}).get("accessToken","")[:20])' "$1" 2>/dev/null
}

# Check that the two accounts are actually different.
if [[ -f ~/.claude-compte1/.credentials.json && -f ~/.claude-compte2/.credentials.json ]]; then
  # `|| idN=""` keeps a parse failure from aborting the script under set -e.
  id1=$(token_prefix "$HOME/.claude-compte1/.credentials.json") || id1=""
  id2=$(token_prefix "$HOME/.claude-compte2/.credentials.json") || id2=""
  if [[ -z "$id1" || -z "$id2" ]]; then
    # An unreadable or missing token proves nothing about distinctness.
    fail "accessToken illisible ou absent dans au moins un fichier credentials"
  elif [[ "$id1" != "$id2" ]]; then
    ok "Deux comptes distincts confirmés"
  else
    fail "Les deux comptes ont les mêmes credentials !"
  fi
fi
# ─── 9. Orchestrator state ───
# Report .quota.active_account and .quota.paused from the state file.
# A missing or unparsable file is only a warning.
echo ""
echo "── 9. État orchestrateur ──"
state_file=/tmp/orchestrator-state.json
if [[ ! -f "$state_file" ]]; then
  warn "orchestrator-state.json absent"
elif active=$(jq -r '.quota.active_account // "unknown"' "$state_file" 2>/dev/null) &&
  paused=$(jq -r '.quota.paused // false' "$state_file" 2>/dev/null); then
  ok "State: active=$active, paused=$paused"
else
  # jq missing, or the file is not valid JSON (e.g. caught mid-write): report
  # it instead of letting set -e abort the whole run without a summary.
  warn "orchestrator-state.json illisible (jq a échoué)"
fi
# ─── 10. Go tests ───
# Run the project's Go test suite, then make sure it did not clobber the
# ~/.claude symlink.
echo ""
echo "── 10. Go tests ──"
go_test_log=/tmp/go-test-output.txt
# Remember the link state before the tests so a link that disappears during
# the run can be told apart from one that was already missing.
claude_before=$(readlink ~/.claude 2>/dev/null || echo "BROKEN")
# Subshell: the cd stays local to the test run, and a missing project
# directory is reported as a test failure (with the reason in the log)
# instead of aborting the script.
if (
  cd /home/ubuntu/projects/claude-failover &&
    HOME=/home/ubuntu GOPATH=/home/ubuntu/go GOCACHE=/home/ubuntu/.cache/go go test ./...
) > "$go_test_log" 2>&1; then
  ok "go test ./... → PASS"
else
  fail "go test ./... → FAIL (voir /tmp/go-test-output.txt)"
  tail -10 "$go_test_log"
fi
# Check that the tests did NOT overwrite ~/.claude.
current_target=$(readlink ~/.claude 2>/dev/null || echo "BROKEN")
if [[ "$current_target" == *"TestKill"* || "$current_target" == *"/tmp/"* ]]; then
  fail "CRITIQUE: go test a écrasé ~/.claude → $current_target"
elif [[ "$current_target" == "BROKEN" && "$claude_before" != "BROKEN" ]]; then
  # The link was readable before the tests and is not any more.
  fail "CRITIQUE: ~/.claude n'est plus un symlink lisible après go test (avant: $claude_before)"
else
  ok "~/.claude intact après go test"
fi
# ─── 11. Daemon logs ───
# Look for error/panic/fatal (case-insensitive) in the last 50 journal lines
# of the unit. Matches are a warning, not a failure.
echo ""
echo "── 11. Daemon logs ──"
# Read the journal once so the count and the excerpt below describe the same
# 50 lines; a journalctl failure is treated as "no log lines".
recent_logs=$(journalctl -u claude-failover --no-pager -n 50 2>/dev/null || true)
# grep -c prints "0" and exits 1 when nothing matches; `|| true` only
# neutralises the status.
errors=$(grep -ciE "error|panic|fatal" <<<"$recent_logs" || true)
errors=${errors:-0}
if [[ $errors -eq 0 ]]; then
  ok "0 erreurs dans les 50 dernières lignes de log"
else
  warn "$errors erreur(s) dans les logs récents"
  # Show at most the five most recent offending lines.
  grep -iE "error|panic|fatal" <<<"$recent_logs" | tail -5 || true
fi
# ─── 12. Shell fixes in place ───
# Verify, by searching for marker strings, that the expected fixes are present
# in the orchestrator shell scripts.
echo ""
echo "── 12. Fixes shell en place ──"
WD="/home/ubuntu/projects/dev-management/agent-orchestrator/watchdog.sh"
DISP="/home/ubuntu/projects/dev-management/agent-orchestrator/dispatcher.sh"
LC="/home/ubuntu/projects/dev-management/agent-orchestrator/lib-common.sh"
QM="/home/ubuntu/projects/dev-management/agent-orchestrator/quota-monitor.sh"

# expect_pattern LEVEL FILE PATTERN OK_MSG MISSING_MSG
# Report OK_MSG via `ok` when FILE contains a line matching the basic regex
# PATTERN; otherwise report MISSING_MSG via LEVEL (`fail` or `warn`).
# An unreadable or missing FILE counts as "pattern not found".
expect_pattern() {
  local level=$1 file=$2 pattern=$3 ok_msg=$4 missing_msg=$5
  # -e lets the pattern start with '-' without being parsed as an option.
  if grep -q -e "$pattern" "$file" 2>/dev/null; then
    ok "$ok_msg"
  else
    "$level" "$missing_msg"
  fi
}

expect_pattern fail "$WD" "detect_missing_pool_sessions" \
  "Watchdog: detect missing sessions" "Watchdog: detect missing sessions MANQUANT"
expect_pattern fail "$DISP" "MANQUANTES" \
  "Dispatcher: logs détaillés busy/missing" "Dispatcher: logs détaillés MANQUANT"
expect_pattern fail "$DISP" "_has_free_session" \
  "Dispatcher: skip GPU si 0 session" "Dispatcher: skip GPU MANQUANT"
expect_pattern fail "$LC" "SA0-SYMLINK" \
  "Failover: flip symlink ~/.claude" "Failover: flip symlink MANQUANT"
expect_pattern fail "$DISP" 'interactive_quota_status.*==.*blocked' \
  "Failover: trigger sur quota interactif" "Failover: trigger interactif MANQUANT"
# --force and active_account on the same line, in either order.
expect_pattern warn "$QM" '--force.*active_account\|active_account.*--force' \
  "Failover: graceful-switch --force" "Failover: graceful-switch --force (vérifier manuellement)"
expect_pattern warn "/home/ubuntu/projects/dev-management/agent-orchestrator/batch-analyzer.sh" 'preferred_ai.*gpu' \
  "GPU: preferred_ai=gpu (no Claude fallback)" "GPU: vérifier preferred_ai"
expect_pattern warn "$LC" 'resend.com' \
  "Notifications: email Resend dans lib-common" "Notifications: email Resend à vérifier"
# ─── 13. @reboot cron ───
# The user's crontab must mention setup-tmux, and lingering should be enabled
# for user "ubuntu" (the latter is only a warning when not confirmed).
printf '\n%s\n' "── 13. @reboot cron ──"
if ! crontab -l 2>/dev/null | grep -q setup-tmux; then
  fail "@reboot cron setup-tmux.sh MANQUANT"
else
  ok "@reboot cron setup-tmux.sh présent"
fi
if ! loginctl show-user ubuntu 2>/dev/null | grep -q "Linger=yes"; then
  warn "loginctl linger non vérifié"
else
  ok "loginctl linger=yes"
fi
# ═══════════════════════════════════════════════════
# RESULT
# ═══════════════════════════════════════════════════
# Print the PASS/FAIL/WARN totals. Any failure ends the script with status 1;
# without --migrate the script stops here with status 0.
printf '\n%s\n' "══════════════════════════════════════════════════"
printf '%b\n' " Résultat: ${GREEN}$PASS PASS${NC}, ${RED}$FAIL FAIL${NC}, ${YELLOW}$WARN WARN${NC}"
printf '%s\n' "══════════════════════════════════════════════════"
if [[ $FAIL -gt 0 ]]; then
  printf '%b\n' "${RED}Des tests ont échoué. Migration non recommandée.${NC}"
  exit 1
fi
case "$MIGRATE" in
  true) ;; # fall through to the migration section
  *)
    printf '\n%s\n' "Pour migrer (désactiver les crons), relancer avec --migrate :"
    printf ' bash %s --migrate\n' "$0"
    exit 0
    ;;
esac
# ═══════════════════════════════════════════════════
# MIGRATION — Disable the crons
# ═══════════════════════════════════════════════════
echo ""
echo "══════════════════════════════════════════════════"
echo " MIGRATION — Désactivation des crons"
echo "══════════════════════════════════════════════════"
# Back up the current crontab. If it cannot be read, stop here: carrying on
# would install an empty crontab over the real one.
cron_backup="/tmp/crontab-backup-$(date +%Y%m%d-%H%M%S).txt"
if ! crontab -l > "$cron_backup"; then
  rm -f -- "$cron_backup"
  echo -e "${RED}Impossible de lire le crontab actuel — migration annulée.${NC}" >&2
  exit 1
fi
echo "Crontab backupé dans $cron_backup"
# Comment out (do not delete) the orchestrator crons. Only entries whose
# schedule starts with "*/<digit>" are touched. The new table is built from
# the backup into a temp file and installed only if sed succeeded, so a
# failure can never leave a truncated crontab behind.
cron_new=$(mktemp)
if ! sed \
  -e 's|^\(\*/[0-9].*dispatcher\.sh\)|# MIGRATED-TO-DAEMON # \1|' \
  -e 's|^\(\*/[0-9].*watchdog\.sh\)|# MIGRATED-TO-DAEMON # \1|' \
  -e 's|^\(\*/[0-9].*janitor\.sh\)|# MIGRATED-TO-DAEMON # \1|' \
  -e 's|^\(\*/[0-9].*quota-monitor\.sh\)|# MIGRATED-TO-DAEMON # \1|' \
  -e 's|^\(\*/[0-9].*checkpoint-daemon\.sh\)|# MIGRATED-TO-DAEMON # \1|' \
  "$cron_backup" > "$cron_new"; then
  rm -f -- "$cron_new"
  echo -e "${RED}Échec de la réécriture du crontab — crontab inchangé.${NC}" >&2
  exit 1
fi
if ! crontab "$cron_new"; then
  rm -f -- "$cron_new"
  echo -e "${RED}Échec de l'installation du crontab — restaurer avec: crontab $cron_backup${NC}" >&2
  exit 1
fi
rm -f -- "$cron_new"
echo "Crons commentés (préfixe MIGRATED-TO-DAEMON)"
echo ""
# Show what is still active (non-comment, non-empty lines).
echo "Crons restants actifs :"
crontab -l | grep -vE '^(#|$)' || echo "(aucun cron actif restant)"
echo ""
echo "Le @reboot cron setup-tmux.sh est conservé (backup sessions tmux)."
echo ""
# Stop and disable the inbox-watcher service if it is currently running.
if systemctl is-active --quiet agent-inbox-watcher 2>/dev/null; then
  sudo systemctl stop agent-inbox-watcher
  sudo systemctl disable agent-inbox-watcher
  echo "Service agent-inbox-watcher stoppé et désactivé"
else
  echo "agent-inbox-watcher déjà inactif"
fi
echo ""
echo -e "${GREEN}═══════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Migration terminée. claude-failover est le seul ${NC}"
echo -e "${GREEN} orchestrateur actif. ${NC}"
echo -e "${GREEN}═══════════════════════════════════════════════════${NC}"
echo ""
# Rollback instructions. The watcher was disabled above, so the hint uses
# `enable --now` to restore both the running state and start at boot.
echo "Pour revenir en arrière :"
echo " sudo systemctl stop claude-failover"
echo " crontab -l | sed 's/# MIGRATED-TO-DAEMON # //' | crontab -"
echo " sudo systemctl enable --now agent-inbox-watcher"