diff --git a/launcher.sh b/launcher.sh
index d641918..c66ac6d 100755
--- a/launcher.sh
+++ b/launcher.sh
@@ -600,6 +600,10 @@ show_acronis_menu() {
     echo -e " ${YELLOW}10)${NC} View Logs - Check Acronis logs"
     echo -e " ${YELLOW}11)${NC} Uninstall Acronis - Remove Acronis agent"
     echo ""
+    echo -e "${BOLD}Troubleshooting:${NC}"
+    echo ""
+    echo -e " ${RED}12)${NC} 🔧 Troubleshoot Backups - Diagnose backup failures"
+    echo ""
     echo -e " ${RED}0)${NC} Back to Backup & Recovery"
     echo ""
     echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
@@ -624,6 +628,7 @@ handle_acronis_menu() {
         9) run_module "backup" "acronis-update.sh" ;;
         10) run_module "backup" "acronis-logs.sh" ;;
         11) run_module "backup" "acronis-uninstall.sh" ;;
+        12) run_module "backup" "acronis-troubleshoot.sh" ;;
         0) return ;;
         *) echo -e "${RED}Invalid option${NC}"; sleep 1 ;;
     esac
diff --git a/modules/backup/acronis-troubleshoot.sh b/modules/backup/acronis-troubleshoot.sh
new file mode 100755
index 0000000..2efc12a
--- /dev/null
+++ b/modules/backup/acronis-troubleshoot.sh
@@ -0,0 +1,562 @@
+#!/bin/bash
+
+################################################################################
+# Acronis Backup Troubleshooter
+################################################################################
+# Purpose: Diagnose and troubleshoot Acronis backup failures
+# Features:
+#   - Multi-log location scanning
+#   - Common failure pattern detection
+#   - Service health checks
+#   - Disk space analysis
+#   - Network connectivity tests
+#   - Automated fix suggestions
+#
+# Usage: run as root (normally via launcher.sh, Acronis menu option 12).
+# print_banner, print_error, print_success, press_enter and the colour
+# variables (RED, GREEN, ...) are not defined here; presumably they come
+# from the sourced lib/ files.
+################################################################################
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+source "$SCRIPT_DIR/lib/common-functions.sh"
+source "$SCRIPT_DIR/lib/system-detect.sh"
+
+# Require root
+if [ "$EUID" -ne 0 ]; then
+    print_error "This script must be run as root"
+    exit 1
+fi
+
+# Log locations to check. Only the MMS entry is read below; the others are
+# kept as a reference list of where Acronis and the system write logs.
+declare -A LOG_LOCATIONS=(
+    ["MMS"]="/var/lib/Acronis/BackupAndRecovery/MMS/mms.0.log"
+    ["MMS_OLD"]="/var/lib/Acronis/BackupAndRecovery/MMS/mms.*.log"
+    ["AGENT"]="/var/log/acronis/agent/*.log"
+    ["CORE"]="/var/lib/Acronis/BackupAndRecovery/aakore.log"
+    ["SCHEDULE"]="/var/lib/Acronis/BackupAndRecovery/scheduler.log"
+    ["SYSTEM"]="/var/log/messages"
+    ["SYSLOG"]="/var/log/syslog"
+)
+
+print_banner "Acronis Backup Troubleshooter"
+
+echo ""
+echo -e "${BOLD}Diagnostic Mode${NC}"
+echo ""
+echo "This tool will analyze:"
+echo " • Service status and health"
+echo " • Log files for errors and failures"
+echo " • System resources (disk, memory)"
+echo " • Network connectivity to Acronis Cloud"
+echo " • Common backup failure patterns"
+echo ""
+echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+# Track issues found
+declare -a ISSUES_FOUND=()
+declare -a WARNINGS_FOUND=()
+declare -a RECOMMENDATIONS=()
+
+# Record a critical issue ($1) for the summary report.
+add_issue() {
+    ISSUES_FOUND+=("$1")
+}
+
+# Record a warning ($1) for the summary report.
+add_warning() {
+    WARNINGS_FOUND+=("$1")
+}
+
+# Record a suggested fix ($1) for the summary report.
+add_recommendation() {
+    RECOMMENDATIONS+=("$1")
+}
+
+# Succeed when $1 is a non-empty string of decimal digits, i.e. safe to use
+# with the integer operators of [ ].
+is_uint() {
+    [[ "$1" =~ ^[0-9]+$ ]]
+}
+
+# Print the used-space percentage (digits only) of the filesystem holding $1.
+# df -P keeps each filesystem on a single row even for long device names.
+disk_usage_percent() {
+    df -P "$1" 2>/dev/null | awk 'NR == 2 { sub(/%/, "", $5); print $5 }'
+}
+
+# Succeed when hostname $1 resolves. getent is tried first so that a missing
+# host(1) binary is not reported as a DNS failure.
+resolve_host() {
+    if command -v getent >/dev/null 2>&1; then
+        getent hosts "$1" >/dev/null 2>&1
+    else
+        host "$1" >/dev/null 2>&1
+    fi
+}
+
+# Succeed when a systemd unit file named $1.service is installed.
+# Reads INSTALLED_UNITS, which is filled once in step 1.
+service_installed() {
+    grep -q "^$1\.service" <<<"$INSTALLED_UNITS"
+}
+
+# 1. Check service status
+echo -e "${BOLD}[1/7] Checking Acronis Services...${NC}"
+echo ""
+
+declare -a SERVICES=("aakore" "acronis_mms" "acronis_schedule" "active-protection")
+all_services_running=true
+
+# Queried once and reused for every service (and by the restart action).
+INSTALLED_UNITS=$(systemctl list-unit-files 2>/dev/null)
+
+for service in "${SERVICES[@]}"; do
+    # Services that are not installed are skipped silently.
+    if service_installed "$service"; then
+        if systemctl is-active --quiet "$service"; then
+            echo -e " ${GREEN}✓${NC} $service is running"
+        else
+            echo -e " ${RED}✗${NC} $service is NOT running"
+            add_issue "Service $service is stopped"
+            add_recommendation "Start service: systemctl start $service"
+            all_services_running=false
+        fi
+    fi
+done
+
+if [ "$all_services_running" = false ]; then
+    add_recommendation "Start all services: Go to Acronis menu → Check Agent Status → Start All Services"
+fi
+
+echo ""
+
+# 2. Check disk space
+echo -e "${BOLD}[2/7] Checking Disk Space...${NC}"
+echo ""
+
+# Check backup directory
+if [ -d "/var/lib/Acronis" ]; then
+    backup_disk_usage=$(disk_usage_percent /var/lib/Acronis)
+    backup_disk_avail=$(df -hP /var/lib/Acronis 2>/dev/null | awk 'NR == 2 { print $4 }')
+
+    echo " Acronis directory: /var/lib/Acronis"
+
+    if ! is_uint "$backup_disk_usage"; then
+        add_warning "Could not determine disk usage for /var/lib/Acronis"
+    else
+        echo " Disk usage: ${backup_disk_usage}%"
+        echo " Available: ${backup_disk_avail}"
+
+        if [ "$backup_disk_usage" -gt 95 ]; then
+            add_issue "Disk space critically low (${backup_disk_usage}% used)"
+            add_recommendation "Free up disk space or change backup destination"
+        elif [ "$backup_disk_usage" -gt 90 ]; then
+            add_warning "Disk space running low (${backup_disk_usage}% used)"
+            add_recommendation "Monitor disk space closely"
+        else
+            echo -e " ${GREEN}✓${NC} Disk space OK"
+        fi
+    fi
+fi
+
+# Check system disk
+root_disk_usage=$(disk_usage_percent /)
+if is_uint "$root_disk_usage" && [ "$root_disk_usage" -gt 90 ]; then
+    add_warning "Root filesystem at ${root_disk_usage}% capacity"
+fi
+
+echo ""
+
+# 3. Check memory
+echo -e "${BOLD}[3/7] Checking Memory...${NC}"
+echo ""
+
+# LC_ALL=C keeps the "Mem:" row label that the awk programs match on.
+mem_total=$(LC_ALL=C free -h | awk '/^Mem:/ { print $2 }')
+mem_available=$(LC_ALL=C free -h | awk '/^Mem:/ { print $7 }')
+mem_used_percent=$(LC_ALL=C free | awk '/^Mem:/ && $2 > 0 { printf "%.0f", ($3 / $2) * 100 }')
+
+if ! is_uint "$mem_used_percent"; then
+    add_warning "Could not determine memory usage"
+else
+    echo " Total memory: ${mem_total}"
+    echo " Available: ${mem_available}"
+    echo " Used: ${mem_used_percent}%"
+
+    if [ "$mem_used_percent" -gt 95 ]; then
+        add_warning "Memory usage critically high (${mem_used_percent}%)"
+        add_recommendation "Check for memory leaks or reduce backup concurrency"
+    elif [ "$mem_used_percent" -gt 90 ]; then
+        add_warning "Memory usage high (${mem_used_percent}%)"
+    else
+        echo -e " ${GREEN}✓${NC} Memory OK"
+    fi
+fi
+
+echo ""
+
+# 4. Check network connectivity
+echo -e "${BOLD}[4/7] Checking Network Connectivity...${NC}"
+echo ""
+
+# Check if registered
+if [ -f "/etc/Acronis/Global.config" ]; then
+    cloud_url=$(grep -oP 'CloudUrl[>="].*?https://[^"<]+' /etc/Acronis/Global.config 2>/dev/null | grep -oP 'https://[^"<]+' | head -1)
+
+    if [ -n "$cloud_url" ]; then
+        echo " Testing connection to: $cloud_url"
+
+        # Extract hostname: drop the scheme, any path and any :port suffix
+        cloud_host=${cloud_url#https://}
+        cloud_host=${cloud_host%%/*}
+        cloud_host=${cloud_host%%:*}
+
+        # Test connectivity
+        if ! command -v curl >/dev/null 2>&1; then
+            add_warning "curl is not installed - connectivity test skipped"
+        elif curl -s --connect-timeout 5 -I "$cloud_url" >/dev/null 2>&1; then
+            echo -e " ${GREEN}✓${NC} Connection successful"
+        else
+            add_issue "Cannot connect to Acronis Cloud: $cloud_url"
+            add_recommendation "Check firewall rules and network connectivity"
+            add_recommendation "Test manually: curl -I $cloud_url"
+        fi
+
+        # Test DNS resolution
+        if resolve_host "$cloud_host"; then
+            echo -e " ${GREEN}✓${NC} DNS resolution OK"
+        else
+            add_issue "DNS resolution failed for $cloud_host"
+            add_recommendation "Check DNS configuration: /etc/resolv.conf"
+        fi
+    else
+        add_warning "Agent may not be registered with Acronis Cloud"
+        add_recommendation "Register agent: Acronis menu → Register with Cloud"
+    fi
+else
+    add_warning "Acronis configuration file not found"
+fi
+
+echo ""
+
+# 5. Scan logs for errors
+echo -e "${BOLD}[5/7] Scanning Logs for Errors...${NC}"
+echo ""
+
+# Common error patterns (extended regexes, matched case-insensitively)
+declare -A ERROR_PATTERNS=(
+    ["INSUFFICIENT_SPACE"]="insufficient.*space|no.*space.*left|disk.*full"
+    ["PERMISSION_DENIED"]="permission.*denied|access.*denied|cannot.*access"
+    ["CONNECTION_FAILED"]="connection.*failed|connection.*refused|timeout|network.*error"
+    ["AUTH_FAILED"]="authentication.*failed|invalid.*credentials|unauthorized"
+    ["BACKUP_FAILED"]="backup.*failed|backup.*error|task.*failed"
+    ["VSS_ERROR"]="vss.*error|snapshot.*failed|shadow.*copy.*error"
+    ["DATABASE_ERROR"]="database.*error|sql.*error|db.*lock"
+    ["FILE_LOCKED"]="file.*locked|file.*in.*use|sharing.*violation"
+)
+
+# Scan primary log
+primary_log="${LOG_LOCATIONS[MMS]}"
+
+if [ -f "$primary_log" ]; then
+    echo " Scanning primary log: mms.0.log"
+
+    for pattern_name in "${!ERROR_PATTERNS[@]}"; do
+        pattern="${ERROR_PATTERNS[$pattern_name]}"
+
+        # grep -c prints the number of matching lines, 0 included.
+        error_count=$(grep -icE -- "$pattern" "$primary_log" 2>/dev/null)
+
+        if is_uint "$error_count" && [ "$error_count" -gt 0 ]; then
+            last_error=$(grep -iE -- "$pattern" "$primary_log" 2>/dev/null | tail -1)
+
+            echo -e " ${RED}⚠${NC} Found $pattern_name errors (count: $error_count)"
+            # printf %s keeps backslashes in the log text literal.
+            printf ' Last: %b%s...%b\n' "$DIM" "${last_error:0:80}" "$NC"
+
+            add_issue "$pattern_name detected in logs (count: $error_count)"
+
+            # Add specific recommendations
+            case "$pattern_name" in
+                "INSUFFICIENT_SPACE")
+                    add_recommendation "Free up disk space or change backup destination"
+                    ;;
+                "PERMISSION_DENIED")
+                    add_recommendation "Check file/directory permissions"
+                    add_recommendation "Ensure Acronis agent has necessary access rights"
+                    ;;
+                "CONNECTION_FAILED")
+                    add_recommendation "Check network connectivity and firewall rules"
+                    add_recommendation "Verify Acronis Cloud URL is accessible"
+                    ;;
+                "AUTH_FAILED")
+                    add_recommendation "Re-register agent with valid token"
+                    add_recommendation "Check registration status in web console"
+                    ;;
+                "VSS_ERROR")
+                    # vssadmin / "net stop vss" exist only on Windows; on a
+                    # Linux agent point at the snapshot kernel module instead.
+                    add_recommendation "Check snapshot kernel module: lsmod | grep -i snapapi"
+                    add_recommendation "Check module build after kernel updates: dkms status"
+                    ;;
+                "DATABASE_ERROR")
+                    add_recommendation "Check database connections and locks"
+                    add_recommendation "Consider application-aware backup settings"
+                    ;;
+                "FILE_LOCKED")
+                    add_recommendation "Identify processes locking files: lsof"
+                    add_recommendation "Schedule backups during low-activity periods"
+                    ;;
+            esac
+        fi
+    done
+
+    # Check for recent backup failures
+    recent_failures=$(grep -i "backup.*failed\|task.*failed" "$primary_log" 2>/dev/null | tail -5)
+    if [ -n "$recent_failures" ]; then
+        echo ""
+        echo -e " ${YELLOW}Recent backup failures:${NC}"
+        while read -r line; do
+            printf ' %b%s%b\n' "$DIM" "${line:0:100}" "$NC"
+        done <<<"$recent_failures"
+    fi
+
+else
+    add_warning "Primary log file not found: $primary_log"
+fi
+
+echo ""
+
+# 6. Check for stuck backups
+echo -e "${BOLD}[6/7] Checking for Stuck Processes...${NC}"
+echo ""
+
+# Check for long-running Acronis processes. Column 10 of "ps aux" is TIME
+# (accumulated CPU time); a two-digit minutes field means 10+ minutes.
+old_processes=$(ps aux | grep -i acronis | grep -v grep | awk '{if ($10 ~ /[0-9][0-9]:[0-9][0-9]/) print $0}')
+
+if [ -n "$old_processes" ]; then
+    echo -e " ${YELLOW}⚠${NC} Long-running Acronis processes detected:"
+    while read -r line; do
+        printf ' %b%s%b\n' "$DIM" "$line" "$NC"
+    done <<<"$old_processes"
+    add_warning "Long-running Acronis processes may indicate stuck backups"
+    add_recommendation "Review processes and consider restarting services if stuck"
+else
+    echo -e " ${GREEN}✓${NC} No stuck processes detected"
+fi
+
+echo ""
+
+# 7. Check configuration issues
+echo -e "${BOLD}[7/7] Checking Configuration...${NC}"
+echo ""
+
+# Check if backup plans are configured
+if command -v acrocmd &>/dev/null; then
+    # grep -c prints 0 itself when nothing matches (exit status 1), so an
+    # "|| echo 0" fallback would produce two lines and break the -eq test.
+    plan_count=$(acrocmd list plans 2>/dev/null | grep -c "^Plan ID")
+    is_uint "$plan_count" || plan_count=0
+    echo " Configured backup plans: $plan_count"
+
+    if [ "$plan_count" -eq 0 ]; then
+        add_warning "No backup plans configured"
+        add_recommendation "Configure backup plans in Acronis web console"
+    fi
+else
+    echo -e " ${DIM}acrocmd not available - cannot check backup plans${NC}"
+fi
+
+# Check agent version
+if [ -f "/usr/lib/Acronis/BackupAndRecovery/aakore" ]; then
+    # A pipeline returns head's status, so the fallback is applied to the
+    # captured text instead of with "||".
+    agent_version=$(/usr/lib/Acronis/BackupAndRecovery/aakore --version 2>/dev/null | head -1)
+    echo " Agent version: ${agent_version:-Unknown}"
+else
+    add_warning "Cannot determine agent version"
+fi
+
+echo ""
+echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+# Summary Report
+echo -e "${BOLD}DIAGNOSTIC SUMMARY${NC}"
+echo ""
+
+if [ ${#ISSUES_FOUND[@]} -eq 0 ] && [ ${#WARNINGS_FOUND[@]} -eq 0 ]; then
+    echo -e "${GREEN}${BOLD}✓ No issues detected${NC}"
+    echo ""
+    echo "Acronis appears to be healthy. If you're experiencing backup"
+    echo "failures, check the web console for detailed backup logs."
+else
+    # Show issues
+    if [ ${#ISSUES_FOUND[@]} -gt 0 ]; then
+        echo -e "${RED}${BOLD}Critical Issues (${#ISSUES_FOUND[@]}):${NC}"
+        for issue in "${ISSUES_FOUND[@]}"; do
+            echo -e " ${RED}✗${NC} $issue"
+        done
+        echo ""
+    fi
+
+    # Show warnings
+    if [ ${#WARNINGS_FOUND[@]} -gt 0 ]; then
+        echo -e "${YELLOW}${BOLD}Warnings (${#WARNINGS_FOUND[@]}):${NC}"
+        for warning in "${WARNINGS_FOUND[@]}"; do
+            echo -e " ${YELLOW}⚠${NC} $warning"
+        done
+        echo ""
+    fi
+
+    # Show recommendations
+    if [ ${#RECOMMENDATIONS[@]} -gt 0 ]; then
+        echo -e "${CYAN}${BOLD}Recommendations:${NC}"
+        # Plain assignment: "local" is only valid inside a function.
+        rec_num=1
+        for rec in "${RECOMMENDATIONS[@]}"; do
+            echo -e " ${CYAN}${rec_num}.${NC} $rec"
+            rec_num=$((rec_num + 1))
+        done
+        echo ""
+    fi
+fi
+
+echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+# Quick actions
+echo -e "${BOLD}Quick Actions:${NC}"
+echo ""
+echo -e " ${CYAN}1)${NC} View Full Logs (all errors)"
+echo -e " ${CYAN}2)${NC} Restart All Services"
+echo -e " ${CYAN}3)${NC} Generate Detailed Report"
+echo -e " ${CYAN}4)${NC} Export Logs for Support"
+echo ""
+echo -e " ${RED}0)${NC} Return to Menu"
+echo ""
+echo -n "Select action (or press Enter to return): "
+read -r action
+
+case "$action" in
+    1)
+        # Show all errors
+        clear
+        print_banner "All Errors from Logs"
+        echo ""
+        if [ -f "$primary_log" ]; then
+            grep -iE "error|fail|critical|warn" "$primary_log" | tail -50
+        fi
+        echo ""
+        press_enter
+        ;;
+    2)
+        # Restart services (installed ones only) and report failures
+        echo ""
+        echo "Restarting all Acronis services..."
+        restart_failed=false
+        for service in "${SERVICES[@]}"; do
+            service_installed "$service" || continue
+            if ! systemctl restart "$service"; then
+                print_error "Failed to restart $service"
+                restart_failed=true
+            fi
+        done
+        echo ""
+        if [ "$restart_failed" = false ]; then
+            print_success "Services restarted"
+        else
+            print_error "Some services could not be restarted"
+        fi
+        echo ""
+        echo "Waiting 5 seconds for services to stabilize..."
+        sleep 5
+        echo ""
+        echo "Running diagnostic again..."
+        sleep 2
+        exec "$0"
+        ;;
+    3)
+        # Generate detailed report inside a private mktemp directory; a
+        # guessable file name in /tmp is unsafe for a script run as root.
+        echo ""
+        echo "Generating detailed report..."
+
+        report_dir=$(mktemp -d /tmp/acronis-diagnostic-XXXXXX) || {
+            print_error "Could not create report directory"
+            exit 1
+        }
+        report_file="$report_dir/acronis-diagnostic-$(date +%Y%m%d-%H%M%S).txt"
+
+        {
+            echo "Acronis Diagnostic Report"
+            echo "Generated: $(date)"
+            echo "Hostname: $(hostname)"
+            echo ""
+            echo "=== Service Status ==="
+            for service in "${SERVICES[@]}"; do
+                systemctl status "$service" 2>&1 | head -20
+                echo ""
+            done
+            echo ""
+            echo "=== Recent Log Entries ==="
+            if [ -f "$primary_log" ]; then
+                tail -200 "$primary_log"
+            fi
+            echo ""
+            echo "=== System Resources ==="
+            df -h
+            echo ""
+            free -h
+            echo ""
+            echo "=== Network ==="
+            # Fall back to ss where netstat (net-tools) is not installed.
+            { netstat -tuln 2>/dev/null || ss -tuln 2>/dev/null; } | grep -E "7770|7800|8443|44445"
+            echo ""
+            echo "=== Processes ==="
+            ps aux | grep -i acronis | grep -v grep
+        } > "$report_file"
+
+        print_success "Report generated: $report_file"
+        echo ""
+        echo "You can send this report to Acronis support or review it locally."
+        echo ""
+        press_enter
+        ;;
+    4)
+        # Export logs into a private mktemp directory (see option 3)
+        echo ""
+        echo "Exporting logs..."
+
+        # Expand the glob into an array so "no logs" is detectable.
+        shopt -s nullglob
+        log_files=(/var/lib/Acronis/BackupAndRecovery/MMS/*.log)
+        shopt -u nullglob
+
+        if [ ! -d "/var/lib/Acronis/BackupAndRecovery/MMS" ]; then
+            print_error "Log directory not found"
+        elif [ ${#log_files[@]} -eq 0 ]; then
+            print_error "No log files found to export"
+        elif ! export_dir=$(mktemp -d /tmp/acronis-logs-XXXXXX); then
+            print_error "Could not create export directory"
+        else
+            archive_file="$export_dir/acronis-logs-$(date +%Y%m%d-%H%M%S).tar.gz"
+            if tar -czf "$archive_file" "${log_files[@]}" 2>/dev/null; then
+                print_success "Logs exported: $archive_file"
+                echo ""
+                echo "Archive size: $(du -h "$archive_file" | awk '{print $1}')"
+            else
+                print_error "Failed to create archive: $archive_file"
+            fi
+        fi
+        echo ""
+        press_enter
+        ;;
+    *)
+        exit 0
+        ;;
+esac