Add comprehensive Acronis backup troubleshooting tool

Created acronis-troubleshoot.sh with intelligent diagnostic capabilities:

7-Point Diagnostic System:
1. Service Health Check
   - Verifies all 4 Acronis services (aakore, acronis_mms, acronis_schedule, active-protection)
   - Detects stopped/failed services
   - Auto-generates restart recommendations

2. Disk Space Analysis
   - Checks /var/lib/Acronis and root filesystem
   - Warns at 90%, critical at 95% usage
   - Identifies insufficient space for backups

3. Memory Monitoring
   - Tracks system memory usage
   - Warns when memory usage is high (>90%)
   - Detects potential memory leaks

4. Network Connectivity Testing
   - Tests connection to Acronis Cloud URL
   - DNS resolution verification
   - Identifies firewall/network issues

5. Multi-Location Log Scanning
   - Scans multiple log locations:
     * /var/lib/Acronis/BackupAndRecovery/MMS/mms.*.log
     * /var/log/acronis/agent/*.log
     * System logs (/var/log/messages, /var/log/syslog)
   - Pattern detection for 8 common failure types:
     * Insufficient space errors
     * Permission denied
     * Connection failures
     * Authentication failures
     * Backup task failures
     * VSS/snapshot errors
     * Database errors
     * File locking issues

6. Stuck Process Detection
   - Identifies long-running Acronis processes
   - Detects hung backup jobs
   - Recommends service restarts when needed

7. Configuration Verification
   - Checks backup plan configuration
   - Verifies agent version
   - Registration status validation

Intelligent Recommendations:
- Context-aware fix suggestions based on detected issues
- Prioritized action items (critical vs warnings)
- Specific commands to resolve each issue type

Quick Actions Menu:
1. View all errors from logs
2. Restart all services
3. Generate detailed diagnostic report for support
4. Export logs as tar.gz archive

Issue Tracking:
- Categorizes findings as CRITICAL or WARNINGS
- Provides comprehensive summary with counts
- Color-coded output (red=critical, yellow=warning, green=ok)

Added to Acronis menu as option 12 (Troubleshooting section)

This tool enables rapid diagnosis of backup failures without needing
to manually dig through logs or check multiple system components.
This commit is contained in:
cschantz
2025-11-05 21:36:13 -05:00
parent 12c90f3a4e
commit dd07e3a824
2 changed files with 476 additions and 0 deletions
+5
View File
@@ -600,6 +600,10 @@ show_acronis_menu() {
echo -e " ${YELLOW}10)${NC} View Logs - Check Acronis logs" echo -e " ${YELLOW}10)${NC} View Logs - Check Acronis logs"
echo -e " ${YELLOW}11)${NC} Uninstall Acronis - Remove Acronis agent" echo -e " ${YELLOW}11)${NC} Uninstall Acronis - Remove Acronis agent"
echo "" echo ""
echo -e "${BOLD}Troubleshooting:${NC}"
echo ""
echo -e " ${RED}12)${NC} 🔧 Troubleshoot Backups - Diagnose backup failures"
echo ""
echo -e " ${RED}0)${NC} Back to Backup & Recovery" echo -e " ${RED}0)${NC} Back to Backup & Recovery"
echo "" echo ""
echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}" echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
@@ -624,6 +628,7 @@ handle_acronis_menu() {
9) run_module "backup" "acronis-update.sh" ;; 9) run_module "backup" "acronis-update.sh" ;;
10) run_module "backup" "acronis-logs.sh" ;; 10) run_module "backup" "acronis-logs.sh" ;;
11) run_module "backup" "acronis-uninstall.sh" ;; 11) run_module "backup" "acronis-uninstall.sh" ;;
12) run_module "backup" "acronis-troubleshoot.sh" ;;
0) return ;; 0) return ;;
*) echo -e "${RED}Invalid option${NC}"; sleep 1 ;; *) echo -e "${RED}Invalid option${NC}"; sleep 1 ;;
esac esac
+471
View File
@@ -0,0 +1,471 @@
#!/bin/bash
################################################################################
# Acronis Backup Troubleshooter
################################################################################
# Purpose: Diagnose and troubleshoot Acronis backup failures
# Features:
# - Multi-log location scanning
# - Common failure pattern detection
# - Service health checks
# - Disk space analysis
# - Network connectivity tests
# - Automated fix suggestions
################################################################################
# Resolve the toolkit root (two directories above the one holding this script)
# and pull in the shared helper libraries that live under its lib/ folder.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
. "$SCRIPT_DIR/lib/common-functions.sh"
. "$SCRIPT_DIR/lib/system-detect.sh"

# Refuse to continue unless running as root (effective UID 0).
# NOTE(review): print_error is not defined in this file; presumably it comes
# from one of the libraries sourced above - confirm.
(( EUID == 0 )) || {
  print_error "This script must be run as root"
  exit 1
}
# Log locations to check, keyed by a short label.
# Values containing '*' are stored verbatim as glob patterns; nothing is
# expanded at assignment time because every value is quoted.
declare -A LOG_LOCATIONS=()
LOG_LOCATIONS[MMS]="/var/lib/Acronis/BackupAndRecovery/MMS/mms.0.log"
LOG_LOCATIONS[MMS_OLD]="/var/lib/Acronis/BackupAndRecovery/MMS/mms.*.log"
LOG_LOCATIONS[AGENT]="/var/log/acronis/agent/*.log"
LOG_LOCATIONS[CORE]="/var/lib/Acronis/BackupAndRecovery/aakore.log"
LOG_LOCATIONS[SCHEDULE]="/var/lib/Acronis/BackupAndRecovery/scheduler.log"
LOG_LOCATIONS[SYSTEM]="/var/log/messages"
LOG_LOCATIONS[SYSLOG]="/var/log/syslog"
# Opening screen: title banner, mode heading, and the list of areas the
# diagnostic run is about to inspect, closed off by a horizontal rule.
print_banner "Acronis Backup Troubleshooter"
printf '\n'
echo -e "${BOLD}Diagnostic Mode${NC}"
printf '\n'
printf '%s\n' "This tool will analyze:"
# One bullet per argument; printf repeats the format for each of them.
printf ' • %s\n' \
  "Service status and health" \
  "Log files for errors and failures" \
  "System resources (disk, memory)" \
  "Network connectivity to Acronis Cloud" \
  "Common backup failure patterns"
printf '\n'
echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
printf '\n'
# Accumulators for everything the checks discover. They start empty and are
# appended to through add_issue / add_warning / add_recommendation.
declare -a \
  ISSUES_FOUND=() \
  WARNINGS_FOUND=() \
  RECOMMENDATIONS=()
#######################################
# Record a critical finding.
# Globals:   ISSUES_FOUND (appended to)
# Arguments: $1 - description of the issue
#######################################
add_issue() {
  # Writing at index == current length appends to the end of the array.
  ISSUES_FOUND[${#ISSUES_FOUND[@]}]="$1"
}
#######################################
# Record a non-critical finding.
# Globals:   WARNINGS_FOUND (appended to)
# Arguments: $1 - description of the warning
#######################################
add_warning() {
  # Writing at index == current length appends to the end of the array.
  WARNINGS_FOUND[${#WARNINGS_FOUND[@]}]="$1"
}
#######################################
# Record a suggested remediation step.
# Globals:   RECOMMENDATIONS (appended to)
# Arguments: $1 - text of the recommendation
#######################################
add_recommendation() {
  # Writing at index == current length appends to the end of the array.
  RECOMMENDATIONS[${#RECOMMENDATIONS[@]}]="$1"
}
# 1. Check service status
# For each expected Acronis unit: report whether it is running, record an
# issue plus a start command when it is installed but inactive, and say so
# explicitly when the unit is not installed (instead of printing nothing).
echo -e "${BOLD}[1/7] Checking Acronis Services...${NC}"
echo ""
declare -a SERVICES=("aakore" "acronis_mms" "acronis_schedule" "active-protection")
all_services_running=true
# Query the unit list once rather than once per service; keep only the
# unit-name column so the comparison below can be an exact match.
installed_units="$(systemctl list-unit-files 2>/dev/null | awk '{print $1}')"
for service in "${SERVICES[@]}"; do
  # -x -F: whole-line literal match, so the '.' in "<name>.service" is not
  # treated as a regex wildcard and longer unit names cannot match by prefix.
  if ! grep -qxF -- "${service}.service" <<<"$installed_units"; then
    echo " - $service is not installed (skipped)"
    continue
  fi
  if systemctl is-active --quiet "$service"; then
    echo -e " ${GREEN}✓${NC} $service is running"
  else
    echo -e " ${RED}✗${NC} $service is NOT running"
    add_issue "Service $service is stopped"
    add_recommendation "Start service: systemctl start $service"
    all_services_running=false
  fi
done
if [ "$all_services_running" = false ]; then
  add_recommendation "Start all services: Go to Acronis menu → Check Agent Status → Start All Services"
fi
echo ""
# 2. Check disk space
# Reports usage of the filesystem holding /var/lib/Acronis (issue above 95%,
# warning above 90%) and warns when the root filesystem is above 90%.
echo -e "${BOLD}[2/7] Checking Disk Space...${NC}"
echo ""
# Check backup directory
if [ -d "/var/lib/Acronis" ]; then
  # -P forces one output line per filesystem; without it a long device name
  # wraps onto a second line and the column positions shift.
  # A single df call feeds both values: column 4 (available), column 5 (use%).
  read -r backup_disk_avail backup_disk_usage < <(
    df -hP /var/lib/Acronis 2>/dev/null \
      | awk 'NR==2 {sub(/%/, "", $5); print $4, $5}'
  )
  echo " Acronis directory: /var/lib/Acronis"
  # Only compare when df produced a number; an empty or odd value would make
  # the integer test below abort with "integer expression expected".
  if [[ "$backup_disk_usage" =~ ^[0-9]+$ ]]; then
    echo " Disk usage: ${backup_disk_usage}%"
    echo " Available: ${backup_disk_avail}"
    if [ "$backup_disk_usage" -gt 95 ]; then
      add_issue "Disk space critically low (${backup_disk_usage}% used)"
      add_recommendation "Free up disk space or change backup destination"
    elif [ "$backup_disk_usage" -gt 90 ]; then
      add_warning "Disk space running low (${backup_disk_usage}% used)"
      add_recommendation "Monitor disk space closely"
    else
      echo -e " ${GREEN}✓${NC} Disk space OK"
    fi
  else
    add_warning "Could not determine disk usage for /var/lib/Acronis"
  fi
fi
# Check system disk
root_disk_usage=$(df -P / 2>/dev/null | awk 'NR==2 {sub(/%/, "", $5); print $5}')
if [[ "$root_disk_usage" =~ ^[0-9]+$ ]] && [ "$root_disk_usage" -gt 90 ]; then
  add_warning "Root filesystem at ${root_disk_usage}% capacity"
fi
echo ""
# 3. Check memory
# Prints total/available memory and the used percentage (used / total from
# the Mem: row of free); warns above 90%, with an extra recommendation
# above 95%.
echo -e "${BOLD}[3/7] Checking Memory...${NC}"
echo ""
# LC_ALL=C pins the "Mem:" row label, which free can translate under other
# locales; without it the row match may find nothing.
# Column 2 is the total; column 7 is taken as "available".
# NOTE(review): on older procps releases column 7 is "cached" - confirm the
# minimum supported distribution.
read -r mem_total mem_available < <(LC_ALL=C free -h | awk '/^Mem:/ {print $2, $7}')
# "$2 > 0" avoids a division by zero if the total is ever reported as 0.
mem_used_percent=$(LC_ALL=C free | awk '/^Mem:/ && $2 > 0 {printf "%.0f", ($3/$2)*100}')
echo " Total memory: ${mem_total}"
echo " Available: ${mem_available}"
# Guard the integer comparisons: an empty value would otherwise raise
# "integer expression expected" and fall through to the OK branch.
if [[ "$mem_used_percent" =~ ^[0-9]+$ ]]; then
  echo " Used: ${mem_used_percent}%"
  if [ "$mem_used_percent" -gt 95 ]; then
    add_warning "Memory usage critically high (${mem_used_percent}%)"
    add_recommendation "Check for memory leaks or reduce backup concurrency"
  elif [ "$mem_used_percent" -gt 90 ]; then
    add_warning "Memory usage high (${mem_used_percent}%)"
  else
    echo -e " ${GREEN}✓${NC} Memory OK"
  fi
else
  add_warning "Could not determine memory usage"
fi
echo ""
# 4. Check network connectivity
# Reads the cloud URL from /etc/Acronis/Global.config, then tests an HTTPS
# request to it and name resolution of its host. A missing tool is reported
# as a skipped test, not as a network failure.
echo -e "${BOLD}[4/7] Checking Network Connectivity...${NC}"
echo ""
# Check if registered
if [ -f "/etc/Acronis/Global.config" ]; then
  cloud_url=$(grep -oP 'CloudUrl[>="].*?https://[^"<]+' /etc/Acronis/Global.config 2>/dev/null | grep -oP 'https://[^"<]+' | head -1)
  if [ -n "$cloud_url" ]; then
    echo " Testing connection to: $cloud_url"
    # Extract hostname: drop the scheme, then any path, then any ":port"
    # suffix (a port left in place would make the DNS lookup fail).
    cloud_host=${cloud_url#https://}
    cloud_host=${cloud_host%%/*}
    cloud_host=${cloud_host%%:*}
    # Test connectivity
    if ! command -v curl >/dev/null 2>&1; then
      add_warning "curl is not installed - connectivity test skipped"
    # --max-time bounds the whole request; --connect-timeout alone does not
    # cover a server that accepts the connection and then stalls.
    elif curl -s --connect-timeout 5 --max-time 15 -I "$cloud_url" >/dev/null 2>&1; then
      echo -e " ${GREEN}✓${NC} Connection successful"
    else
      add_issue "Cannot connect to Acronis Cloud: $cloud_url"
      add_recommendation "Check firewall rules and network connectivity"
      add_recommendation "Test manually: curl -I $cloud_url"
    fi
    # Test DNS resolution. Prefer getent (part of glibc, honours
    # /etc/nsswitch.conf and /etc/hosts); fall back to host when present.
    dns_status="skipped"
    if command -v getent >/dev/null 2>&1; then
      if getent ahosts "$cloud_host" >/dev/null 2>&1; then
        dns_status="ok"
      else
        dns_status="failed"
      fi
    elif command -v host >/dev/null 2>&1; then
      if host "$cloud_host" >/dev/null 2>&1; then
        dns_status="ok"
      else
        dns_status="failed"
      fi
    fi
    case "$dns_status" in
      ok)
        echo -e " ${GREEN}✓${NC} DNS resolution OK"
        ;;
      failed)
        add_issue "DNS resolution failed for $cloud_host"
        add_recommendation "Check DNS configuration: /etc/resolv.conf"
        ;;
      *)
        add_warning "No DNS lookup tool (getent/host) found - DNS test skipped"
        ;;
    esac
  else
    add_warning "Agent may not be registered with Acronis Cloud"
    add_recommendation "Register agent: Acronis menu → Register with Cloud"
  fi
else
  add_warning "Acronis configuration file not found"
fi
echo ""
# 5. Scan logs for errors
echo -e "${BOLD}[5/7] Scanning Logs for Errors...${NC}"
printf '\n'
# Failure categories mapped to the extended regular expression (alternatives
# separated by '|') that identifies them in a log line.
declare -A ERROR_PATTERNS=()
ERROR_PATTERNS[INSUFFICIENT_SPACE]="insufficient.*space|no.*space.*left|disk.*full"
ERROR_PATTERNS[PERMISSION_DENIED]="permission.*denied|access.*denied|cannot.*access"
ERROR_PATTERNS[CONNECTION_FAILED]="connection.*failed|connection.*refused|timeout|network.*error"
ERROR_PATTERNS[AUTH_FAILED]="authentication.*failed|invalid.*credentials|unauthorized"
ERROR_PATTERNS[BACKUP_FAILED]="backup.*failed|backup.*error|task.*failed"
ERROR_PATTERNS[VSS_ERROR]="vss.*error|snapshot.*failed|shadow.*copy.*error"
ERROR_PATTERNS[DATABASE_ERROR]="database.*error|sql.*error|db.*lock"
ERROR_PATTERNS[FILE_LOCKED]="file.*locked|file.*in.*use|sharing.*violation"
# Scan primary log
# For every entry in ERROR_PATTERNS: count the matching lines in mms.0.log,
# show the most recent match, record an issue and add category-specific
# recommendations. Afterwards list the last five backup/task failures.
primary_log="/var/lib/Acronis/BackupAndRecovery/MMS/mms.0.log"
if [ -f "$primary_log" ]; then
  echo " Scanning primary log: mms.0.log"
  # Bash does not define the iteration order of associative-array keys;
  # sorting them keeps the report order the same from run to run.
  mapfile -t pattern_names < <(printf '%s\n' "${!ERROR_PATTERNS[@]}" | LC_ALL=C sort)
  for pattern_name in "${pattern_names[@]}"; do
    # An empty ERROR_PATTERNS yields one blank name; skip it.
    [ -n "$pattern_name" ] || continue
    pattern="${ERROR_PATTERNS[$pattern_name]}"
    # Count first; the log is only read a second time when there is a match.
    error_count=$(grep -icE -- "$pattern" "$primary_log" 2>/dev/null)
    if [[ "$error_count" =~ ^[0-9]+$ ]] && [ "$error_count" -gt 0 ]; then
      last_error=$(grep -iE -- "$pattern" "$primary_log" 2>/dev/null | tail -1)
      echo -e " ${RED}✗${NC} Found $pattern_name errors (count: $error_count)"
      # %s prints the log text verbatim; echo -e would interpret any
      # backslash sequences the log line happens to contain.
      printf ' Last: %b%s...%b\n' "${DIM}" "${last_error:0:80}" "${NC}"
      add_issue "$pattern_name detected in logs (count: $error_count)"
      # Add specific recommendations
      case "$pattern_name" in
        "INSUFFICIENT_SPACE")
          add_recommendation "Free up disk space or change backup destination"
          ;;
        "PERMISSION_DENIED")
          add_recommendation "Check file/directory permissions"
          add_recommendation "Ensure Acronis agent has necessary access rights"
          ;;
        "CONNECTION_FAILED")
          add_recommendation "Check network connectivity and firewall rules"
          add_recommendation "Verify Acronis Cloud URL is accessible"
          ;;
        "AUTH_FAILED")
          add_recommendation "Re-register agent with valid token"
          add_recommendation "Check registration status in web console"
          ;;
        "VSS_ERROR")
          # The previous advice (vssadmin / net stop vss) was Windows-only.
          # This script targets systemd hosts, where snapshots rely on the
          # Acronis SnapAPI kernel module.
          # NOTE(review): confirm module name against the installed agent.
          add_recommendation "Check the SnapAPI snapshot module is loaded: lsmod | grep -i snapapi"
          add_recommendation "After a kernel update, verify the module was rebuilt: dkms status"
          ;;
        "DATABASE_ERROR")
          add_recommendation "Check database connections and locks"
          add_recommendation "Consider application-aware backup settings"
          ;;
        "FILE_LOCKED")
          add_recommendation "Identify processes locking files: lsof"
          add_recommendation "Schedule backups during low-activity periods"
          ;;
      esac
    fi
  done
  # Check for recent backup failures
  recent_failures=$(grep -iE 'backup.*failed|task.*failed' "$primary_log" 2>/dev/null | tail -5)
  if [ -n "$recent_failures" ]; then
    echo ""
    echo -e " ${YELLOW}Recent backup failures:${NC}"
    # Here-string instead of a pipe; log text is printed with %s so it is
    # never interpreted as escape sequences.
    while read -r line; do
      printf ' %b%s%b\n' "${DIM}" "${line:0:100}" "${NC}"
    done <<<"$recent_failures"
  fi
else
  add_warning "Primary log file not found: $primary_log"
fi
echo ""
# 6. Check for stuck backups
# Lists processes whose ps line mentions "acronis" (case-insensitive) and
# whose 10th "ps aux" column (TIME) contains a two-digit minutes field.
# NOTE(review): TIME is accumulated CPU time, so a healthy long-lived daemon
# can also match - treat a hit as a hint, not proof of a hang.
echo -e "${BOLD}[6/7] Checking for Stuck Processes...${NC}"
echo ""
# Check for long-running Acronis processes
# One awk replaces "grep | grep -v grep", which also hid any real process
# that merely had "grep" in its command line. The [a] bracket keeps awk's
# own command line from matching, and $2 != self drops this script's PID.
old_processes=$(ps aux | awk -v self="$$" \
  'tolower($0) ~ /[a]cronis/ && $2 != self && $10 ~ /[0-9][0-9]:[0-9][0-9]/')
if [ -n "$old_processes" ]; then
  echo -e " ${YELLOW}⚠${NC} Long-running Acronis processes detected:"
  # %s prints each process line verbatim (no backslash interpretation).
  while read -r line; do
    printf ' %b%s%b\n' "${DIM}" "$line" "${NC}"
  done <<<"$old_processes"
  add_warning "Long-running Acronis processes may indicate stuck backups"
  add_recommendation "Review processes and consider restarting services if stuck"
else
  echo -e " ${GREEN}✓${NC} No stuck processes detected"
fi
echo ""
# 7. Check configuration issues
# Counts configured backup plans via acrocmd (when available) and prints the
# agent version reported by the aakore binary.
echo -e "${BOLD}[7/7] Checking Configuration...${NC}"
echo ""
# Check if backup plans are configured
if command -v acrocmd &>/dev/null; then
  # grep -c already prints "0" when nothing matches (while exiting 1), so an
  # "|| echo 0" fallback would produce "0<newline>0" and break the integer
  # test below. Default only when the output is empty.
  plan_count=$(acrocmd list plans 2>/dev/null | grep -c "^Plan ID")
  plan_count=${plan_count:-0}
  echo " Configured backup plans: $plan_count"
  if [ "$plan_count" -eq 0 ]; then
    add_warning "No backup plans configured"
    add_recommendation "Configure backup plans in Acronis web console"
  fi
else
  # -e is required here, as on every other line that embeds colour variables.
  echo -e " ${DIM}acrocmd not available - cannot check backup plans${NC}"
fi
# Check agent version
if [ -f "/usr/lib/Acronis/BackupAndRecovery/aakore" ]; then
  # head always exits 0, so a "|| echo Unknown" after the pipeline can never
  # run; fall back on empty output instead.
  agent_version=$(/usr/lib/Acronis/BackupAndRecovery/aakore --version 2>/dev/null | head -1)
  echo " Agent version: ${agent_version:-Unknown}"
else
  add_warning "Cannot determine agent version"
fi
echo ""
echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
echo ""
# Summary Report
# Prints an all-clear message when nothing was recorded; otherwise lists
# critical issues, warnings, and a numbered, de-duplicated set of
# recommendations in the order they were first added.
echo -e "${BOLD}DIAGNOSTIC SUMMARY${NC}"
echo ""
if [ ${#ISSUES_FOUND[@]} -eq 0 ] && [ ${#WARNINGS_FOUND[@]} -eq 0 ]; then
  echo -e "${GREEN}${BOLD}✓ No issues detected${NC}"
  echo ""
  echo "Acronis appears to be healthy. If you're experiencing backup"
  echo "failures, check the web console for detailed backup logs."
else
  # Show issues
  if [ ${#ISSUES_FOUND[@]} -gt 0 ]; then
    echo -e "${RED}${BOLD}Critical Issues (${#ISSUES_FOUND[@]}):${NC}"
    for issue in "${ISSUES_FOUND[@]}"; do
      # %s keeps the finding text literal (it may embed URLs or log data).
      printf ' %b✗%b %s\n' "${RED}" "${NC}" "$issue"
    done
    echo ""
  fi
  # Show warnings
  if [ ${#WARNINGS_FOUND[@]} -gt 0 ]; then
    echo -e "${YELLOW}${BOLD}Warnings (${#WARNINGS_FOUND[@]}):${NC}"
    for warning in "${WARNINGS_FOUND[@]}"; do
      printf ' %b⚠%b %s\n' "${YELLOW}" "${NC}" "$warning"
    done
    echo ""
  fi
  # Show recommendations
  if [ ${#RECOMMENDATIONS[@]} -gt 0 ]; then
    echo -e "${CYAN}${BOLD}Recommendations:${NC}"
    # Plain assignment: this is top-level code, where "local" is an error
    # and previously left the first recommendation without a number.
    rec_num=1
    # Several checks can add the same advice; print each text only once.
    shown_recs=()
    for rec in "${RECOMMENDATIONS[@]}"; do
      already_shown=false
      for shown in "${shown_recs[@]}"; do
        if [ "$shown" = "$rec" ]; then
          already_shown=true
          break
        fi
      done
      [ "$already_shown" = true ] && continue
      shown_recs+=("$rec")
      printf ' %b%s.%b %s\n' "${CYAN}" "$rec_num" "${NC}" "$rec"
      rec_num=$((rec_num + 1))
    done
    echo ""
  fi
fi
echo -e "${CYAN}──────────────────────────────────────────────────────────────${NC}"
printf '\n'
# Quick actions
# Render the numbered follow-up menu and read the operator's choice into
# $action (an empty line is accepted).
echo -e "${BOLD}Quick Actions:${NC}"
printf '\n'
quick_actions=(
  "View Full Logs (all errors)"
  "Restart All Services"
  "Generate Detailed Report"
  "Export Logs for Support"
)
# Array indices start at 0; menu entries are numbered from 1.
for slot in "${!quick_actions[@]}"; do
  echo -e " ${CYAN}$((slot + 1)))${NC} ${quick_actions[slot]}"
done
printf '\n'
echo -e " ${RED}0)${NC} Return to Menu"
printf '\n'
printf '%s' "Select action (or press Enter to return): "
read -r action
# Run the quick action chosen above. Choices 1, 3 and 4 fall through to the
# end of the script when done; choice 2 re-executes this script; anything
# else (including an empty answer) exits with status 0.
case "$action" in
  1)
    # Show all errors
    clear
    print_banner "All Errors from Logs"
    echo ""
    if [ -f "$primary_log" ]; then
      grep -iE "error|fail|critical|warn" "$primary_log" | tail -50
    else
      # Say why the screen is empty instead of showing nothing.
      echo " Primary log file not found: $primary_log"
    fi
    echo ""
    press_enter
    ;;
  2)
    # Restart services
    echo ""
    echo "Restarting all Acronis services..."
    # Track failures so success is only reported when every restart worked.
    failed_services=()
    for service in "${SERVICES[@]}"; do
      systemctl restart "$service" || failed_services+=("$service")
    done
    echo ""
    if [ ${#failed_services[@]} -eq 0 ]; then
      print_success "Services restarted"
    else
      print_error "Failed to restart: ${failed_services[*]}"
    fi
    echo ""
    echo "Waiting 5 seconds for services to stabilize..."
    sleep 5
    echo ""
    echo "Running diagnostic again..."
    sleep 2
    # Re-run via bash and BASH_SOURCE so this works without the exec bit and
    # regardless of what $0 is.
    exec bash "${BASH_SOURCE[0]}"
    ;;
  3)
    # Generate detailed report
    echo ""
    echo "Generating detailed report..."
    # This runs as root: write into a fresh mktemp -d directory rather than
    # a predictable /tmp file name that another user could pre-create as a
    # symlink.
    if report_dir=$(mktemp -d /tmp/acronis-diagnostic.XXXXXX); then
      report_file="$report_dir/acronis-diagnostic-$(date +%Y%m%d-%H%M%S).txt"
      {
        echo "Acronis Diagnostic Report"
        echo "Generated: $(date)"
        echo "Hostname: $(hostname)"
        echo ""
        echo "=== Service Status ==="
        for service in "${SERVICES[@]}"; do
          systemctl status "$service" 2>&1 | head -20
          echo ""
        done
        echo ""
        echo "=== Recent Log Entries ==="
        if [ -f "$primary_log" ]; then
          tail -200 "$primary_log"
        fi
        echo ""
        echo "=== System Resources ==="
        df -h
        echo ""
        free -h
        echo ""
        echo "=== Network ==="
        # netstat is often missing on current distributions; fall back to ss.
        if command -v netstat >/dev/null 2>&1; then
          netstat -tuln | grep -E "7770|7800|8443|44445"
        elif command -v ss >/dev/null 2>&1; then
          ss -tuln | grep -E "7770|7800|8443|44445"
        else
          echo "(netstat/ss not available)"
        fi
        echo ""
        echo "=== Processes ==="
        # [a] stops grep from matching its own command line.
        ps aux | grep -i '[a]cronis'
      } > "$report_file"
      print_success "Report generated: $report_file"
      echo ""
      echo "You can send this report to Acronis support or review it locally."
    else
      print_error "Could not create a private directory for the report"
    fi
    echo ""
    press_enter
    ;;
  4)
    # Export logs
    echo ""
    echo "Exporting logs..."
    log_dir="/var/lib/Acronis/BackupAndRecovery/MMS"
    if [ -d "$log_dir" ]; then
      # Expand the glob into an array so "no .log files" can be detected
      # before tar is invoked.
      log_files=("$log_dir"/*.log)
      if [ ! -e "${log_files[0]}" ]; then
        print_error "No log files found in $log_dir"
      elif ! archive_dir=$(mktemp -d /tmp/acronis-logs.XXXXXX); then
        print_error "Could not create a private directory for the archive"
      else
        archive_file="$archive_dir/acronis-logs-$(date +%Y%m%d-%H%M%S).tar.gz"
        tar -czf "$archive_file" "${log_files[@]}" 2>/dev/null
        tar_status=$?
        # GNU tar exits 1 when a file changed while being read, which is
        # expected for live logs; only higher codes or an empty archive
        # count as failure.
        if [ "$tar_status" -le 1 ] && [ -s "$archive_file" ]; then
          print_success "Logs exported: $archive_file"
          echo ""
          echo "Archive size: $(du -h "$archive_file" | awk '{print $1}')"
        else
          print_error "Failed to create log archive (tar exit status $tar_status)"
        fi
      fi
    else
      print_error "Log directory not found"
    fi
    echo ""
    press_enter
    ;;
  *)
    exit 0
    ;;
esac