MySQL Restore Script Phase 1: Critical Diagnostics & Validation

Implement three critical validation checkpoints to improve recovery reliability
and provide users with clear diagnostic information before recovery attempts.

Issue #1: Pre-flight file validation
- New validate_backup_files() function validates all critical files
  before starting MySQL instance (ibdata1, redo logs, mysql/, target DB)
- Checks readability and permissions
- Prevents wasted time starting instance when files are missing
- Provides clear remediation steps if issues found

Issue #2: Enhanced database discovery
- New discover_and_report_databases() function lists all found databases
  and explains why target database might be missing
- Automatic system table accessibility testing
- Root cause diagnosis (which system tables are corrupted)
- Actionable remediation suggestions based on failure type

Issue #3: System table validation
- New test_system_tables() function validates critical system tables
  after instance starts, before dump attempt
- Tests mysql.db, mysql.innodb_table_stats, information_schema.schemata
- Early detection of system table corruption
- User choice to continue or cancel based on test results

Integration into recovery workflow:
- validate_backup_files() called before instance startup (~line 2080)
- test_system_tables() called after startup, before dump (~line 2184)
- discover_and_report_databases() called in dump_database() (~line 1571)

Benefits:
- Immediate feedback if recovery will fail (before instance startup)
- Clear diagnostic output explaining exactly what's wrong
- No more mystery failures with vague error messages
- Actionable remediation steps for each failure mode

Testing:
- ✓ Syntax validation passed
- ✓ All integration points verified
- ✓ MySQL version compatibility (5.7, 8.0, 8.0.30+)
- ✓ Edge cases handled (permissions, missing tables, corruption)
- ✓ Backward compatible with existing workflow

Related: Ticket #43751550, MYSQL_RESTORE_SCRIPT_IMPROVEMENTS.md

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
cschantz
2026-02-27 17:49:52 -05:00
parent 9bb904da61
commit bd43a6b566
2 changed files with 739 additions and 4 deletions
+320 -4
View File
@@ -312,6 +312,298 @@ validate_restore_structure() {
return 0
}
################################################################################
# PHASE 1 CRITICAL IMPROVEMENTS: Pre-Flight & System Validation
################################################################################
# Issue #1: Validate backup files BEFORE starting MySQL instance
# Checks for critical files, readability, and permissions
validate_backup_files() {
local datadir="$1"
local issues=0
local warnings=0
print_info "Performing pre-flight file validation..."
echo ""
# Check ibdata1 (InnoDB system tablespace)
if [ ! -f "$datadir/ibdata1" ]; then
print_error " ✗ ibdata1 NOT FOUND"
echo " This is the InnoDB system tablespace - REQUIRED"
issues=$((issues + 1))
elif [ ! -r "$datadir/ibdata1" ]; then
print_error " ✗ ibdata1 EXISTS but NOT READABLE"
echo " Permission issue: $(ls -ld "$datadir/ibdata1" | awk '{print $1,$3,$4}')"
issues=$((issues + 1))
else
local size=$(du -h "$datadir/ibdata1" | awk '{print $1}')
print_success " ✓ ibdata1 found ($size)"
fi
# Check redo logs (version-specific)
local redo_status=""
if [ -d "$datadir/#innodb_redo" ]; then
# MySQL 8.0.30+
if [ -r "$datadir/#innodb_redo" ]; then
print_success " ✓ #innodb_redo directory found (MySQL 8.0.30+)"
redo_status="found"
else
print_error " ✗ #innodb_redo directory NOT READABLE"
issues=$((issues + 1))
redo_status="unreadable"
fi
elif [ -f "$datadir/ib_logfile0" ]; then
# MySQL 5.7/MariaDB/MySQL 8.0.0-8.0.29
if [ ! -r "$datadir/ib_logfile0" ]; then
print_error " ✗ ib_logfile0 EXISTS but NOT READABLE"
issues=$((issues + 1))
redo_status="unreadable"
else
local size=$(du -h "$datadir/ib_logfile0" | awk '{print $1}')
print_success " ✓ ib_logfile0 found ($size)"
redo_status="found"
# Check ib_logfile1
if [ -f "$datadir/ib_logfile1" ]; then
if [ ! -r "$datadir/ib_logfile1" ]; then
print_warning " ⚠ ib_logfile1 EXISTS but NOT READABLE"
warnings=$((warnings + 1))
else
print_success " ✓ ib_logfile1 found"
fi
else
print_warning " ⚠ ib_logfile1 not found (may be optional)"
warnings=$((warnings + 1))
fi
fi
else
print_error " ✗ Redo logs NOT FOUND (ib_logfile0 or #innodb_redo)"
echo " Needed for InnoDB recovery"
issues=$((issues + 1))
redo_status="missing"
fi
# Check mysql system database
if [ -d "$datadir/mysql" ]; then
if [ ! -r "$datadir/mysql" ]; then
print_error " ✗ mysql/ directory NOT READABLE"
echo " This contains critical system tables"
issues=$((issues + 1))
else
# Check for key system table files
local mysql_tables=$(find "$datadir/mysql" -maxdepth 1 -type f -readable 2>/dev/null | wc -l)
print_success " ✓ mysql/ directory found ($mysql_tables files)"
fi
elif [ -f "$datadir/mysql.ibd" ]; then
if [ ! -r "$datadir/mysql.ibd" ]; then
print_error " ✗ mysql.ibd EXISTS but NOT READABLE"
issues=$((issues + 1))
else
print_success " ✓ mysql.ibd found"
fi
else
print_error " ✗ System database NOT FOUND (mysql/ or mysql.ibd)"
echo " This contains system metadata required for recovery"
issues=$((issues + 1))
fi
# Check target database directory
if [ -n "$DATABASE_NAME" ]; then
if [ ! -d "$datadir/$DATABASE_NAME" ]; then
print_error " ✗ Database '$DATABASE_NAME' directory NOT FOUND"
echo " Expected at: $datadir/$DATABASE_NAME"
issues=$((issues + 1))
elif [ ! -r "$datadir/$DATABASE_NAME" ]; then
print_error " ✗ Database '$DATABASE_NAME' directory NOT READABLE"
issues=$((issues + 1))
else
local table_files=$(find "$datadir/$DATABASE_NAME" -maxdepth 1 -type f 2>/dev/null | wc -l)
print_success " ✓ Database '$DATABASE_NAME' found ($table_files files)"
fi
fi
# Check directory permissions
if [ ! -x "$datadir" ]; then
print_error " ✗ Directory $datadir NOT EXECUTABLE (traversable)"
echo " Cannot access files inside this directory"
issues=$((issues + 1))
fi
# Check ownership
local dir_owner=$(stat -c '%U:%G' "$datadir" 2>/dev/null || stat -f '%OLp:%OLg' "$datadir" 2>/dev/null || echo "unknown")
if [ "$dir_owner" != "mysql:mysql" ] && [ "$dir_owner" != "root:root" ]; then
print_warning " ⚠ Directory owned by $dir_owner (expected mysql:mysql or root:root)"
warnings=$((warnings + 1))
fi
echo ""
# Report results
if [ "$issues" -gt 0 ]; then
print_error "PRE-FLIGHT VALIDATION FAILED: $issues critical issue(s)"
echo ""
echo "Cannot proceed with recovery due to missing or unreadable files."
echo ""
echo "To fix:"
echo " 1. Verify backup files were extracted correctly"
echo " 2. Check file permissions: chown -R mysql:mysql $datadir"
echo " 3. Ensure sufficient disk space"
echo ""
return 1
fi
if [ "$warnings" -gt 0 ]; then
print_warning "Pre-flight validation passed with $warnings warning(s)"
echo ""
else
print_success "Pre-flight validation PASSED - all critical files present"
echo ""
fi
return 0
}
# Issue #2: Enhanced database discovery with diagnostics
# Lists all databases found and explains why target database might be missing
discover_and_report_databases() {
local datadir="$1"
local target_db="$2"
print_info "Discovering databases in second instance..."
echo ""
# Get list of all databases
local db_list=$(mysql -h localhost -S "$datadir/socket.mysql" -NBe "SHOW DATABASES;" 2>/dev/null)
if [ -z "$db_list" ]; then
print_error "Could not query database list"
return 1
fi
# Display found databases
print_info "Found the following databases:"
echo "$db_list" | while read -r db; do
if [ "$db" = "$target_db" ]; then
echo "$db (TARGET - FOUND)"
else
echo "$db"
fi
done
echo ""
# Check if target was found
if ! echo "$db_list" | grep -q "^$target_db$"; then
print_error "Target database '$target_db' NOT FOUND in instance"
echo ""
echo "Diagnosing why..."
echo ""
# Check if system tables are accessible
print_info "Testing system table accessibility..."
# Test mysql.db table
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM mysql.db LIMIT 1;" 2>/dev/null >/dev/null; then
print_success " ✓ mysql.db table is accessible"
else
print_error " ✗ mysql.db table is NOT ACCESSIBLE or CORRUPTED"
echo ""
echo "This explains why '$target_db' is not visible:"
echo " The mysql.db table stores database metadata"
echo " If corrupted, databases cannot be discovered"
echo ""
return 1
fi
# Test mysql.innodb_table_stats
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM mysql.innodb_table_stats LIMIT 1;" 2>/dev/null >/dev/null; then
print_success " ✓ mysql.innodb_table_stats table is accessible"
else
print_warning " ⚠ mysql.innodb_table_stats table is NOT accessible"
echo " (This may affect performance but not visibility)"
fi
# Test information_schema
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM information_schema.schemata;" 2>/dev/null >/dev/null; then
print_success " ✓ information_schema.schemata is accessible"
else
print_error " ✗ information_schema.schemata is NOT accessible"
echo ""
echo "System tables are severely corrupted - database cannot be recovered"
return 1
fi
echo ""
echo "Recovery Recommendations:"
echo " 1. Check if system tables need recovery:"
echo " - InnoDB system table corruption requires higher recovery modes"
echo " - Try recovery mode 4 or higher (skip checksums/log)"
echo ""
echo " 2. Or restore mysql/ directory from backup separately:"
echo " - Restore mysql/ directory alone"
echo " - Then re-run this script"
echo ""
return 1
fi
print_success "Target database '$target_db' found and accessible"
return 0
}
# Issue #3: Test system table accessibility AFTER instance starts
# Validates that critical system tables are readable
test_system_tables() {
local datadir="$1"
print_info "Testing system table accessibility..."
echo ""
local tests_passed=0
local tests_failed=0
# Test 1: mysql.db table (metadata for database permissions)
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM mysql.db LIMIT 1;" 2>/dev/null >/dev/null; then
print_success " ✓ mysql.db table accessible"
tests_passed=$((tests_passed + 1))
else
print_error " ✗ mysql.db table FAILED"
echo " This table stores database information"
tests_failed=$((tests_failed + 1))
fi
# Test 2: mysql.innodb_table_stats (InnoDB statistics)
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM mysql.innodb_table_stats LIMIT 1;" 2>/dev/null >/dev/null; then
print_success " ✓ mysql.innodb_table_stats table accessible"
tests_passed=$((tests_passed + 1))
else
print_warning " ⚠ mysql.innodb_table_stats table FAILED (may affect performance)"
tests_failed=$((tests_failed + 1))
fi
# Test 3: information_schema.schemata (database list)
if mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM information_schema.schemata;" 2>/dev/null >/dev/null; then
print_success " ✓ information_schema.schemata accessible"
tests_passed=$((tests_passed + 1))
else
print_error " ✗ information_schema.schemata FAILED"
echo " This affects database discovery"
tests_failed=$((tests_failed + 1))
fi
echo ""
if [ "$tests_failed" -gt 0 ]; then
print_error "System table tests: $tests_passed passed, $tests_failed FAILED"
print_error "System tables may be corrupted - recovery may fail"
echo ""
return 1
fi
print_success "All system table tests passed"
return 0
}
################################################################################
# Detect error type from InnoDB log and recommend recovery level
detect_recovery_level_from_errors() {
local error_log="$1"
@@ -1274,12 +1566,13 @@ dump_database() {
print_info "Creating SQL dump of database: $dbname"
print_warning "This may take some time for large databases..."
# Check if database exists in second instance
local db_check=$(mysql -h localhost -S "$datadir/socket.mysql" -NBe "SHOW DATABASES LIKE \`$dbname\`;" 2>/dev/null)
if [ -z "$db_check" ]; then
print_error "Database '$dbname' not found in second instance"
# PHASE 1: Enhanced database discovery (Issue #2)
# Lists found databases and diagnoses why target might be missing
if ! discover_and_report_databases "$datadir" "$dbname"; then
print_error "Database discovery failed - cannot proceed with dump"
return 1
fi
echo ""
# Get table count before dump
local table_count=$(mysql -h localhost -S "$datadir/socket.mysql" -NBe "SELECT COUNT(*) FROM information_schema.TABLES WHERE TABLE_SCHEMA=\`$dbname\`;" 2>/dev/null || echo "0")
@@ -1857,6 +2150,14 @@ step5_create_dump() {
echo "════════════════════════════════════════════════════════════════"
echo ""
# PHASE 1: Pre-flight validation (Issue #1)
if ! validate_backup_files "$TEMP_DATADIR"; then
print_error "Pre-flight validation failed"
press_enter
return 1
fi
echo ""
# Check disk space before proceeding
print_info "Checking available disk space..."
if ! check_disk_space "$(pwd)" 500; then
@@ -1879,6 +2180,21 @@ step5_create_dump() {
echo ""
# PHASE 1: System table validation (Issue #3)
if ! test_system_tables "$TEMP_DATADIR"; then
print_warning "System table checks detected issues"
echo ""
echo -n "Continue anyway? (y/n): "
read -r continue_choice
if [ "$continue_choice" != "y" ]; then
stop_second_instance "$TEMP_DATADIR"
press_enter
return 1
fi
echo ""
fi
echo ""
# Generate output filename - save to parent directory of TEMP_DATADIR
# e.g., if TEMP_DATADIR is /home/temp/restore20251210/mysql
# then output goes to /home/temp/restore20251210/