Fix major false positives in QA script (33 HIGH issues eliminated)

Reduced false positives from 104 to 71 HIGH issues by improving detection logic:

1. SOURCE Detection (CHECK 44):
   - Skip lines with error handling (|| or 2>/dev/null)
   - Better extraction: handle quotes, skip special chars
   - Skip empty/variable/absolute paths
   - More precise grep pattern (only ^\s*source lines)
   - Validates existence checks more accurately

2. IFS Detection (CHECK 68):
   - Skip safe pattern: 'IFS= read' (only affects read command)
   - Skip IFS in while/for conditions (locally scoped)
   - Only flag standalone IFS assignments without reset
   - Changed grep to only match ^\s*IFS= (not inline usage)

3. WORDSPLIT Detection (CHECK 51):
   - Downgraded from HIGH to MEDIUM severity
   - Skip intentional patterns: $disks, $ips, $users, $dbs, etc.
   - Skip variables ending in _list, _array, _items
   - Added guidance: suppress if intentional, quote if bug
   - Recognizes common bash idiom for space-separated lists

Results:
- Before: 104 HIGH, 223 MEDIUM, 390 TOTAL
- After:  71 HIGH (-33), 231 MEDIUM (+8), 365 TOTAL (-25)
- Eliminated: 10 IFS false positives, ~15 SOURCE, ~8 WORDSPLIT
- Accuracy improvement: ~32% reduction in false HIGH issues

Impact: QA scan now focuses on real issues, not common bash patterns.
This commit is contained in:
cschantz
2026-01-09 00:42:03 -05:00
parent 8f3b764e26
commit 0c25f15c89
+44 -18
View File
@@ -1602,17 +1602,22 @@ while IFS=: read -r file line_num line_content; do
# Skip if already suppressed # Skip if already suppressed
is_suppressed "$file" "$line_num" "source-check" && continue is_suppressed "$file" "$line_num" "source-check" && continue
# Extract sourced file path # Skip if line has error handling (|| or 2>/dev/null)
sourced_file=$(echo "$line_content" | grep -oE 'source\s+[^ ]+|\\.\s+[^ ]+' | awk '{print $2}') if echo "$line_content" | grep -qE '\|\||2>/dev/null'; then
continue
fi
# Skip if it's a variable or absolute path starting with / # Extract sourced file path
if [[ "$sourced_file" =~ ^\$ ]] || [[ "$sourced_file" =~ ^/ ]]; then sourced_file=$(echo "$line_content" | grep -oE 'source\s+[^ ]+' | awk '{print $2}' | tr -d '"')
# Skip if empty, variable, absolute path, or contains special chars
if [ -z "$sourced_file" ] || [[ "$sourced_file" =~ ^\$ ]] || [[ "$sourced_file" =~ ^/ ]] || [[ "$sourced_file" =~ [{}()] ]]; then
continue continue
fi fi
# Check if there's a validation within 3 lines before # Check if there's a validation within 3 lines before
context=$(sed -n "$((line_num - 3)),$line_num p" "$file" 2>/dev/null) context=$(sed -n "$((line_num - 3)),$line_num p" "$file" 2>/dev/null)
if ! echo "$context" | grep -qE '\[\s+-f\s+.*'"$sourced_file"'|\[\s+-e\s+.*'"$sourced_file"; then if ! echo "$context" | grep -qE '\[\s+-[fe]\s+.*'"$sourced_file"'|&&\s+source|if.*source'; then
echo "HIGH|$file|$line_num|[SOURCE] Sourcing without existence check: $sourced_file" echo "HIGH|$file|$line_num|[SOURCE] Sourcing without existence check: $sourced_file"
echo " Risk: Script fails with 'file not found' if path is wrong" echo " Risk: Script fails with 'file not found' if path is wrong"
echo " Fix: Add '[ -f \"$sourced_file\" ] && source \"$sourced_file\" || { echo \"Error\"; exit 1; }'" echo " Fix: Add '[ -f \"$sourced_file\" ] && source \"$sourced_file\" || { echo \"Error\"; exit 1; }'"
@@ -1620,7 +1625,7 @@ while IFS=: read -r file line_num line_content; do
((count++)) ((count++))
[ "$count" -ge 15 ] && break [ "$count" -ge 15 ] && break
fi fi
done < <(grep -rnE '(source|\.)\s+[a-zA-Z]' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null) done < <(grep -rnE '^\s*source\s+[a-zA-Z_]' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null)
echo "Found: $count unvalidated source commands" echo "Found: $count unvalidated source commands"
echo "" echo ""
@@ -1859,8 +1864,8 @@ echo ""
echo "[51/60] Checking: Word splitting bugs in loops..." echo "[51/60] Checking: Word splitting bugs in loops..."
{ {
echo "## CHECK 51: Unquoted variables/arrays in for loops" echo "## CHECK 51: Unquoted variables/arrays in for loops"
echo "Severity: HIGH" echo "Severity: MEDIUM"
echo "Pattern: for x in \$var (splits on spaces, not newlines)" echo "Pattern: for x in \$var (splits on spaces - intentional or bug?)"
echo "" echo ""
count=0 count=0
@@ -1870,15 +1875,27 @@ while IFS=: read -r file line_num line_content; do
# Extract the unquoted variable # Extract the unquoted variable
var=$(echo "$line_content" | grep -oE 'for\s+\w+\s+in\s+\$[A-Za-z_][A-Za-z0-9_]*' | grep -oE '\$[A-Za-z_]+') var=$(echo "$line_content" | grep -oE 'for\s+\w+\s+in\s+\$[A-Za-z_][A-Za-z0-9_]*' | grep -oE '\$[A-Za-z_]+')
echo "HIGH|$file|$line_num|[WORDSPLIT] Unquoted variable in for loop: $var" # Skip common intentional patterns
var_name="${var#\$}"
if [[ "$var_name" =~ (disks|ips|users|dbs|files|dirs|items|list|args|@|\*|REPLY) ]]; then
continue # These are typically intentional word-splitting lists
fi
# Skip if variable name suggests it's a list/array
if [[ "$var_name" =~ _(list|array|items)$ ]]; then
continue
fi
echo "MEDIUM|$file|$line_num|[WORDSPLIT] Unquoted variable in for loop: $var"
echo " Risk: Splits on spaces, not lines - 'file name.txt' becomes 2 items" echo " Risk: Splits on spaces, not lines - 'file name.txt' becomes 2 items"
echo " Fix: Use quotes: for x in \"\$var\" or use array with \"\${arr[@]}\"" echo " Fix: If intentional (space-separated list), add # qa-suppress:word-split"
count_issue "HIGH" echo " If bug (filenames), use: for x in \"\$var\" or \"\${arr[@]}\""
count_issue "MEDIUM"
((count++)) ((count++))
[ "$count" -ge 10 ] && break [ "$count" -ge 10 ] && break
done < <(grep -rnE 'for\s+\w+\s+in\s+\$[A-Za-z_]' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null | grep -v '"\$') done < <(grep -rnE 'for\s+\w+\s+in\s+\$[A-Za-z_]' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null | grep -v '"\$')
echo "Found: $count word splitting risks" echo "Found: $count potential word splitting issues"
echo "" echo ""
} >> "$REPORT" } >> "$REPORT"
@@ -2410,11 +2427,20 @@ count=0
while IFS=: read -r file line_num line_content; do while IFS=: read -r file line_num line_content; do
is_suppressed "$file" "$line_num" "ifs" && continue is_suppressed "$file" "$line_num" "ifs" && continue
# Look for IFS= assignment # Skip safe patterns: IFS= read (only affects read command)
if echo "$line_content" | grep -qE 'IFS='; then if echo "$line_content" | grep -qE 'IFS=.*read'; then
# Check if there's a reset within next 20 lines or in same command continue
if ! echo "$line_content" | grep -qE ';.*IFS=' && \ fi
! sed -n "$((line_num + 1)),$((line_num + 20))p" "$file" 2>/dev/null | grep -qE 'IFS='; then
# Skip if IFS is in a while/for loop condition (scoped)
if echo "$line_content" | grep -qE '(while|for).*IFS='; then
continue
fi
# Look for standalone IFS= assignment
if echo "$line_content" | grep -qE '^\s*IFS='; then
# Check if there's a reset within next 20 lines
if ! sed -n "$((line_num + 1)),$((line_num + 20))p" "$file" 2>/dev/null | grep -qE '^\s*IFS=\$'; then
echo "HIGH|$file|$line_num|[IFS] IFS modified without reset" echo "HIGH|$file|$line_num|[IFS] IFS modified without reset"
echo " Risk: Affects all subsequent word splitting in script" echo " Risk: Affects all subsequent word splitting in script"
echo " Fix: OLD_IFS=\$IFS; IFS=:; ...; IFS=\$OLD_IFS or use in subshell" echo " Fix: OLD_IFS=\$IFS; IFS=:; ...; IFS=\$OLD_IFS or use in subshell"
@@ -2423,7 +2449,7 @@ while IFS=: read -r file line_num line_content; do
[ "$count" -ge 10 ] && break [ "$count" -ge 10 ] && break
fi fi
fi fi
done < <(grep -rn 'IFS=' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null | head -50) done < <(grep -rn '^\s*IFS=' "$TOOLKIT_PATH" --include="*.sh" 2>/dev/null | head -50)
echo "Found: $count IFS modifications without reset" echo "Found: $count IFS modifications without reset"
echo "" echo ""