backup-tank-2: rewrite for one rsync cmd per drive (fixes include/exclude logic)
author Andrew Lorimer <andrew@lorimer.id.au>
Sun, 31 May 2026 12:20:39 +0000 (22:20 +1000)
committer Andrew Lorimer <andrew@lorimer.id.au>
Sun, 31 May 2026 12:20:39 +0000 (22:20 +1000)
backup-tank-2.sh
index 27805d33cc8da37967f23c22b146ab191b2b8e5f..c851dc88f30f3f0be3803e7b5f0f97cc20bfa537 100755 (executable)
@@ -1,10 +1,9 @@
 #!/bin/bash
 
 MOUNT_POINT_PREFIX="/mnt/backup"
 #!/bin/bash
 
 MOUNT_POINT_PREFIX="/mnt/backup"
-TEST=1
-DEBUG=1
+TEST=0
+DEBUG=0
 
 
-# Check if the CSV file is provided
 if [ "$#" -ne 1 ]; then
     echo "Usage: $0 <path_to_csv_file>"
     exit 1
 if [ "$#" -ne 1 ]; then
     echo "Usage: $0 <path_to_csv_file>"
     exit 1
@@ -12,116 +11,143 @@ fi
 
 CSV_FILE="$1"
 
 
 CSV_FILE="$1"
 
-# Declare associative arrays to hold paths and rsync command for each drive
 declare -A INCLUDE_PATHS
 declare -A EXCLUDE_PATHS
 declare -A INCLUDE_PATHS
 declare -A EXCLUDE_PATHS
-declare -A RSYNC_CMDS
+declare -A DRIVES
 
 if [ $TEST -eq 1 ]; then
     echo "Reading index $CSV_FILE"
 fi
 
 
 if [ $TEST -eq 1 ]; then
     echo "Reading index $CSV_FILE"
 fi
 
-# Read the CSV file line by line
 while IFS=',' read -r FILE_PATH DRIVE _; do
 while IFS=',' read -r FILE_PATH DRIVE _; do
-    # Trim whitespace
     FILE_PATH=$(echo "$FILE_PATH" | xargs)
     DRIVE=$(echo "$DRIVE" | xargs)
 
     FILE_PATH=$(echo "$FILE_PATH" | xargs)
     DRIVE=$(echo "$DRIVE" | xargs)
 
-    # Check if row is commented with a #
     if [[ "$FILE_PATH" == "#"* ]]; then
     if [[ "$FILE_PATH" == "#"* ]]; then
-       continue
+        continue
     fi
 
     fi
 
-    # Check if path begins with a slash
     if [[ "$FILE_PATH" != /* ]]; then
     if [[ "$FILE_PATH" != /* ]]; then
-      echo "File path $FILE_PATH is not relative - exiting"
-      exit 1
+        echo "File path $FILE_PATH is not absolute - exiting"
+        exit 1
     fi
 
     fi
 
-    # Determine the actual drive path
     if [[ "$DRIVE" =~ ^- ]]; then
     if [[ "$DRIVE" =~ ^- ]]; then
-        # Exclusion case
-        DRIVE=${DRIVE:1}  # Remove the leading '-'
+        DRIVE=${DRIVE:1}
         EXCLUDE_PATHS["$FILE_PATH"]=1
     else
         EXCLUDE_PATHS["$FILE_PATH"]=1
     else
-        # Inclusion case
         INCLUDE_PATHS["$FILE_PATH"]="$DRIVE"
         INCLUDE_PATHS["$FILE_PATH"]="$DRIVE"
-        RSYNC_CMDS["$DRIVE"]=""
+        DRIVES["$DRIVE"]=1
     fi
 done < "$CSV_FILE"
 
     fi
 done < "$CSV_FILE"
 
-
-# Now process the paths to copy
-for FILE_PATH in "${!INCLUDE_PATHS[@]}"; do
-    DRIVE="${INCLUDE_PATHS[$FILE_PATH]}"
+for DRIVE in "${!DRIVES[@]}"; do
     MOUNT_POINT="$MOUNT_POINT_PREFIX$DRIVE"
 
     MOUNT_POINT="$MOUNT_POINT_PREFIX$DRIVE"
 
-    # Check if the mount point exists
-    if [[ $TEST -eq 0 && $(mountpoint -q "$MOUNT_POINT") ]]; then
-        echo "Nothing mounted to $MOUNT_POINT - skipping $FILE_PATH"
-        continue
-    fi
-
-    # Check if the path is excluded
-    if [[ -n "${EXCLUDE_PATHS[$FILE_PATH]}" ]]; then
-        echo "Excluding $FILE_PATH"
+    if [[ $TEST -eq 0 ]] && ! mountpoint -q "$MOUNT_POINT"; then
+        echo "Nothing mounted to $MOUNT_POINT - skipping drive $DRIVE"
+        unset "DRIVES[$DRIVE]"
         continue
     fi
 
         continue
     fi
 
-    # Get parent directory
-    PARENT="$(dirname "$FILE_PATH")"
-    MKDIR_CMD="mkdir -p $MOUNT_POINT$PARENT"
-    if [ $DEBUG -eq 1 ]; then
-           echo "$MKDIR_CMD"
-    fi
-    if [ $TEST -eq 0 ]; then 
-           eval "$MKDIR_CMD"
-    fi
-
-    # Check if the path is a directory
-    EXCLUDE_ARGS=()
-    if [ -d "$FILE_PATH" ]; then
-      for EXCLUDE_PATH in "${!EXCLUDE_PATHS[@]}"; do
-        if [[ "$EXCLUDE_PATH" =~ ^"$FILE_PATH"* ]]; then
-         if [ -d "$EXCLUDE_PATH" ] && [[ ! "$EXCLUDE_PATH" =~ /$ ]]; then
-           EXCLUDE_ARGS+=("--exclude=\"$EXCLUDE_PATH/\"")
-         else
-           EXCLUDE_ARGS+=("--exclude=\"$EXCLUDE_PATH\"")
-         fi
-          if [[ $TEST -eq 1 ]]; then
-            echo "Excluding $EXCLUDE_PATH from $FILE_PATH"
-          fi
+    # Collect included paths for this drive
+    DRIVE_INCLUDES=()
+    for FILE_PATH in "${!INCLUDE_PATHS[@]}"; do
+        [[ "${INCLUDE_PATHS[$FILE_PATH]}" == "$DRIVE" ]] && DRIVE_INCLUDES+=("$FILE_PATH")
+    done
+
+    # Split into special includes (inside an excluded dir) and regular includes
+    SPECIAL_INCLUDES=()
+    REGULAR_INCLUDES=()
+    for INC in "${DRIVE_INCLUDES[@]}"; do
+        is_special=0
+        for EXC in "${!EXCLUDE_PATHS[@]}"; do
+            if [[ "$INC" == "$EXC"/* ]]; then
+                is_special=1
+                break
+            fi
+        done
+        if [[ $is_special -eq 1 ]]; then
+            SPECIAL_INCLUDES+=("$INC")
+        else
+            REGULAR_INCLUDES+=("$INC")
         fi
         fi
-      done
-    fi
-    RSYNC_CMD="rsync -Par"
-    if [[ $TEST -eq 1 ]]; then
-      RSYNC_CMD+="vn"
-    fi
-    if [[ $DEBUG -eq 0 ]]; then
-      RSYNC_CMD+="q"
-    fi
-    RSYNC_CMD+=" "${EXCLUDE_ARGS[@]}" \"$FILE_PATH\" \"$MOUNT_POINT$PARENT/\""
-    RSYNC_CMDS["$DRIVE"]+="${RSYNC_CMDS[$DRIVE]:+$'\n'}$RSYNC_CMD"
-done
+    done
+
+    # Collect ancestor directories of all included paths (for traversal rules)
+    declare -A TRAVERSAL_DIRS
+    for INC in "${DRIVE_INCLUDES[@]}"; do
+        dir="$INC"
+        while [[ "$dir" != "/" ]]; do
+            dir="$(dirname "$dir")"
+            [[ "$dir" != "/" ]] && TRAVERSAL_DIRS["$dir"]=1
+        done
+    done
+
+    # Sort traversal dirs shallowest first so rules are ordered correctly
+    SORTED_TRAVERSAL=()
+    while IFS= read -r dir; do
+        SORTED_TRAVERSAL+=("$dir")
+    done < <(printf '%s\n' "${!TRAVERSAL_DIRS[@]}" | awk '{ print length, $0 }' | sort -n | cut -d' ' -f2-)
+    unset TRAVERSAL_DIRS
+
+    # Build rsync args array
+    FLAGS="-Par"
+    [[ $TEST -eq 1 ]] && FLAGS+="vn"
+    [[ $DEBUG -eq 0 ]] && FLAGS+="q"
+
+    RSYNC_ARGS=("$FLAGS" --delete --delete-excluded)
+
+    # Traversal rules: allow rsync to enter ancestor directories
+    for dir in "${SORTED_TRAVERSAL[@]}"; do
+        RSYNC_ARGS+=(--include="/${dir#/}/")
+    done
+
+    # Special includes: paths inside excluded dirs must appear before their exclusion rule
+    for INC in "${SPECIAL_INCLUDES[@]}"; do
+        rel="${INC#/}"
+        RSYNC_ARGS+=(--include="/$rel")
+        [[ -d "$INC" ]] && RSYNC_ARGS+=(--include="/$rel/**")
+    done
+
+    # Exclusion rules — only for paths that fall within this drive's includes
+    for EXC in "${!EXCLUDE_PATHS[@]}"; do
+        for INC in "${DRIVE_INCLUDES[@]}"; do
+            if [[ "$EXC" == "$INC"/* ]]; then
+                RSYNC_ARGS+=(--exclude="/${EXC#/}/**")
+                break
+            fi
+        done
+    done
+
+    # Regular includes
+    for INC in "${REGULAR_INCLUDES[@]}"; do
+        rel="${INC#/}"
+        if [[ -d "$INC" ]]; then
+            RSYNC_ARGS+=(--include="/$rel/")   # the directory itself (needed for traversal)
+            RSYNC_ARGS+=(--include="/$rel/**")
+        else
+            RSYNC_ARGS+=(--include="/$rel")
+        fi
+    done
+
+    # Exclude everything not explicitly included, then append the source (/) and the destination
+    RSYNC_ARGS+=(--exclude='*' / "$MOUNT_POINT/")
 
 
-for DRIVE in "${!RSYNC_CMDS[@]}"; do
-  while IFS= read -r cmd; do
-    [[ -z "$cmd" ]] && continue
     if [[ $DEBUG -eq 1 ]]; then
     if [[ $DEBUG -eq 1 ]]; then
-      echo "$cmd"
+        echo "rsync ${RSYNC_ARGS[*]}"
     fi
     fi
+
     if [[ $TEST -eq 0 ]]; then
     if [[ $TEST -eq 0 ]]; then
-      eval "$cmd" &
+        rsync "${RSYNC_ARGS[@]}" &
     fi
     fi
-  done <<< "${RSYNC_CMDS[$DRIVE]}"
 done
 
 wait
 
 echo ""
 
 done
 
 wait
 
 echo ""
 
-for DRIVE in "${!RSYNC_CMDS[@]}"; do
+for DRIVE in "${!DRIVES[@]}"; do
   DRIVE_SERIAL="$(lsblk -n -o SERIAL `mount| grep /mnt/backup2 | awk 'NR==1{print $1}' | sed 's/[0-9]*//g'`)"
   echo "-------------------------------------------"
   echo "Backup summary for Cold Backup Drive $DRIVE"
   DRIVE_SERIAL="$(lsblk -n -o SERIAL `mount| grep /mnt/backup2 | awk 'NR==1{print $1}' | sed 's/[0-9]*//g'`)"
   echo "-------------------------------------------"
   echo "Backup summary for Cold Backup Drive $DRIVE"
@@ -149,3 +175,4 @@ for DRIVE in "${!RSYNC_CMDS[@]}"; do
   echo ""
   echo ""
 done
   echo ""
   echo ""
 done
+