063544c84280843d23f6d42354adfcf2b81d5d59
   1#! /bin/bash
   2#
   3# Get a list of urls and optional corresponding output paths, then send them to wget
   4# For more info, see `nameget.sh --help`
   5#   Andrew Lorimer   https://lorimer.id.au
   6#
   7
   8VERSION="0.9"
   9LOGTAG="$(basename "$0")" 
  10LOGFILE=/var/log/nameget
  11BADWORDS="error\|failure\|unable\|denied\|directory"
  12outdir="./"
  13
  14function output() {
  15  if [ $test ] || [ "$EUID" -ne 0 ]; then
  16    printf "$2%s\e[0m\n" "$1"
  17  else
  18    logger -s -t $LOGTAG "$1" >> $LOGFILE
  19  fi
  20}
  21
  22usagelong="\e[34m$(basename "$0")\e[0m $VERSION
  23Get a list of urls and optional corresponding output paths, then send them to wget
  24
  25\e[1mUSAGE:\e[0m
  26  $(basename "$0") [OPTIONS]
  27
  28\e[1mOPTIONS:\e[0m
  29  -l, --list
  30    Text file where each line contains a url and filename separated by a space. Filename is optional. Lines starting with # are ignored.
  31    Default: ./queue.txt
  32
  33  -h, --help
  34    Show this help message
  35  
  36  -t, --test
  37    Dry run - parses LIST and prints wget commands but does not execute them
  38    
  39  -d, --destination \e[4m<destination>\e[0m
  40    Directory in which to save each file in LIST
  41    Default: ./
  42
  43  -n, --number
  44    Maximum number of files to download
  45    Default: none
  46    
  47  -a, --args \e[4m<args>\e[0m
  48    String of shell arguments which are passed verbatim to wget\n"
  49
  50usageshort="\e[1mUSAGE:\e[0m
  51  $(basename "$0") [OPTIONS] -l LIST
  52
  53For more information, see \e[34m$(basename "$0") --help\e[0m\n"
  54
  55if [ "$EUID" -ne 0 ]; then
  56  LOGFILE=/dev/null   # redirect log because we're probably not running on a cron job
  57fi
  58
  59#
  60# Validate arguments
  61#
  62
  63while [ $# -gt 0 ]; do
  64  case "$1" in
  65    -l|--list)
  66      list=(${2-})
  67      shift
  68      ;;
  69    -h|--help)
  70      printf "$usagelong" 
  71      exit
  72      ;;
  73    -t|--test)
  74      test=1
  75      shift
  76      ;;
  77    -d|--destination)
  78      outdir=(${2-})
  79      if [ -z "$outdir" ] || [ "$outdir" = " " ]; then
  80        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
  81        printf "$usageshort"
  82        exit 1
  83      fi
  84      shift
  85      ;;
  86    -n|--number)
  87      maxdl=(${2-})
  88      if [ -z "$maxdl" ] || [ "$maxdl" = " " ]; then
  89        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
  90        printf "$usageshort"
  91        exit 1
  92      fi
  93      shift
  94      ;;
  95    -a|--args)
  96      otherargs=" ${2-}"
  97      if [ -z "$otherargs" ] || [ "$otherargs" = " " ]; then
  98        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
  99        printf "$usageshort"
 100        exit 1
 101      fi
 102      shift
 103      ;;
 104    --)
 105      break
 106      ;;
 107    *)
 108      printf "\x1b[31mInvalid argument $1\x1b[0m\n\n"
 109      printf "$usageshort"
 110      exit 1
 111      ;;
 112  esac
 113  shift
 114done
 115
 116if [ -z "$list" ] || [ "$list" = " " ]; then
 117  printf "\x1b[31mList location not specified\x1b[0m\n\n" >&2
 118  printf "$usageshort" >&2
 119  exit 1
 120fi
 121
 122output "Welcome to $(basename "$0") $VERSION" "\e[34m"
 123
 124if [ "$test" ]; then
 125  output "Running in test mode"
 126  output "Finished parsing options"
 127fi
 128
 129
 130if [[ ! $outdir =~ /$ ]]; then  # check if [DESTINATION] has a trailing /
 131    outdir=$outdir/
 132 fi
 133
 134#
 135# Recurse through list
 136#
 137
 138
 139output "Starting downloading files in $(readlink -f $list) to $outdir" "\n\e[1m"
 140
 141n=0
 142
 143while read -r url filename; do
 144
 145  # Ignore if line is commented
 146  [[ "$url" =~ ^\s*#.*  ]] && continue
 147
 148  # Stop when maxdl is exceeded
 149  [ ! -z ${maxdl} ] && [[ $n -ge $maxdl ]] && break
 150
 151  # parse output filename
 152  destarg=''    # placeholder for -O argument (output filename)
 153  if [ -z "$filename" ] || [ "filename" = "" ]; then
 154    filename="$(echo $url | sed 's/\/\$//; s/.*\///; s/[_ \.]/-/g; s/\%20/-/g; s/\(.*\)/\L\1/; s/^the-//; s/-the\(-movie\)\?\(-film\)\?//g; s/^a-//; s/-(*\(20\|19\)[0-9]\{2\})*.*-\(.*\)/.\2/g; s/-[0-9]\{3,4\}p.*-\(.*\)/.\1/g; s/-ii-/-2-/g; s/-iii-/-3-/g; s/-iv-/-4-/g; s/-v-/-5-/g; s/-vi-/-6-/g; s/-vii-/-7-/g; s/-viii-/-8-/g; s/-viiii-/-9-/g; s/-x-/-10-/g;')"
 155  fi
 156  destarg=" -O $outdir$filename"
 157  prettyname="$filename from $url"
 158  if [ "$EUID" -eq 0 ]; then
 159    nvarg=" -nv"
 160  fi
 161
 162  # compile wget command (for debugging purposes)
 163  command="wget$nvarg -R "*index.html*" -c -nd -nH --cut-dirs=100 -np -e robots=off -P $outdir$destarg$otherargs $url"
 164
 165  n=$((n+1))
 166  if [ "$test" ]; then
 167    output "$command" "    "
 168  else
 169    if [ "$EUID" -ne 0 ]; then # get stdout of command
 170      wgetout=$($command | tee /dev/tty)  # get stdout of command
 171    else
 172      wgetout=$($command 2>&1 > /dev/null)  # get stderr of command
 173    fi
 174    exitcode=$?
 175    failures=$(echo "$wgetout" | grep -i "$badwords")
 176    if [ $exitcode -eq 0 ] && [ -z "$failures" ]; then
 177      sed -i '/\"$url\"/d' $list
 178      output "Downloaded $prettyname" "\e[32m"
 179    else
 180      output "Downloading of $prettyname failed with code $exitcode: $(echo $failures | paste -sd \; -)" "\e[31m"
 181    fi
 182  fi
 183
 184done < $list