63b2924fe7c78cb025b798bbc83bea46e08038a3
   1#! /bin/bash
   2
   3# get a list of urls and corresponding output file names, then parse them to wget
   4
   5LOGTAG="nameget.sh"
   6LOGFILE=/var/log/nameget
   7BADWORDS="error\|failure\|unable"
   8args=("$@")
   9list=${args[0]}
  10
  11#
  12# Validate arguments
  13#
  14
  15if [ "$EUID" -ne 0 ]
  16  LOGFILE=/dev/null   # redirect log because we're probably not running on a cron job
  17fi
  18
  19if [[ $list == "" ]]; then
  20  printf  "\n\x1b[31mWrong number of arguments\x1b[0m\n\n"
  21  printf "Usage: nameget LIST [DESTINATION] [ARGS]\n       where LIST is a text file of format [url] [name] (one pair per line)\n             [DESTINATION] is a path appended to the start of each [name] in LIST\n             [ARGS] is passed directly to wget"
  22  exit 1
  23fi
  24
  25if [[ -d ${args[1]} ]]; then            # check if args[2] is [DESTINATION] or [ARGS]
  26        outdir=${args[1]}
  27        if [[ ! $outdir =~ /$ ]]; then  # check if [DESTINATION] has a trailing /
  28            outdir=$outdir/
  29        fi
  30        shift 2
  31else
  32        shift 1
  33fi
  34
  35otherargs=$@                            # set aside remaining arguments for passing to wget
  36
  37
  38#
  39# Recurse through list
  40#
  41
  42logger -s -t $LOGTAG "starting downloading files in $(readlink -f $list)" >> $LOGFILE
  43
  44while read -r url filename; do
  45
  46  # parse output filename
  47  destarg=''    # placeholder for -O argument (output filename)
  48  prettyname="$url" # placeholder for failure log
  49  if [ -n "$filename" ]; then
  50    destarg=" -O $outdir$filename"
  51    prettyname="$filename from $url"
  52  else
  53    filename=${url##/*/}      # extract basename
  54    filename=${filename/[ _]/-}  # substitute spaces/underscores with dashes
  55    filename=$(echo $filename | tr '[:upper:]' '[:lower:]')
  56    filename=$(tr -cd "[:print:]\n" < file1)
  57    filename=$echo $filename | tr -d 
  58  fi
  59  
  60  # compile wget command (for debugging purposes)
  61  command="wget -r -nv -R "*index.html*" -c -nd -nH --cut-dirs=100 -np -e robots=off -P $outdir$destarg $otherargs $url"
  62
  63  output=$($command 2>&1)  # get stderr of command
  64  exitcode=$?
  65
  66  if [ $exitcode -eq 0 ] || [ `echo $errmsg | grep -i \"$badwords\"` ]; then
  67    sed -i '/\"$url\"/d' $list
  68    logger -s -t $LOGTAG "downloaded $prettyname" >> $LOGFILE
  69  else
  70    logger -s -t $LOGTAG "downloading of $prettyname failed with code $exitcode: $(echo $output |  sed -n -e 's/.*\(error.*\|failure.*\|unable.*\)$/\1/pI' | paste -sd \; -)" >> $LOGFILE
  71  fi
  72done < $list