#! /bin/bash
#
# Get a list of urls and optional corresponding output paths, then send them to wget
# For more info, see `nameget.sh --help`
#   Andrew Lorimer   https://lorimer.id.au
#

# Script-wide settings and defaults.
VERSION='1.0'                      # shown in the banner and in --help
LOGTAG=$(basename "$0")            # tag handed to logger(1) via -t
LOGFILE='/var/log/nameget'         # log destination; swapped for /dev/null when not root
# Words treated as a sign of failure, written as a grep basic-regex alternation.
BADWORDS='error\|failure\|unable\|denied\|directory'
outdir='./'                        # default download directory (overridden by -d)

# Emit a status message.
#
# In test mode, or when not running as root, the message is printed to stdout
# followed by an ANSI reset and a newline. Otherwise it is sent to syslog via
# logger(1) and the copy that `logger -s` writes to stderr is appended to
# $LOGFILE.
#
# Globals:   test, EUID, LOGTAG, LOGFILE (all read)
# Arguments: $1 - message text (always printed literally)
#            $2 - optional prefix, typically an ANSI colour sequence such as
#                 "\e[32m"; backslash escapes in it are expanded
function output() {
  if [ "${test-}" ] || [ "$EUID" -ne 0 ]; then
    # %b expands the escapes in the prefix without letting either argument
    # act as a printf format string.
    printf '%b%s\e[0m\n' "${2-}" "$1"
  else
    # logger -s echoes the message on stderr (not stdout), so stderr is the
    # stream that has to be appended to the log file.
    logger -s -t "$LOGTAG" -- "$1" 2>> "$LOGFILE"
  fi
}

usagelong="\e[34m$(basename "$0")\e[0m $VERSION
Get a list of urls and optional corresponding output paths, then send them to wget

\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS]

\e[1mOPTIONS:\e[0m
  -l, --list
    Text file where each line contains a url and filename separated by a space. Filename is optional. Lines starting with # are ignored.
    Default: ./queue.txt

  -h, --help
    Show this help message
  
  -t, --test
    Dry run - parses LIST and prints wget commands but does not execute them
    
  -d, --destination \e[4m<destination>\e[0m
    Directory in which to save each file in LIST
    Default: ./

  -n, --number
    Maximum number of files to download
    Default: no limit
    
  -a, --args \e[4m<args>\e[0m
    String of shell arguments which are passed verbatim to wget\n"

usageshort="\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS] -l LIST

For more information, see \e[34m$(basename "$0") --help\e[0m\n"

# A non-root run is probably interactive rather than a cron job, so point the
# log at /dev/null instead of the system log file.
[[ "$EUID" -eq 0 ]] || LOGFILE=/dev/null

#
# Validate arguments
#

# Default list file; replaced by -l/--list.
list='queue.txt'

# Print an error message in red followed by the short usage text, both on
# stderr, then terminate the script with status 1.
#   $1 - error message
function usage_error() {
  printf '\x1b[31m%s\x1b[0m\n\n' "$1" >&2
  printf '%b' "${usageshort-}" >&2
  exit 1
}

# Abort through usage_error unless an option's value contains at least one
# non-whitespace character.
#   $1 - the option as typed (used in the error message)
#   $2 - the value that followed it (may be unset or empty)
function require_value() {
  if [[ -z "${2//[[:space:]]/}" ]]; then
    usage_error "Option $1 requires an argument"
  fi
}

# Parse the command line and validate the result.
#
# Globals written: list, test, outdir, maxdl, otherargs
# Arguments:       the script's own arguments ("$@")
# Exits:           0 after printing help for -h/--help; 1 on an unknown
#                  option, a missing or invalid option value, or an empty
#                  list location
function parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -l|--list)
        # Plain scalar assignment: paths containing spaces or glob
        # characters are kept intact. An empty value is rejected after the
        # loop.
        list=${2-}
        shift
        ;;
      -h|--help)
        printf '%b' "${usagelong-}"
        exit
        ;;
      -t|--test)
        # Flag without a value: only the common shift at the bottom of the
        # loop applies, so the option that follows is not swallowed.
        test=1
        ;;
      -d|--destination)
        require_value "$1" "${2-}"
        outdir=$2
        shift
        ;;
      -n|--number)
        require_value "$1" "${2-}"
        # The limit is later compared numerically, so insist on digits.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          usage_error "Option $1 requires a numeric argument"
        fi
        maxdl=$2
        shift
        ;;
      -a|--args)
        require_value "$1" "${2-}"
        # The leading space lets the value be appended straight onto the
        # wget command line.
        otherargs=" $2"
        shift
        ;;
      --)
        break
        ;;
      *)
        usage_error "Invalid argument $1"
        ;;
    esac
    shift
  done

  if [[ -z "${list//[[:space:]]/}" ]]; then
    usage_error "List location not specified"
  fi
}

parse_args "$@"

# Banner in blue: script name plus version.
output "Welcome to $(basename "$0") $VERSION" "\e[34m"

# In a dry run, say so and confirm that option parsing is complete.
if [[ -n "$test" ]]; then
  output "Running in test mode"
  output "Finished parsing options"
fi

# Guarantee a trailing slash on the destination so a filename can be appended
# to it directly.
case "$outdir" in
  */) ;;
  *) outdir=$outdir/ ;;
esac

#
# Iterate through list
#


# Derive a tidy local filename from a download URL (needs GNU sed).
# In order: drop a trailing slash, keep only the last path component, turn
# "_", " ", "." and "%20" into "-", lowercase everything, drop a leading
# "the-"/"a-" and any inner "-the" (optionally followed by "-movie" and/or
# "-film"), collapse everything from a 19xx/20xx year or an NNNp/NNNNp tag up
# to the last dash into ".<last segment>", and finally turn the roman numerals
# listed below into digits when they sit between dashes.
#   $1 - url
# Outputs the filename on stdout.
function derive_filename() {
  printf '%s\n' "$1" | sed \
    -e 's/\/$//' \
    -e 's/.*\///' \
    -e 's/[_ \.]/-/g' \
    -e 's/\%20/-/g' \
    -e 's/\(.*\)/\L\1/' \
    -e 's/^the-//' \
    -e 's/-the\(-movie\)\?\(-film\)\?//g' \
    -e 's/^a-//' \
    -e 's/-(*\(20\|19\)[0-9]\{2\})*.*-\(.*\)/.\2/g' \
    -e 's/-[0-9]\{3,4\}p.*-\(.*\)/.\1/g' \
    -e 's/-ii-/-2-/g' \
    -e 's/-iii-/-3-/g' \
    -e 's/-iv-/-4-/g' \
    -e 's/-v-/-5-/g' \
    -e 's/-vi-/-6-/g' \
    -e 's/-vii-/-7-/g' \
    -e 's/-viii-/-8-/g' \
    -e 's/-viiii-/-9-/g' \
    -e 's/-x-/-10-/g'
}

# Build the wget command line as one shell-safe string.
# The url, the destination and the reject pattern are quoted with %q so that
# characters such as "&", "?", ";" and "*" survive the later `bash -c`
# unchanged. $otherargs is inserted verbatim on purpose: it is documented as
# raw shell arguments for wget.
#   $1 - url
#   $2 - output filename (relative to $outdir)
# Globals read: nvarg, outdir, otherargs
# Outputs the command on stdout.
function build_wget_command() {
  printf 'wget%s -R %q -c -nd -nH --cut-dirs=100 -np -e robots=off -P %q -O %q%s %q' \
    "${nvarg-}" '*index.html*' "$outdir" "$outdir$2" "${otherargs-}" "$1"
}

# Delete from a list file every line whose first field is exactly the url.
# The url is escaped first so that ".", "/", "*" and similar characters are
# matched literally by sed. `sed -i` writes a new file, so a loop that is still
# reading the old one is not disturbed.
#   $1 - url
#   $2 - list file
function remove_from_list() {
  local escaped
  escaped=$(printf '%s' "$1" | sed 's|[][\/.*^$]|\\&|g')
  sed -i \
    -e "/^[[:space:]]*${escaped}[[:space:]]/d" \
    -e "/^[[:space:]]*${escaped}\$/d" \
    "$2"
}

output "Starting downloading files in $(readlink -f "$list") to $outdir" "\n\e[1m"

n=0

# Only root runs get wget's -nv (non-verbose) flag.
nvarg=''
if [ "$EUID" -eq 0 ]; then
  nvarg=" -nv"
fi

# The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
while read -r url filename || [[ -n "$url" ]]; do

  # Skip blank lines and commented lines
  [[ -z "$url" || "$url" == \#* ]] && continue

  # Stop once maxdl entries have been processed
  [[ -n "${maxdl-}" ]] && [[ $n -ge $maxdl ]] && break

  # No explicit filename on this line: derive one from the url
  if [[ -z "$filename" ]]; then
    filename=$(derive_filename "$url")
  fi
  prettyname="$filename from $url"

  command=$(build_wget_command "$url" "$filename")

  n=$((n+1))
  if [ "${test-}" ]; then
    # Dry run: only show what would be executed
    output "$command" "    "
  else
    wgetout=$(bash -c "$command" 2>&1)   # capture stdout and stderr of wget
    exitcode=$?
    # A zero exit status is not trusted on its own; the output is also
    # scanned for the words in BADWORDS.
    failures=$(printf '%s\n' "$wgetout" | grep -i -- "$BADWORDS")
    if [ "$exitcode" -eq 0 ] && [ -z "$failures" ]; then
      remove_from_list "$url" "$list"
      output "Downloaded $prettyname" "\e[32m"
    else
      output "Downloading of $prettyname failed with code $exitcode: $(printf '%s\n' "$failures" | paste -sd ';' -)" "\e[31m"
    fi
  fi

done < "$list"
