#! /bin/bash
#
# Get a list of urls and optional corresponding output paths, then send them to wget
# For more info, see `nameget.sh --help`
# Andrew Lorimer https://lorimer.id.au
#
# Provenance: post-merge side of nameget.sh from commit
# ac9f601182c2ba2ed4baa3305f61e9507d524f18 ("Merge branch 'master' of
# charles:/tank/andrew/code/scripts", 15 May 2019).
#

VERSION="0.9"
PROGNAME="$(basename "$0")"
LOGTAG="$PROGNAME"
LOGFILE=/var/log/nameget
# GNU basic-regex alternation of words that mark captured wget output as a failure
BADWORDS="error\|failure\|unable\|denied\|directory"
outdir="./"
list=""
test=""
otherargs=""

#######################################
# Report a message to the user or to the log.
# In test mode or when not root: print to stdout, prefixed by the escape
# sequence in $2 and followed by a colour reset.
# Otherwise: hand the message to logger and append it to $LOGFILE.
# Globals:   test, EUID, LOGTAG, LOGFILE (read)
# Arguments: $1 - message text
#            $2 - optional escape-sequence prefix (colour, leading newline)
#######################################
function output() {
  if [ -n "$test" ] || [ "$EUID" -ne 0 ]; then
    printf '%b%s\e[0m\n' "${2-}" "$1"
  else
    # logger -s echoes the message on stderr (not stdout), so stderr is the
    # stream that has to reach the log file; tee keeps it visible on stderr.
    logger -s -t "$LOGTAG" "$1" 2>&1 | tee -a "$LOGFILE" >&2
  fi
}

#######################################
# Print an error in red followed by the short usage on stderr, then exit 1.
# Globals:   usageshort (read)
# Arguments: $1 - error text (printed literally, never used as a format)
#######################################
usage_error() {
  printf '\x1b[31m%s\x1b[0m\n\n' "$1" >&2
  printf '%b' "$usageshort" >&2
  exit 1
}

#######################################
# Derive an output filename from a url: take the last path component,
# lower-case it, turn spaces/underscores/dots into dashes, drop leading
# articles, and collapse year/resolution tags down to the extension.
# Relies on GNU sed extensions (\L, \?, \|).
# Arguments: $1 - url
# Outputs:   derived filename on stdout
#######################################
derive_filename() {
  # The first expression strips a trailing slash so the basename is not empty.
  printf '%s\n' "$1" | sed '
    s/\/$//
    s/.*\///
    s/[_ \.]/-/g
    s/\(.*\)/\L\1/
    s/^the-//
    s/-the\(-movie\)\?\(-film\)\?//g
    s/^a-//
    s/-\(20\|19\)[0-9]\{2\}.*-\(.*\)/.\2/g
    s/-[0-9]\{3,4\}p.*-\(.*\)/.\1/g
    s/-ii-/-2-/g
    s/-iii-/-3-/g
    s/-iv-/-4-/g
    s/-v-/-5-/g
    s/-vi-/-6-/g
    s/-vii-/-7-/g
    s/-viii-/-8-/g
    s/-viiii-/-9-/g
    s/-x-/-10-/g
  '
}

#######################################
# Delete every line of the list whose first field is exactly the given url.
# Arguments: $1 - url
#            $2 - path of the list file (edited in place with GNU sed -i)
#######################################
remove_from_list() {
  local url=$1 listfile=$2 escaped
  # Escape BRE metacharacters and the / delimiter so the url matches literally.
  escaped=$(printf '%s\n' "$url" | sed 's/[][\.*^$/]/\\&/g')
  sed -i "/^[[:space:]]*${escaped}\([[:space:]].*\)\?\$/d" "$listfile"
}

usagelong="\e[34m$PROGNAME\e[0m $VERSION
Get a list of urls and optional corresponding output paths, then send them to wget

\e[1mUSAGE:\e[0m
  $PROGNAME [OPTIONS] -l LIST

\e[1mOPTIONS:\e[0m
  -l, --list \e[4mLIST\e[0m
    Text file where each line contains a url and filename separated by a space. Filename is optional.
    Default: ./queue.txt

  -h, --help
    Show this help message

  -t, --test
    Dry run - parses LIST and prints wget commands but does not execute them

  -d, --destination \e[4mDESTINATION\e[0m
    Directory in which to save each file in LIST
    Default: ./

  -a, --args \e[4mARGS\e[0m
    String of shell arguments which are passed verbatim to wget\n"

usageshort="\e[1mUSAGE:\e[0m
  $PROGNAME [OPTIONS] -l LIST

For more information, see \e[34m$PROGNAME --help\e[0m\n"

if [ "$EUID" -ne 0 ]; then
  LOGFILE=/dev/null # redirect log because we're probably not running on a cron job
fi

#
# Validate arguments
#

while [ $# -gt 0 ]; do
  case "$1" in
    -l|--list)
      list=${2-}
      shift
      ;;
    -h|--help)
      printf '%b' "$usagelong"
      exit
      ;;
    -t|--test)
      # Flag without a value: only the common shift below applies, so the
      # option that follows is not swallowed.
      test=1
      ;;
    -d|--destination)
      outdir=${2-}
      if [[ -z ${outdir//[[:space:]]/} ]]; then
        usage_error "Option $1 requires an argument"
      fi
      shift
      ;;
    -a|--args)
      otherargs=${2-}
      if [[ -z ${otherargs//[[:space:]]/} ]]; then
        usage_error "Option $1 requires an argument"
      fi
      shift
      ;;
    --)
      break
      ;;
    *)
      usage_error "Invalid argument $1"
      ;;
  esac
  # Guarded so that a value-taking option given last does not shift past the end.
  if [ $# -gt 0 ]; then
    shift
  fi
done

# NOTE(review): the help text advertises a default of ./queue.txt, but no
# default is applied here and -l is effectively required - confirm which is intended.
if [[ -z ${list//[[:space:]]/} ]]; then
  usage_error "List location not specified"
fi

if [[ ! -r $list ]]; then
  printf '\x1b[31m%s\x1b[0m\n' "Cannot read list file $list" >&2
  exit 1
fi

output "Welcome to $PROGNAME $VERSION" "\e[34m"

if [ -n "$test" ]; then
  output "Running in test mode"
  output "Finished parsing options"
fi

if [[ $outdir != */ ]]; then # make sure DESTINATION has a trailing /
  outdir+=/
fi

# ARGS is one string of wget options; split it on whitespace once, without
# globbing, so it can be appended to the command array.
wget_extra=()
read -r -a wget_extra <<< "$otherargs"

#
# Recurse through list
#

output "Starting downloading files in $(readlink -f "$list") to $outdir" "\n\e[1m"

# "|| [[ -n $url ]]" keeps a final line that has no trailing newline.
while read -r url filename || [[ -n $url ]]; do

  # skip blank lines
  if [ -z "$url" ]; then
    continue
  fi

  # parse output filename
  if [ -z "$filename" ]; then
    filename=$(derive_filename "$url")
  fi
  prettyname="$filename from $url"

  # Build the wget command as an array so the reject pattern is not
  # glob-expanded and paths containing spaces stay single arguments.
  cmd=(wget)
  if [ "$EUID" -eq 0 ]; then
    cmd+=(-nv)
  fi
  cmd+=(-R '*index.html*' -c -nd -nH --cut-dirs=100 -np -e robots=off
        -P "$outdir" -O "$outdir$filename" "${wget_extra[@]}" "$url")

  if [ -n "$test" ]; then
    output "${cmd[*]}" " "
    continue
  fi

  if [ "$EUID" -ne 0 ]; then
    # Capture stdout while still showing it on the terminal; exit the
    # substitution with wget's own status rather than tee's.
    wgetout=$("${cmd[@]}" | tee /dev/tty; exit "${PIPESTATUS[0]}")
    exitcode=$?
  else
    wgetout=$("${cmd[@]}" 2>&1 > /dev/null) # get stderr of command
    exitcode=$?
  fi

  failures=$(grep -i -- "$BADWORDS" <<< "$wgetout")
  if [ "$exitcode" -eq 0 ] && [ -z "$failures" ]; then
    # sed -i replaces the file rather than rewriting it, so the loop's open
    # descriptor keeps reading the original contents.
    remove_from_list "$url" "$list"
    output "Downloaded $prettyname" "\e[32m"
  else
    output "Downloading of $prettyname failed with code $exitcode: $(printf '%s\n' "$failures" | paste -sd ';' -)" "\e[31m"
  fi

done < "$list"