# get a list of urls and corresponding output file names, then parse them to wget
+#
+# Usage: nameget LIST [DESTINATION] [ARGS]
+#   LIST         text file with one "[url] [name]" pair per line
+#   DESTINATION  existing directory prepended to each [name] in LIST
+#   ARGS         extra arguments handed to wget
+#
LOGTAG="nameget.sh"
+LOGFILE=/var/log/nameget
+BADWORDS="error\|failure\|unable" # grep pattern: words that mark a failed download in wget's output
args=("$@")
list=${args[0]}
-if [[ $list == "" ]]
- then
- printf "\n\x1b[31mWrong number of arguments\x1b[0m\n\n"
- printf "Usage: nameget LIST [DESTINATION] [ARGS]\n where LIST is a text file of format [url] [name] (one pair per line)\n [DESTINATION] is a path appended to the start of each [name] in LIST\n [ARGS] is passed directly to wget"
- exit 1
+#
+# Validate arguments
+#
+
+# non-root runs discard the log output instead of appending to $LOGFILE
+if [ "$EUID" -ne 0 ]; then
+    LOGFILE=/dev/null # redirect log because we're probably not running on a cron job
+fi
+
+# LIST is mandatory; usage errors are diagnostics, so they go to stderr
+if [[ $list == "" ]]; then
+    printf "\n\x1b[31mWrong number of arguments\x1b[0m\n\n" >&2
+    printf "Usage: nameget LIST [DESTINATION] [ARGS]\n where LIST is a text file of format [url] [name] (one pair per line)\n [DESTINATION] is a path appended to the start of each [name] in LIST\n [ARGS] is passed directly to wget" >&2
+    exit 1
fi
-if [[ -d ${args[1]} ]]; then # check if args[2] is [DESTINATION] or [ARGS]
+if [[ -d ${args[1]} ]]; then # check if args[1] (the second argument) is [DESTINATION] or [ARGS]
outdir=${args[1]}
- if [[ ! $outdir =~ /$ ]]; then # check if [DESTINATION] has a trailing /
+    if [[ ! $outdir =~ /$ ]]; then # check if [DESTINATION] has a trailing /
outdir=$outdir/
fi
shift 2
+else
+# no [DESTINATION]: only LIST has to be dropped before collecting [ARGS]
shift 1
fi
-otherargs=$@ # set aside remaining arguments for passing to wget
+otherargs="$*" # set aside remaining arguments (joined with spaces) for passing to wget
+
+
+#
+# Recurse through list
+#
-logger -s -t $LOGTAG "starting downloading files in $list"
+logger -s -t $LOGTAG "starting downloading files in $(readlink -f $list)" >> $LOGFILE
while read -r url filename; do
- destarg = ''
+
+ # parse output filename
+ destarg='' # placeholder for -O argument (output filename)
+ prettyname="$url" # placeholder for failure log
if [ -n "$filename" ]; then
- $destarg = " -O $outdir$filename"
-
- if output=$(wget --cut-dirs=100 -np -e robots=off -P $outdir$destarg $otherargs $url); then
- sed -i "/$filename/d" $list
- echo $filename
- logger -s -t $LOGTAG "downloaded $filename"
- else
- logger -s -t $LOGTAG "downloading of $filename from $url failed: $output"
- fi
+ destarg=" -O $outdir$filename"
+ prettyname="$filename from $url"
+ else
+ filename=${url##/*/} # extract basename
+ filename=${filename/[ _]/-} # substitute spaces/underscores with dashes
+ filename=$(echo $filename | tr '[:upper:]' '[:lower:]')
+ filename=$(tr -cd "[:print:]\n" < file1)
+ filename=$echo $filename | tr -d
+ fi
+
+ # compile wget command (for debugging purposes)
+ command="wget -r -nv -R "*index.html*" -c -nd -nH --cut-dirs=100 -np -e robots=off -P $outdir$destarg $otherargs $url"
+
+ output=$($command 2>&1) # get stderr of command
+ exitcode=$?
+
+ if [ $exitcode -eq 0 ] || [ `echo $errmsg | grep -i \"$badwords\"` ]; then
+ sed -i '/\"$url\"/d' $list
+ logger -s -t $LOGTAG "downloaded $prettyname" >> $LOGFILE
+ else
+ logger -s -t $LOGTAG "downloading of $prettyname failed with code $exitcode: $(echo $output | sed -n -e 's/.*\(error.*\|failure.*\|unable.*\)$/\1/pI' | paste -sd \; -)" >> $LOGFILE
+ fi
done < $list