#! /bin/bash
#
# Get a list of URLs and optional corresponding output paths, then send them to wget
# For more info, see `nameget.sh --help`
# Andrew Lorimer https://lorimer.id.au
#
7
VERSION="1.0"
# Tag attached to syslog entries: the file name this script was invoked as
LOGTAG="$(basename "$0")"
# File that receives a copy of every logged message
LOGFILE=/var/log/nameget
# grep pattern (GNU basic-regex alternation) listing words treated as failure markers
BADWORDS="error\|failure\|unable\|denied\|directory"
# Default download directory; may be replaced by -d/--destination
outdir="./"

#######################################
# Report a status message to the user or to the system log.
# Globals:
#   test    (read) - non-empty when running as a dry run
#   EUID    (read) - effective user id
#   LOGTAG  (read) - tag passed to logger
#   LOGFILE (read) - file that receives logger's echoed copy
# Arguments:
#   $1 - message text (printed literally)
#   $2 - optional prefix written before the message; backslash escapes
#        such as \e[34m or \n are expanded
# Outputs:
#   Dry run or non-root: prefix + message + colour reset on stdout.
#   Otherwise: message goes to syslog and is appended to LOGFILE.
#######################################
output() {
  if [[ -n "${test:-}" || "$EUID" -ne 0 ]]; then
    # %b expands the escapes in the prefix without letting it act as a
    # format string; the message itself is always printed verbatim.
    printf '%b%s\e[0m\n' "${2:-}" "$1"
  else
    # logger -s echoes the message on stderr (not stdout), so stderr is
    # the stream that has to be appended to the log file.
    logger -s -t "$LOGTAG" -- "$1" 2>> "$LOGFILE"
  fi
}
21
# Full help text, shown for -h/--help.
# The text is later handed to printf, which expands the \e colour codes and
# the trailing \n, so the backslash sequences are intentionally left literal
# here. Options that take a value show it as an underlined <placeholder>.
usagelong="\e[34m$(basename "$0")\e[0m $VERSION
Get a list of urls and optional corresponding output paths, then send them to wget

\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS]

\e[1mOPTIONS:\e[0m
  -l, --list \e[4m<list>\e[0m
      Text file where each line contains a url and filename separated by a space. Filename is optional. Lines starting with # are ignored.
      Default: ./queue.txt

  -h, --help
      Show this help message

  -t, --test
      Dry run - parses LIST and prints wget commands but does not execute them

  -d, --destination \e[4m<destination>\e[0m
      Directory in which to save each file in LIST
      Default: ./

  -n, --number \e[4m<number>\e[0m
      Maximum number of files to download
      Default: no limit

  -a, --args \e[4m<args>\e[0m
      String of shell arguments which are passed verbatim to wget\n"

# Short usage reminder, printed after an argument error.
usageshort="\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS] -l LIST

For more information, see \e[34m$(basename "$0") --help\e[0m\n"
54
if [ "$EUID" -ne 0 ]; then
  LOGFILE=/dev/null # redirect log because we're probably not running on a cron job
fi

#
# Validate arguments
#

# Default list file, used when -l/--list is not given
list='queue.txt'

#######################################
# Print an error in red followed by the short usage text, then abort.
# Globals:   usageshort (read)
# Arguments: $1 - error message (printed literally)
# Outputs:   message and usage on stderr
# Returns:   never; exits the script with status 1
#######################################
option_error() {
  printf '\x1b[31m%s\x1b[0m\n\n' "$1" >&2
  # %b expands the \e and \n escapes stored in the usage text
  printf '%b' "${usageshort-}" >&2
  exit 1
}

#######################################
# Parse the command-line options into global settings.
# Globals:
#   list      (written) - path of the URL list
#   test      (written) - set to 1 for a dry run
#   outdir    (written) - destination directory
#   maxdl     (written) - maximum number of downloads
#   otherargs (written) - extra wget arguments, stored with a leading space
#   usagelong (read)    - help text for -h/--help
# Arguments: the script's command-line arguments
# Returns:   exits 0 after --help, exits 1 on an invalid option or value
#######################################
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -l|--list)
        # A missing value leaves list empty; that is reported after parsing.
        list="${2-}"
        shift
        ;;
      -h|--help)
        printf '%b' "${usagelong-}"
        exit
        ;;
      -t|--test)
        # Flag without a value: only the shared shift below may run,
        # otherwise the following option would be skipped.
        test=1
        ;;
      -d|--destination)
        outdir="${2-}"
        if [ -z "$outdir" ] || [ "$outdir" = " " ]; then
          option_error "Option $1 requires an argument"
        fi
        shift
        ;;
      -n|--number)
        maxdl="${2-}"
        if [ -z "$maxdl" ] || [ "$maxdl" = " " ]; then
          option_error "Option $1 requires an argument"
        fi
        # The limit is compared numerically later; reject anything else now
        if ! [[ "$maxdl" =~ ^[0-9]+$ ]]; then
          option_error "Option $1 requires a whole number"
        fi
        shift
        ;;
      -a|--args)
        # Leading space lets the value be appended directly to the wget command
        otherargs=" ${2-}"
        if [ "$otherargs" = " " ]; then
          option_error "Option $1 requires an argument"
        fi
        shift
        ;;
      --)
        break
        ;;
      *)
        option_error "Invalid argument $1"
        ;;
    esac
    shift
  done
}

parse_args "$@"
117
# Refuse to continue when there is no list file to read from
case "$list" in
  '' | ' ')
    printf "\x1b[31mList location not specified\x1b[0m\n\n" >&2
    printf "$usageshort" >&2
    exit 1
    ;;
esac

# Greeting, printed in blue
output "Welcome to $(basename "$0") $VERSION" "\e[34m"

# Extra progress messages are only emitted on a dry run
if [[ -n "$test" ]]; then
  output "Running in test mode"
  output "Finished parsing options"
fi

# Make sure [DESTINATION] ends in a slash: an existing trailing slash is
# stripped and put back, a missing one is added.
outdir="${outdir%/}/"
135
#
# Iterate through list
#

#######################################
# Derive a tidy local file name from a URL.
# Steps, in order: drop a trailing slash, keep only the last path component,
# turn underscores/spaces/dots/backslashes and %20 into dashes, lowercase,
# drop leading "the-"/"a-" and inner "-the[-movie][-film]", cut everything
# from a 19xx/20xx year or an NNNp resolution up to the last dash (which is
# turned back into the extension dot), and map dash-delimited roman numerals
# ii..x to digits. Relies on GNU sed extensions (\L, \?, \|).
# Arguments: $1 - URL
# Outputs:   derived file name on stdout
#######################################
derive_filename() {
  printf '%s\n' "$1" | sed '
    s/\/$//
    s/.*\///
    s/[_ \.]/-/g
    s/\%20/-/g
    s/\(.*\)/\L\1/
    s/^the-//
    s/-the\(-movie\)\?\(-film\)\?//g
    s/^a-//
    s/-(*\(20\|19\)[0-9]\{2\})*.*-\(.*\)/.\2/g
    s/-[0-9]\{3,4\}p.*-\(.*\)/.\1/g
    s/-ii-/-2-/g
    s/-iii-/-3-/g
    s/-iv-/-4-/g
    s/-v-/-5-/g
    s/-vi-/-6-/g
    s/-vii-/-7-/g
    s/-viii-/-8-/g
    s/-ix-/-9-/g
    s/-x-/-10-/g
  '
}

#######################################
# Delete every line of a list file whose first field is exactly the given URL.
# Arguments: $1 - URL, $2 - list file (edited in place with GNU sed -i)
#######################################
remove_from_list() {
  local url_re
  # Escape the characters that are special in a basic regex (and the /
  # delimiter) so the URL is matched literally.
  url_re=$(printf '%s\n' "$1" | sed -e 's|[]\\/$*.^[]|\\&|g')
  sed -i -e "/^[[:space:]]*${url_re}\([[:space:]].*\)\{0,1\}\$/d" -- "$2"
}

#######################################
# Download (or, on a dry run, print the wget command for) each list entry.
# Globals (read): list, outdir, otherargs, maxdl, test, BADWORDS, EUID
# Outputs:        one status line per entry via output()
# Side effects:   successfully downloaded URLs are removed from the list file
#######################################
process_list() {
  local url filename prettyname wget_cmd wgetout exitcode failures
  local nvarg=''
  local n=0

  # Less verbose wget output when running as root
  if [ "$EUID" -eq 0 ]; then
    nvarg=' -nv'
  fi

  # "|| [[ -n $url ]]" keeps a final line that lacks a trailing newline
  while read -r url filename || [[ -n "$url" ]]; do

    # Ignore blank lines and commented lines
    if [[ -z "$url" || "$url" == \#* ]]; then
      continue
    fi

    # Stop when maxdl is reached
    if [[ -n "${maxdl:-}" && $n -ge $maxdl ]]; then
      break
    fi

    # parse output filename
    if [[ -z "$filename" ]]; then
      filename=$(derive_filename "$url")
    fi
    prettyname="$filename from $url"

    # Compile the wget command. Every list-derived value is shell-quoted with
    # %q so characters such as & ? * in a URL or file name cannot break or
    # extend the command; otherargs is the user's own shell fragment and is
    # inserted verbatim, as documented for --args.
    printf -v wget_cmd \
      'wget%s -R %q -c -nd -nH --cut-dirs=100 -np -e robots=off -P %q -O %q%s %q' \
      "$nvarg" '*index.html*' "$outdir" "$outdir$filename" "${otherargs:-}" "$url"

    n=$((n + 1))
    if [[ -n "${test:-}" ]]; then
      output "$wget_cmd" " "
      continue
    fi

    # Capture wget's stdout and stderr; stdin is detached so the command can
    # never consume the list that this loop is reading.
    wgetout=$(bash -c "$wget_cmd" 2>&1 </dev/null)
    exitcode=$?
    failures=$(printf '%s\n' "$wgetout" | grep -i -- "$BADWORDS")
    if [[ $exitcode -eq 0 && -z "$failures" ]]; then
      remove_from_list "$url" "$list"
      output "Downloaded $prettyname" "\e[32m"
    else
      output "Downloading of $prettyname failed with code $exitcode: $(printf '%s\n' "$failures" | paste -sd ';' -)" "\e[31m"
    fi

  done < "$list"
}

output "Starting downloading files in $(readlink -f "$list") to $outdir" "\n\e[1m"

process_list