#! /bin/bash
#
# Get a list of URLs and optional corresponding output paths, then send them to wget
# For more info, see `nameget.sh --help`
# Andrew Lorimer https://lorimer.id.au
#

VERSION="0.9"
LOGTAG="$(basename "$0")"
LOGFILE=/var/log/nameget
BADWORDS="error\|failure\|unable\|denied\|directory" # strings in wget output that indicate a failed download
outdir="./"

# Print a message ($1), optionally prefixed with a colour code ($2), when running
# interactively or in test mode; otherwise send it to syslog and the logfile.
function output() {
  if [ -n "$test" ] || [ "$EUID" -ne 0 ]; then
    printf "$2%s\e[0m\n" "$1"
  else
    logger -s -t "$LOGTAG" "$1" 2>> "$LOGFILE" # logger -s echoes the message to stderr, which is captured in $LOGFILE
  fi
}

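# For example (illustrative), output "Downloaded foo.mkv" "\e[32m" prints the
# message in green on an interactive or test run, and is sent to syslog and
# $LOGFILE when running as root (e.g. from a cron job).
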
usagelong="\e[34m$(basename "$0")\e[0m $VERSION
Get a list of URLs and optional corresponding output paths, then send them to wget

\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS]

\e[1mOPTIONS:\e[0m
  -l, --list \e[4m<list>\e[0m
    Text file where each line contains a URL and filename separated by a space. Filename is optional. Lines starting with # are ignored.
    Default: ./queue.txt

  -h, --help
    Show this help message

  -t, --test
    Dry run - parses LIST and prints wget commands but does not execute them

  -d, --destination \e[4m<destination>\e[0m
    Directory in which to save each file in LIST
    Default: ./

  -n, --number \e[4m<number>\e[0m
    Maximum number of files to download
    Default: none

  -a, --args \e[4m<args>\e[0m
    String of shell arguments which are passed verbatim to wget\n"

usageshort="\e[1mUSAGE:\e[0m
  $(basename "$0") [OPTIONS] -l LIST

For more information, see \e[34m$(basename "$0") --help\e[0m\n"

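# The queue file passed with --list has one entry per line: a URL, optionally
# followed by a space and an output filename - an illustrative example:
#
#   # lines starting with # are skipped
#   https://example.com/files/Some_Movie_(1984)_1080p.x264.mkv
#   https://example.com/docs/report.pdf monthly-report.pdf
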
if [ "$EUID" -ne 0 ]; then
  LOGFILE=/dev/null # redirect log because we're probably not running on a cron job
fi

#
# Validate arguments
#

while [ $# -gt 0 ]; do
  case "$1" in
    -l|--list)
      list="${2-}"
      shift
      ;;
    -h|--help)
      printf "$usagelong"
      exit
      ;;
    -t|--test)
      test=1 # no inner shift: -t takes no value
      ;;
    -d|--destination)
      outdir="${2-}"
      if [ -z "$outdir" ] || [ "$outdir" = " " ]; then
        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
        printf "$usageshort"
        exit 1
      fi
      shift
      ;;
    -n|--number)
      maxdl="${2-}"
      if [ -z "$maxdl" ] || [ "$maxdl" = " " ]; then
        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
        printf "$usageshort"
        exit 1
      fi
      shift
      ;;
    -a|--args)
      otherargs=" ${2-}"
      if [ "$otherargs" = " " ]; then
        printf "\x1b[31mOption $1 requires an argument\x1b[0m\n\n"
        printf "$usageshort"
        exit 1
      fi
      shift
      ;;
    --)
      break
      ;;
    *)
      printf "\x1b[31mInvalid argument $1\x1b[0m\n\n"
      printf "$usageshort"
      exit 1
      ;;
  esac
  shift # consume the option name itself (option values are shifted above)
done

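# At this point $list is the queue file, $outdir the destination directory,
# $maxdl the optional download limit, $test is set for a dry run, and
# $otherargs holds any extra arguments passed straight to wget.
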
if [ -z "$list" ] || [ "$list" = " " ]; then
  printf "\x1b[31mList location not specified\x1b[0m\n\n" >&2
  printf "$usageshort" >&2
  exit 1
fi

output "Welcome to $(basename "$0") $VERSION" "\e[34m"

if [ "$test" ]; then
  output "Running in test mode"
  output "Finished parsing options"
fi

if [[ ! $outdir =~ /$ ]]; then # append a trailing / to [DESTINATION] if it is missing
  outdir=$outdir/
fi

#
# Iterate through the list
#

output "Starting download of files in $(readlink -f "$list") to $outdir" "\n\e[1m"

n=0

while read -r url filename; do

  # Ignore if line is commented
  [[ "$url" =~ ^[[:space:]]*#.* ]] && continue

  # Stop once maxdl downloads have been reached
  [ -n "$maxdl" ] && [[ $n -ge $maxdl ]] && break

  # Parse output filename. If none was given in the list, derive one from the URL:
  # keep the last path segment, lowercase it, normalise separators to "-", drop
  # leading articles and year/resolution tags, and convert roman numerals to digits.
  destarg='' # placeholder for -O argument (output filename)
  if [ -z "$filename" ]; then
    filename="$(echo "$url" | sed 's/\/\$//; s/.*\///; s/[_ \.]/-/g; s/\%20/-/g; s/\(.*\)/\L\1/; s/^the-//; s/-the\(-movie\)\?\(-film\)\?//g; s/^a-//; s/-(*\(20\|19\)[0-9]\{2\})*.*-\(.*\)/.\2/g; s/-[0-9]\{3,4\}p.*-\(.*\)/.\1/g; s/-ii-/-2-/g; s/-iii-/-3-/g; s/-iv-/-4-/g; s/-v-/-5-/g; s/-vi-/-6-/g; s/-vii-/-7-/g; s/-viii-/-8-/g; s/-viiii-/-9-/g; s/-x-/-10-/g;')"
  fi
  destarg=" -O $outdir$filename"
  prettyname="$filename from $url"
  if [ "$EUID" -eq 0 ]; then
    nvarg=" -nv" # non-verbose wget output when logging to syslog
  fi

  # Compile wget command as a string so it can be printed in test mode.
  # Its unquoted expansion relies on word splitting, so URLs and paths must not contain spaces.
  command="wget$nvarg -R "*index.html*" -c -nd -nH --cut-dirs=100 -np -e robots=off -P $outdir$destarg$otherargs $url"

  n=$((n+1))
  if [ "$test" ]; then
    output "$command" " "
  else
    if [ "$EUID" -ne 0 ]; then # interactive: show and capture stdout of command
      wgetout=$($command | tee /dev/tty; exit "${PIPESTATUS[0]}") # propagate wget's exit code, not tee's
    else
      wgetout=$($command 2>&1 > /dev/null) # get stderr of command
    fi
    exitcode=$?
    failures=$(echo "$wgetout" | grep -i "$BADWORDS")
    if [ "$exitcode" -eq 0 ] && [ -z "$failures" ]; then
      sed -i "\|$url|d" "$list" # remove the completed entry from the queue file
      output "Downloaded $prettyname" "\e[32m"
    else
      output "Downloading of $prettyname failed with code $exitcode: $(echo "$failures" | paste -sd \; -)" "\e[31m"
    fi
  fi

done < "$list"
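
# Example invocations (illustrative paths and arguments):
#   ./nameget.sh -l queue.txt -t                        # dry run: just print the wget commands
#   ./nameget.sh -l queue.txt -d /srv/downloads -n 5    # download at most 5 files to /srv/downloads
#   ./nameget.sh -l queue.txt -a "--limit-rate=500k"    # pass extra arguments through to wget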