From f247dc12eb64f582d7c402c30a6dfb0ae0fa6f10 Mon Sep 17 00:00:00 2001
From: Andrew Lorimer
Date: Mon, 21 May 2018 21:18:28 +1000
Subject: [PATCH 1/1] trailing slash in output dir

---
 duckduckget.sh | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/duckduckget.sh b/duckduckget.sh
index 17c50dc..b24a2fb 100755
--- a/duckduckget.sh
+++ b/duckduckget.sh
@@ -3,13 +3,24 @@
 # get urls from duckduckgo, then parse them to wget to download
 
 args=("$@")
-search=${args[0]}
+site=${args[0]}
 filetype=${args[1]}
+outdir=${args[2]}
 
-set -f
-urls=`curl -silent https://duckduckgo.com/html/\?q\=site:www.mash.dept.shef.ac.uk/Resources |
-grep 'pdf$' | tr -d ' \t\r' | grep -v '^[0-9]' | awk '{print "http://" $0}'`
+if [ $# != 3 ]
+  then
+    printf "\n\x1b[31mWrong number of arguments\x1b[0m\n\n"
+    printf "Usage: duckduckget [site] [filetype] [outdir]\n  where [site] is the domain to search for files\n  [filetype] is extension without preceding dot\n  [outdir] is the output directory relative to working directory\n"
+    exit 1
+fi
+
+if ! [[ $outdir =~ /$ ]]
+  then
+    outdir=$outdir/
+fi
+
+urls=`curl --silent https://duckduckgo.com/html/\?q\=site:$site%20filetype:$filetype | grep "${filetype}$" | tr -d ' \t\r' | grep -v '^[0-9]' | awk '{print "http://" $0}'`
 
 for url in $urls; do
-  wget --cut-dirs=100 -P /mnt/andrew/literature/mathcentre/ $url
+  wget --cut-dirs=100 -P $outdir $url
 done
-- 
2.43.2
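
Note (not part of the patch): with this change applied, the script takes three positional arguments: the site to search, the file extension, and the output directory. A hypothetical invocation, using an illustrative domain and directory name that are not taken from the patch, might look like:

    ./duckduckget.sh www.example.com pdf papers

This queries DuckDuckGo for PDF files hosted on www.example.com and passes each result URL to wget with -P papers/, the trailing slash being appended automatically by the new check when it is omitted.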