1#!/bin/bash
2
3# Tool for generating an XML sitemap from a directory of files
4
5# Andrew Lorimer - July 2022
6
# Full help text, printed for -h|--help. It contains backslash escapes
# (\e for bold, \n for the final newline) that printf expands when the text
# is displayed, so it must be printed with printf rather than plain echo.
usagelong="\e[1mUSAGE:\e[0m
 $(basename "$0") OPTIONS

\e[1mOPTIONS:\e[0m
 -d|--directory DIRECTORY
 Directory to search recursively for files to add to the sitemap.
 Only files with the text SITEMAP_INCLUDE anywhere in the file are
 indexed in the sitemap.
 Default: .

 -o|--output OUTPUT
 Output file
 Default: sitemap.xml

 -e|--extension EXTENSION
 Files ending with this string will be mapped and everything else ignored
 Default: .php

 -p|--prefix PREFIX
 Prefix to use for URLS, including protocol and domain
 Default: https://example.com

 -q|--quiet
 Do not prompt before overwriting the output file

 -h|--help
 Print this help and exit\n"

# One-line synopsis printed after an "Invalid argument" error. The argument
# parser references this variable, so it has to be defined alongside the long
# help; without it the error path prints nothing useful.
usageshort="\e[1mUSAGE:\e[0m $(basename "$0") [-d DIRECTORY] [-o OUTPUT] [-e EXTENSION] [-p PREFIX] [-q] [-h]\n"
34
# Closing tag written once after the last <url> entry.
XMLTAIL="</urlset>"

# Document preamble: the XML declaration on the first line, then the opening
# <urlset> tag carrying the sitemap 0.9 namespace and schema location.
XMLHEAD='<?xml version="1.0" encoding="UTF-8"?>'$'\n''<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
38
# Reject command lines that are longer than any meaningful invocation: four
# options with a value each plus -q is nine words. This is a usage error, so
# the help goes to stderr and the exit status is non-zero (a zero status would
# make a calling script believe the sitemap had been generated).
# '%b' expands the \e / \n escapes in the help text without treating the text
# itself as a printf format string.
if (( $# > 9 )); then
  printf '%b' "${usagelong-}" >&2
  exit 1
fi
43
# Built-in settings, used unless the matching command-line option replaces
# them. quiet is a 0/1 flag that -q|--quiet switches to 1.
quiet=0
prefix='https://example.com'
extension='.php'
output='sitemap.xml'
directory='.'
49
# Process arguments

#######################################
# Report a usage error and terminate the script.
# Globals:   usageshort, usagelong (read) - the short synopsis is preferred;
#            the long help is the fallback when no synopsis is defined
# Arguments: $1 - error message
# Outputs:   message (in red) and usage text on stderr
# Exits:     1
#######################################
usage_error() {
  # The message is passed as data, never as the printf format, so a stray
  # '%' or backslash in a bad argument cannot corrupt the output.
  printf '\x1b[31m%s\x1b[0m\n\n' "$1" >&2
  printf '%b' "${usageshort:-${usagelong-}}" >&2
  exit 1
}

#######################################
# Parse command-line options into the global settings.
# Globals:   directory, output, extension, prefix, quiet (written)
#            usagelong (read)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h|--help
# Exits:     0 after printing help; 1 (via usage_error) on an unknown
#            option or an option that is missing its value
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -d|--directory|-o|--output|-e|--extension|-p|--prefix)
        (( $# >= 2 )) || usage_error "Missing value for $1"
        # Plain quoted assignment keeps the value intact; spaces and glob
        # characters in it are not split or expanded.
        case "$1" in
          -d|--directory) directory=$2 ;;
          -o|--output) output=$2 ;;
          -e|--extension) extension=$2 ;;
          -p|--prefix) prefix=$2 ;;
        esac
        shift
        ;;
      -q|--quiet)
        quiet=1
        ;;
      -h|--help)
        printf '%b' "${usagelong-}"
        exit
        ;;
      --)
        break
        ;;
      *)
        usage_error "Invalid argument $1"
        ;;
    esac
    shift
  done
}

parse_args "$@"
87
# Ask for confirmation before replacing an existing output file, unless quiet
# mode was requested. Any answer that does not begin with y/Y stops the script.
if [[ -f "$output" && "$quiet" == 0 ]]; then
  read -e -p "File $output exists. Overwrite? (y/n) " overwrite
  case "$overwrite" in
    [Yy]*)
      # Confirmed: carry on and overwrite the file.
      ;;
    *)
      printf "Aborting\n"
      exit
      ;;
  esac
fi
96
# Function to assemble a sitemap entry from a path
#######################################
# Print the <url> element describing one file.
# Globals:   directory (read) - root that the URL path is made relative to
#            prefix (read)    - protocol and domain prepended to every <loc>
# Arguments: $1 - path of the file to describe
# Outputs:   the XML fragment on stdout, terminated by a newline
# Returns:   non-zero if the relative path cannot be computed
#######################################
generate_url() {
  local file_path=$1
  local moddate filename base_name parent url_path no_slashes depth tenths

  # Committer date of the last commit touching the file. Left empty when git
  # has nothing to report (untracked file, no repository, git not installed).
  moddate=$(git log -n 1 --pretty=format:%cd --date=iso-strict -- "$file_path") || moddate=""
  filename=$(realpath -m --relative-to="$directory" -- "$file_path") || return

  # Drop the extension from the last path component only, so a dot in a
  # directory name is never mistaken for the start of the extension.
  base_name=${filename##*/}
  parent=${filename%"$base_name"}
  url_path=${parent}${base_name%.*}

  printf '\t<url>\n\t\t<loc>%s' "$prefix"
  # Only the top-level index page maps to the bare prefix. Paths that merely
  # begin with "index" (e.g. indexes/foo.php) keep their own URL.
  if [[ "$filename" == */* || ( "$filename" != index && "$filename" != index.* ) ]]; then
    printf '/%s' "$url_path"
  fi
  printf '</loc>\n'

  # <lastmod> is optional in the sitemap protocol; an empty element would be
  # invalid, so it is omitted when no date is available.
  if [[ -n "$moddate" ]]; then
    printf '\t\t<lastmod>%s</lastmod>\n' "$moddate"
  fi

  # Depth = number of '/' separators in the relative path.
  no_slashes=${filename//\//}
  depth=$(( ${#filename} - ${#no_slashes} ))
  # Priority is 1 for top-level pages and reduces by 0.1 for each directory
  # level. It is computed in integer tenths and clamped at 0.0, the lowest
  # value the protocol allows.
  tenths=$(( depth < 10 ? 10 - depth : 0 ))
  printf '\t\t<priority>%d.%d</priority>\n' "$(( tenths / 10 ))" "$(( tenths % 10 ))"
  printf '\t</url>\n'
}
113
#######################################
# Write the complete sitemap to the output file.
# Globals:   XMLHEAD, XMLTAIL, directory, extension, output (read)
# Outputs:   replaces the contents of $output
# Returns:   non-zero if $output cannot be opened for writing
#######################################
write_sitemap() {
  local file_path url

  # One redirection for the whole document: the file is opened once and is
  # either written in full or not touched at all when it cannot be opened.
  {
    printf '%s\n' "$XMLHEAD"

    # NUL-delimited find output keeps paths containing spaces, newlines or
    # glob characters intact.
    while IFS= read -r -d '' file_path; do
      # Only index if the string SITEMAP_INCLUDE is in the file
      if grep -q -F -- "SITEMAP_INCLUDE" "$file_path"; then
        url=$(generate_url "$file_path")
        printf '%s\n' "$url"
      fi
    done < <(find "$directory" -type f -iname "*$extension" -print0)

    printf '%s\n' "$XMLTAIL"
  } > "$output"
}

write_sitemap