#!/bin/bash
#
# Tool for generating an XML sitemap from a directory of files
#
# Andrew Lorimer - July 2022
#
# Introduced as sitemap.sh by commit de26b2807b45465ef537570b613877936669c90f
# ("add sitemap generator", Andrew Lorimer, Mon, 24 Nov 2025 21:25:39 +1100).
#
# Every regular file below DIRECTORY whose name ends in EXTENSION and which
# contains the text SITEMAP_INCLUDE becomes one <url> entry in OUTPUT.
#
# External tools: find, sort, grep, sed. git is used for <lastmod> and GNU
# realpath for path normalisation; the script degrades gracefully when either
# is unavailable.
#
# NOTE(review): the XML element names used below (<urlset>, <url>, <loc>,
# <lastmod>, <priority>) follow the sitemaps.org 0.9 schema. They were
# reconstructed because the markup was missing from the copy of the script
# this file was restored from -- confirm against the original commit.

set -o pipefail

readonly PROGNAME=${0##*/}

readonly XMLHEAD='<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
readonly XMLTAIL='</urlset>'

# Settings; (re)initialised to their defaults by parse_args.
directory="."
output="sitemap.xml"
extension=".php"
prefix="https://example.com"
quiet=0

#######################################
# Print a one-line synopsis.
# Outputs: synopsis on stdout
#######################################
usage_short() {
  printf 'Usage: %s [-d DIRECTORY] [-o OUTPUT] [-e EXTENSION] [-p PREFIX] [-q] [-h]\n' \
    "$PROGNAME"
}

#######################################
# Print the full help text.
# Outputs: help text on stdout
#######################################
usage_long() {
  local bold=$'\033[1m' reset=$'\033[0m'
  cat <<EOF
${bold}USAGE:${reset}
  ${PROGNAME} OPTIONS

${bold}OPTIONS:${reset}
  -d|--directory DIRECTORY
    Directory to search recursively for files to add to the sitemap.
    Only files with the text SITEMAP_INCLUDE anywhere in the file are
    indexed in the sitemap.
    Default: .

  -o|--output OUTPUT
    Output file
    Default: sitemap.xml

  -e|--extension EXTENSION
    Files ending with this string will be mapped and everything else ignored
    Default: .php

  -p|--prefix PREFIX
    Prefix to use for URLs, including protocol and domain
    Default: https://example.com

  -q|--quiet
    Do not prompt before overwriting the output file

  -h|--help
    Print this help and exit
EOF
}

#######################################
# Report a command-line error and terminate.
# Arguments: $1 - message
# Outputs:   message (in red) and synopsis on stderr
# Returns:   never; exits with status 1
#######################################
usage_error() {
  # The message is passed as data, never as the printf format string.
  printf '\033[31m%s\033[0m\n\n' "$1" >&2
  usage_short >&2
  exit 1
}

#######################################
# Parse command-line options into the global settings.
# Globals:   directory, output, extension, prefix, quiet (written)
# Arguments: the script's command line
# Returns:   0 on success; exits 0 after --help, 1 on invalid usage
#######################################
parse_args() {
  directory="."
  output="sitemap.xml"
  extension=".php"
  prefix="https://example.com"
  quiet=0

  while (( $# > 0 )); do
    case "$1" in
      -d|--directory)
        (( $# >= 2 )) || usage_error "Option $1 requires an argument"
        directory=$2
        shift
        ;;
      -o|--output)
        (( $# >= 2 )) || usage_error "Option $1 requires an argument"
        output=$2
        shift
        ;;
      -e|--extension)
        (( $# >= 2 )) || usage_error "Option $1 requires an argument"
        extension=$2
        shift
        ;;
      -p|--prefix)
        (( $# >= 2 )) || usage_error "Option $1 requires an argument"
        prefix=$2
        shift
        ;;
      -q|--quiet)
        quiet=1
        ;;
      -h|--help)
        usage_long
        exit 0
        ;;
      --)
        shift
        break
        ;;
      *)
        usage_error "Invalid argument $1"
        ;;
    esac
    shift
  done
}

#######################################
# Ask before clobbering an existing output file unless --quiet was given.
# Globals:   output, quiet (read)
# Returns:   0 to proceed; exits 0 if the user declines (or stdin is at EOF)
#######################################
confirm_overwrite() {
  local answer=""
  if [[ -f "$output" ]] && (( quiet == 0 )); then
    # A failed read (EOF, no terminal) leaves answer empty, which aborts.
    read -r -e -p "File $output exists. Overwrite? (y/n) " answer || true
    if [[ "$answer" != [Yy]* ]]; then
      printf 'Aborting\n'
      exit 0
    fi
  fi
}

#######################################
# Escape the five XML special characters.
# sed is used instead of ${var//pat/rep} because the meaning of '&' in the
# replacement differs between bash versions.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
xml_escape() {
  printf '%s' "$1" | sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\\&apos;/g"
}

#######################################
# Assemble a sitemap entry from a path.
# Globals:   directory, prefix (read)
# Arguments: $1 - path of a file inside $directory
# Outputs:   one <url> element on stdout
#######################################
generate_url() {
  local path=$1
  local file_dir="." file_name=${path##*/}
  local moddate rel rel_dir name page loc slashes depth tenths

  if [[ "$path" == */* ]]; then
    file_dir=${path%/*}
    [[ -n "$file_dir" ]] || file_dir="/"
  fi

  # Date of the last commit touching the file. git runs inside the file's own
  # directory so the lookup works wherever the script was started. Errors are
  # silenced on purpose: untracked files (or no git at all) just get no
  # <lastmod> element.
  moddate=$(git -C "$file_dir" log -n 1 --pretty=format:%cd \
    --date=iso-strict -- "$file_name" 2>/dev/null) || moddate=""

  # Path relative to the site root; fall back to prefix stripping where GNU
  # realpath is not available.
  rel=$(realpath -m --relative-to="$directory" -- "$path" 2>/dev/null) \
    || rel=${path#"${directory%/}/"}

  rel_dir=""
  if [[ "$rel" == */* ]]; then
    rel_dir=${rel%/*}
  fi
  # Strip the extension from the file name only, never from a directory.
  name=${rel##*/}
  name=${name%.*}

  # An index page is addressed by its directory; the top-level index is the
  # bare prefix.
  if [[ "$name" == "index" ]]; then
    page=$rel_dir
  elif [[ -n "$rel_dir" ]]; then
    page="$rel_dir/$name"
  else
    page=$name
  fi

  loc=$prefix
  if [[ -n "$page" ]]; then
    loc="$prefix/$page"
  fi

  # Priority is 1 for top-level pages and reduces by 0.1 for each directory
  # level. Computed in integer tenths (no bc, no locale-dependent decimal
  # point) and clamped to the 0.0 minimum the sitemap protocol allows.
  slashes=${rel//[^\/]/}
  depth=${#slashes}
  tenths=$(( 10 - depth ))
  if (( tenths < 0 )); then
    tenths=0
  fi

  printf '\t<url>\n'
  printf '\t\t<loc>%s</loc>\n' "$(xml_escape "$loc")"
  if [[ -n "$moddate" ]]; then
    printf '\t\t<lastmod>%s</lastmod>\n' "$moddate"
  fi
  printf '\t\t<priority>%d.%d</priority>\n' "$(( tenths / 10 ))" "$(( tenths % 10 ))"
  printf '\t</url>\n'
}

#######################################
# Entry point: parse options, then write the sitemap.
# Globals:   directory, output, extension (read)
# Arguments: the script's command line
# Returns:   0 on success, non-zero if the output could not be written
#######################################
main() {
  local file

  # No valid invocation needs more than nine words.
  if (( $# > 9 )); then
    usage_long >&2
    exit 1
  fi

  parse_args "$@"
  [[ -d "$directory" ]] || usage_error "Directory $directory does not exist"

  # Check if output file exists and abort if we shouldn't overwrite
  confirm_overwrite

  {
    printf '%s\n' "$XMLHEAD"
    # NUL-delimited so that paths containing whitespace survive; sorted so
    # that repeated runs produce identical output.
    while IFS= read -r -d '' file; do
      # Only index if the string SITEMAP_INCLUDE is in the file
      if grep -q -- "SITEMAP_INCLUDE" "$file"; then
        generate_url "$file"
      fi
    done < <(find "$directory" -type f -iname "*$extension" -print0 \
      | LC_ALL=C sort -z)
    printf '%s\n' "$XMLTAIL"
  } > "$output"
}

main "$@"