add sitemap generator
author Andrew Lorimer <andrew@lorimer.id.au>
Mon, 24 Nov 2025 10:25:39 +0000 (21:25 +1100)
committer Andrew Lorimer <andrew@lorimer.id.au>
Mon, 24 Nov 2025 10:25:39 +0000 (21:25 +1100)
sitemap.sh [new file with mode: 0755]
diff --git a/sitemap.sh b/sitemap.sh
new file mode 100755 (executable)
index 0000000..dec2bb1
--- /dev/null
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+# Tool for generating an XML sitemap from a directory of files
+
+# Andrew Lorimer - July 2022
+
+# Help text, printed for -h/--help and when too many arguments are supplied.
+# The \e[1m / \e[0m (bold on / reset) and \n sequences are stored literally
+# and only become control characters when the text is printed with printf.
+# $(basename "$0") is expanded once, here, at assignment time.
+usagelong="\e[1mUSAGE:\e[0m
+  $(basename "$0") OPTIONS
+
+\e[1mOPTIONS:\e[0m
+  -d|--directory DIRECTORY
+    Directory to search recursively for files to add to the sitemap.
+    Only files with the text SITEMAP_INCLUDE anywhere in the file are
+    indexed in the sitemap.
+    Default: .
+
+  -o|--output OUTPUT
+    Output file
+    Default: sitemap.xml
+
+  -e|--extension EXTENSION
+    Files ending with this string will be mapped and everything else ignored
+    Default: .php
+
+  -p|--prefix PREFIX
+    Prefix to use for URLS, including protocol and domain 
+    Default: https://example.com
+
+  -q|--quiet
+    Do not prompt before overwriting the output file
+
+  -h|--help
+    Print this help and exit\n"
+
+# Opening of the sitemap document: the XML declaration followed by the
+# <urlset> root element. Written to the output file before any <url> entry.
+XMLHEAD='<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
+# Closing tag of the <urlset> root element; appended after the last entry.
+XMLTAIL='</urlset>'
+
+# Sanity check on the argument count: four options with values plus -q make
+# nine words, so anything longer is treated as a usage error. It is reported
+# on stderr with a failing status so callers can tell it from a normal run.
+if (( $# > 9 )); then
+  # %b expands the backslash escapes stored in the help text without using
+  # the text itself as a printf format string.
+  printf '%b' "$usagelong" >&2
+  exit 1
+fi
+
+# Defaults; each one can be overridden by the matching command-line option.
+directory="."
+output="sitemap.xml"
+extension=".php"
+prefix="https://example.com"
+quiet=0
+
+# Process arguments
+#
+# Options that take a value store it as a plain quoted string, so spaces and
+# glob characters in the value survive intact, and consume two words; -q
+# consumes one. A literal -- stops option processing. Usage errors are
+# reported on stderr together with the help text and exit with status 1.
+while (( $# > 0 )); do
+  case "$1" in
+    -d|--directory|-o|--output|-e|--extension|-p|--prefix)
+      # Each of these needs a following word. Only its presence is checked,
+      # so an explicitly empty value ("") is still accepted.
+      if (( $# < 2 )); then
+        printf '\x1b[31mOption %s requires a value\x1b[0m\n\n' "$1" >&2
+        printf '%b' "$usagelong" >&2
+        exit 1
+      fi
+      case "$1" in
+        -d|--directory) directory="$2" ;;
+        -o|--output)    output="$2" ;;
+        -e|--extension) extension="$2" ;;
+        -p|--prefix)    prefix="$2" ;;
+      esac
+      shift
+      ;;
+    -q|--quiet)
+      quiet=1
+      ;;
+    -h|--help)
+      # %b expands the escapes stored in the help text without using the
+      # text itself as a printf format string.
+      printf '%b' "$usagelong"
+      exit
+      ;;
+    --)
+      break
+      ;;
+    *)
+      # The offending word is passed as data, never as part of the format.
+      printf '\x1b[31mInvalid argument %s\x1b[0m\n\n' "$1" >&2
+      printf '%b' "$usagelong" >&2
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# Ask before clobbering an existing output file, unless -q/--quiet was given.
+# Only a reply beginning with "y" or "Y" lets the script carry on.
+if [[ -f "$output" && $quiet == 0 ]]; then
+  read -e -p "File $output exists. Overwrite? (y/n) " reply
+  case "$reply" in
+    [Yy]*)
+      ;;
+    *)
+      printf "Aborting\n"
+      exit
+      ;;
+  esac
+fi
+
+# Function to assemble a sitemap entry from a path
+#
+# Globals:
+#   directory  (read) site root; the URL path is taken relative to it
+#   prefix     (read) text placed in front of every URL path
+# Arguments:
+#   $1  path of the file to describe
+# Outputs:
+#   One <url> element on stdout containing <loc>, <lastmod> (only when git
+#   reports a commit date for the file) and <priority>.
+function generate_url () {
+  local moddate filename page slashes depth tenths
+
+  # Date of the last commit touching the file, in strict ISO 8601 form.
+  # Left empty when git reports nothing (e.g. untracked file) or fails.
+  moddate=$(git log -n 1 --pretty=format:%cd --date=iso-strict -- "$1") || moddate=""
+  filename=$(realpath -m --relative-to="$directory" -- "$1")
+  page="${filename%.*}"
+
+  # The two spaces after the embedded newlines reproduce the layout this
+  # script has always written, so regenerated sitemaps do not churn.
+  printf '\t<url>\n  \t\t<loc>%s' "$prefix"
+  # Only the top-level index page maps to the bare prefix. Comparing the whole
+  # extension-less path keeps e.g. index_files/x.php or indexes.php from
+  # collapsing onto the site root.
+  if [[ "$page" != "index" ]]; then
+    printf '/%s' "$page"
+  fi
+  printf '</loc>\n'
+
+  # An empty <lastmod> is not a valid date, so leave the element out when no
+  # date is known.
+  if [[ -n "$moddate" ]]; then
+    printf '  \t\t<lastmod>%s</lastmod>\n' "$moddate"
+  fi
+
+  # Priority is 1 for top-level pages and reduces by 0.1 for each directory
+  # level. Depth is the number of slashes in the relative path; the value is
+  # computed in tenths with integer arithmetic (no locale-dependent float
+  # formatting) and clamped so it never drops below 0.0.
+  slashes="${filename//[^\/]/}"
+  depth=${#slashes}
+  tenths=$(( 10 - depth ))
+  if (( tenths < 0 )); then
+    tenths=0
+  fi
+  printf '\t\t<priority>%d.%d</priority>\n' "$(( tenths / 10 ))" "$(( tenths % 10 ))"
+  printf '\t</url>\n'
+}
+
+# Write the whole document through a single redirection: header, one <url>
+# entry for every matching file that opts in with the SITEMAP_INCLUDE
+# marker, then the closing tag.
+{
+  printf '%s\n' "$XMLHEAD"
+
+  # find emits NUL-terminated paths and read consumes them one at a time, so
+  # names containing spaces, newlines or glob characters stay in one piece.
+  while IFS= read -r -d '' file_path; do
+    # Only index if the string SITEMAP_INCLUDE is in the file
+    if grep -q -F -- "SITEMAP_INCLUDE" "$file_path"; then
+      generate_url "$file_path"
+    fi
+  done < <(find "$directory" -type f -iname "*$extension" -print0)
+
+  printf '%s\n' "$XMLTAIL"
+} > "$output"