sitemap.shon commit add sitemap generator (de26b28)
   1#!/bin/bash
   2
   3# Tool for generating an XML sitemap from a directory of files
   4
   5# Andrew Lorimer - July 2022
   6
   7usagelong="\e[1mUSAGE:\e[0m
   8  $(basename "$0") OPTIONS
   9
  10\e[1mOPTIONS:\e[0m
  11  -d|--directory DIRECTORY
  12    Directory to search recursively for files to add to the sitemap.
  13    Only files with the text SITEMAP_INCLUDE anywhere in the file are
  14    indexed in the sitemap.
  15    Default: .
  16
  17  -o|--output OUTPUT
  18    Output file
  19    Default: sitemap.xml
  20
  21  -e|--extension EXTENSION
  22    Files ending with this string will be mapped and everything else ignored
  23    Default: .php
  24
  25  -p|--prefix PREFIX
  26    Prefix to use for URLS, including protocol and domain 
  27    Default: https://example.com
  28
  29  -q|--quiet
  30    Do not prompt before overwriting the output file
  31
  32  -h|--help
  33    Print this help and exit\n"
  34
  35XMLHEAD='<?xml version="1.0" encoding="UTF-8"?>
  36<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  37XMLTAIL='</urlset>'
  38
  39if (( $# > 9)); then
  40  printf "$usagelong"
  41  exit
  42fi
  43
  44directory="."
  45output="sitemap.xml"
  46extension=".php"
  47prefix="https://example.com"
  48quiet=0
  49
  50# Process arguments
  51while [ $# -gt 0 ]; do
  52  case "$1" in
  53    -d|--directory)
  54      directory=(${2-})
  55      shift
  56      ;;
  57    -o|--output)
  58      output=(${2-})
  59      shift
  60      ;;
  61    -e|--extension)
  62      extension=(${2-})
  63      shift
  64      ;;
  65    -p|--prefix)
  66      prefix=(${2-})
  67      shift
  68      ;;
  69    -q|--quiet)
  70      quiet=1
  71      ;;
  72    -h|--help)
  73      printf "$usagelong"
  74      exit
  75      ;;
  76    --)
  77      break
  78      ;;
  79    *)
  80      printf "\x1b[31mInvalid argument $1\x1b[0m\n\n"
  81      printf "$usageshort"
  82      exit 1
  83      ;;
  84  esac
  85  shift
  86done
  87
  88# Check if output file exists and abort if we shouldn't overwrite
  89if [ -f "$output" ] && [ $quiet == 0 ]; then
  90  read -e -p "File $output exists. Overwrite? (y/n) " overwrite
  91  if [[ "$overwrite" != [Yy]* ]]; then
  92    printf "Aborting\n"
  93    exit
  94  fi
  95fi
  96
  97# Function to assemble a sitemap entry from a path
  98function generate_url () {
  99  moddate=`git log -n 1 --pretty=format:%cd --date=iso-strict $1`
 100  filename=`realpath -m --relative-to=$directory $1`
 101  echo -ne "\t<url>
 102  \t\t<loc>$prefix"
 103  if [[ $filename != "index"* ]]; then
 104    echo -n "/${filename%.*}"
 105  fi
 106  echo -ne "</loc>
 107  \t\t<lastmod>$moddate</lastmod>\n"
 108  depth=`echo $filename | awk -F"/" '{print NF-1}'`
 109  # Priority is 1 for top-level pages and reduces by 0.1 for each directory level
 110  echo -ne "\t\t<priority>`printf %.1f $(echo "1-0.1*$depth" | bc)`</priority>\n"
 111  echo -ne "\t</url>\n"
 112}
 113
 114echo "$XMLHEAD" > $output
 115
 116for file_path in $(find $directory -type f -and -iname "*$extension"); do
 117  # Only index if the string SITEMAP_INCLUDE is in the file
 118  if grep -q "SITEMAP_INCLUDE" $file_path; then
 119    url=$(generate_url "$file_path")
 120    echo "$url" >> $output
 121  fi
 122done
 123
 124echo "$XMLTAIL" >> $output