#!/bin/bash
#
# webp-convert.sh
#
# Search through HTML and PHP files in a directory, get a list of the image
# files used (jpg/png), and convert these to webp in the same directory as
# the originals. Alternatively, convert all jpg/png files in a directory to 
# webp. Can be used with a rewrite rule on the web server to serve the webp 
# file when the original is requested.
#
# Andrew Lorimer, January 2021
#

# Name the script was invoked under; interpolated into both usage texts.
script_name=$(basename "$0")

# Short usage summary. It is also appended to the error hints further down.
usageshort="\e[1mUSAGE:\e[0m
  $script_name [MODE] [OPTIONS] [DIRECTORY]

For more information, see \e[34m$script_name --help\e[0m\n"

# Status notes shown when the corresponding option is active. The \x1b, \e
# and \n sequences are stored literally and only expanded when the text is
# printed with printf.
dryhint="\x1b[33mDry run mode\x1b[0m\n"
overwritehint="\x1b[33mOverwriting existing files\x1b[0m\n"

# Error hints: a red one-line explanation followed by the short usage text.
modehint="\x1b[31m-s|--scan and -a|--all cannot be specified simultaneously\x1b[0m\n\n$usageshort"
verbosityhint="\x1b[31m-q|--quiet and -v|--verbose cannot be specified simultaneously\x1b[0m\n\n$usageshort"
# NOTE: $1 is expanded here, while the string is being built, so this text
# contains whatever the script's first argument was at this point.
invalidhint="\x1b[31mInvalid argument $1\x1b[0m\n\n$usageshort"

# Full help text for -h|--help.
usagelong="\e[1mUSAGE:\e[0m $script_name [OPTIONS] [DIRECTORY]

\e[1mOPTIONS:\e[0m
  -a|--all
    Convert all image files in DIRECTORY to webp. This is the default mode. 
    This is mutually exclusive with -s|--scan.

  -s|--scan
    Scan all files with .php or .html extension in DIRECTORY for <img> tags 
    and convert source images to webp. This is mutually exclusive with 
    -a|--all.

  -o|--overwrite
    Convert all files even if the output filename exists (the default is to 
    ignore files for which a webp already exists). When calculating space 
    savings of files which overwrite existing ones, the saving is relative to 
    the source file, not the file that is overwritten.

  -q|--quiet
    Do not print any output (apart from errors). Mutually exclusive with 
    -v|--verbose.

  -v|--verbose
    Print extra output for each file. Mutually exclusive with -q|--quiet.

  -d|--dry
    Dry run - search for files and attempt to convert them but do not make any
    permanent changes. Used for checking possible space savings.

  -h|--help
    Print this help message and exit.

\e[1mDIRECTORY:\e[0m
    Directory in which to search for PHP or HTML files (in -s|--scan mode) or 
    image files (in -a|--all mode).

Converts JPG and PNG files to WebP using ImageMagick's convert(1). Optionally 
searches for source file in a directory or searches through HTML/PHP files in 
a directory to find references to source files.\n"

# Parse the command line into the option globals.
#
# Globals written: mode, overwrite, dry, verbosity, directory
# Globals read:    usageshort, usagelong, modehint, verbosityhint
# Arguments:       the script's command-line arguments
# Exits:           1 on a usage error (after printing a hint),
#                  0 after printing the help text for -h|--help
#
# The hint texts are printed through '%b' so their \x1b / \e / \n escapes are
# expanded without treating the text itself as a printf format string.
parse_args () {
  # Mode, overwrite, verbosity, dry run and directory: five arguments at most.
  if (( $# > 5 )); then
    printf '%b' "$usageshort"
    exit 1
  fi

  mode=-1       # -1: initial, 0: all (default), 1: scan
  overwrite=0
  dry=0
  verbosity=-1  # -1: initial, 0: quiet, 1: normal, 2: verbose
  directory=""

  while (( $# > 0 )); do
    case "$1" in
      -a|--all)
        if (( mode == 1 )); then
          printf '%b' "$modehint"
          exit 1
        fi
        mode=0
        ;;
      -s|--scan)
        if (( mode == 0 )); then
          printf '%b' "$modehint"
          exit 1
        fi
        mode=1
        ;;
      -o|--overwrite)
        overwrite=1
        ;;
      -q|--quiet)
        if (( verbosity == 2 )); then
          printf '%b' "$verbosityhint"
          exit 1
        fi
        verbosity=0
        ;;
      -v|--verbose)
        if (( verbosity == 0 )); then
          printf '%b' "$verbosityhint"
          exit 1
        fi
        verbosity=2
        ;;
      -d|--dry)
        dry=1
        ;;
      -h|--help)
        printf '%b' "$usagelong"
        exit
        ;;
      *)
        # Anything else is the directory, but only once and never something
        # that looks like an option.
        if [[ -n "$directory" || "$1" == -* ]]; then
          # Build the message here so it names the argument that is actually
          # being rejected rather than the first one on the command line.
          printf '\x1b[31mInvalid argument %s\x1b[0m\n\n' "$1"
          printf '%b' "$usageshort"
          exit 1
        fi
        directory=$1
        ;;
    esac
    shift
  done
}

parse_args "$@"

# Replace the "not specified" sentinels left by option parsing with the
# defaults: mode 0 (convert all), the current directory, verbosity 1 (normal).
if (( mode == -1 )); then
  mode=0
fi
directory=${directory:-.}
if (( verbosity == -1 )); then
  verbosity=1
fi

# Announce dry-run and overwrite mode unless running quietly.
# The tests are arithmetic on purpose: inside [[ ]] the > operator compares
# strings, which only happens to work for single-digit values. The hints go
# through '%b' so their escapes are expanded without being used as a format.
if (( verbosity > 0 && dry == 1 )); then
  printf '%b' "$dryhint"
fi
if (( verbosity > 0 && overwrite == 1 )); then
  printf '%b' "$overwritehint"
fi

# Run-wide tallies, all starting from zero.
declare -a savings=()   # one entry per recorded saving, in bytes
savings_total=0         # sum of the recorded savings, in bytes
found_count=0 convert_count=0 larger_count=0 failed_count=0

# Convert one image to WebP with ImageMagick's convert and update the run
# statistics.
#
# Globals read:    dry, verbosity
# Globals written: convert_count, failed_count, larger_count, savings,
#                  savings_total
# Arguments:
#   $1 - path of the source image (PNG is converted losslessly, anything
#        else with the lossy default)
#   $2 - path the WebP file is written to
# Outputs: progress and failure messages on stdout, subject to verbosity
# Returns: 0 if a WebP no larger than the source was produced,
#          1 if the conversion failed or the result was larger
convert_webp () {
  local src=$1
  local dest=$2
  local target=$dest
  local scratch_dir=""
  local status=0
  local orig_size webp_size saving
  local -a convert_args

  # A dry run converts into a private scratch directory, so nothing is
  # written next to the originals and no predictable /tmp name is used.
  # The .webp extension is what tells convert which format to produce.
  if [[ $dry == 1 ]]; then
    scratch_dir=$(mktemp -d) || status=$?
    target="$scratch_dir/out.webp"
  fi

  if (( status == 0 )); then
    convert_args=("$src" -quality 75)
    # Match the extension case-insensitively so .PNG is lossless too.
    if [[ $src == *.[Pp][Nn][Gg] ]]; then
      convert_args+=(-define webp:lossless=true)
    fi
    convert "${convert_args[@]}" "$target"
    status=$?
  fi

  if (( status != 0 )); then
    (( failed_count += 1 ))
    if (( verbosity > 0 )); then
      printf '\x1b[31mFailed to convert %s to %s\x1b[0m\n' "$src" "$dest"
    fi
    if [[ -n $scratch_dir ]]; then
      rm -rf -- "$scratch_dir"
    fi
    return 1  # Nothing to measure
  fi

  (( convert_count += 1 ))
  if (( verbosity > 1 )); then
    printf 'Converted %s to %s\n' "$src" "$dest"
  fi

  # Measure both files before the scratch copy of a dry run is discarded.
  orig_size=$(stat -c%s "$src")
  webp_size=$(stat -c%s "$target")
  if [[ -n $scratch_dir ]]; then
    rm -rf -- "$scratch_dir"
  fi

  # A WebP that is larger than its source is not worth keeping.
  if (( webp_size > orig_size )); then
    # dry is always "0" or "1", so it must be compared by value; testing it
    # for emptiness would never take this branch.
    if [[ $dry != 1 ]]; then
      rm -f -- "$dest"
    fi
    if (( verbosity > 0 )); then
      printf '\x1b[33mRemoved %s as it was larger than %s (%s > %s)\x1b[0m\n' \
        "$dest" "$src" "$webp_size" "$orig_size"
    fi
    (( larger_count += 1 ))
    return 1
  fi

  # Record the saving relative to the source file.
  saving=$(( orig_size - webp_size ))
  savings+=("$saving")
  (( savings_total += saving ))

  return 0
}

# Count one discovered image and convert it unless its WebP counterpart
# already exists (existing files are only replaced when overwrite is 1).
#
# Globals read:    overwrite
# Globals written: found_count
# Arguments:       $1 - path of the source image
convert_if_needed () {
  local src=$1
  # Replace the last extension whatever its case. A case-sensitive rewrite
  # would leave "x.JPG" unchanged and make the target equal to the source.
  local dest="${src%.*}.webp"

  (( found_count += 1 ))
  if [[ ! -f $dest || $overwrite == 1 ]]; then
    convert_webp "$src" "$dest"
  fi
}

# Mode 0: convert every JPG and PNG file below $directory.
# Paths are passed NUL-delimited so names containing spaces survive.
convert_all_images () {
  local image
  if (( verbosity > 1 )); then
    printf 'Converting image files in %s\n' "$directory"
  fi
  while IFS= read -r -d '' image; do
    convert_if_needed "$image"
  done < <(find "$directory" -type f \
    \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' \) -print0)
}

# Mode 1: scan the PHP and HTML files below $directory for <img src="...">
# references to jpg/jpeg/png files and convert those images.
# The sed expression reports at most one reference per line of input.
# References are resolved against $directory: a leading "/" is dropped and
# every other path is used as written, not relative to the referencing file.
convert_referenced_images () {
  local page ref
  while IFS= read -r -d '' page; do
    if (( verbosity > 1 )); then
      printf 'Scanning for image files referenced in %s\n' "$page"
    fi
    while IFS= read -r ref; do
      convert_if_needed "$directory/${ref#/}"
    done < <(sed -n 's:.*<img src="\([^"]*\.\(jpe\?g\|png\)\)".*:\1:p' "$page")
  done < <(find "$directory" -type f \
    \( -iname '*.php' -o -iname '*.html' \) -print0)
}

case "$mode" in
  0) convert_all_images ;;
  1) convert_referenced_images ;;
esac

# Format a byte count for display: human-readable IEC units when numfmt is
# available, otherwise the plain number followed by " B".
# Arguments: $1 - number of bytes
format_bytes () {
  if command -v numfmt &> /dev/null; then
    numfmt --to iec "$1"
  else
    printf '%s B\n' "$1"
  fi
}

# Print the end-of-run statistics; prints nothing in quiet mode.
#
# Globals read: verbosity, found_count, convert_count, larger_count,
#               failed_count, savings_total
# Outputs:      the coloured summary on stdout
print_summary () {
  if (( verbosity <= 0 )); then
    return 0
  fi

  # Files whose WebP was kept, i.e. converted and not larger than the source.
  local net=$(( convert_count - larger_count ))
  local average=0
  if (( net > 0 )); then
    average=$(( savings_total / net ))
  fi

  # Display strings stay local so the numeric totals are left untouched.
  local total_text average_text
  total_text=$(format_bytes "$(( savings_total ))")
  average_text=$(format_bytes "$average")

  printf '\x1b[32m%s found\n' "$found_count"
  if (( failed_count > 0 )); then
    printf '\x1b[31m%s failed\n' "$failed_count"
  fi
  printf '\x1b[32m%s converted\n%s smaller than original\n%s saved\n%s saved average per file\n\x1b[0m' \
    "$convert_count" "$net" "$total_text" "$average_text"
}

print_summary

# Exit status: 1 if at least one conversion failed, 0 otherwise.
# The test is arithmetic because the > operator inside [[ ]] compares
# strings rather than numbers.
if (( failed_count > 0 )); then
  exit 1
fi
exit 0
