git-deltafy-script on commit git-pack-objects: write the pack files with a SHA1 csum (c38138c)
   1#!/bin/bash
   2
# Example script to deltify an entire GIT repository based on the commit list.
# The most recent version of a file is the reference, and each previous
# version is stored as a delta against the best earlier version available,
# and so on for successive versions going back in time.  This way the
# increasing delta overhead is pushed towards older versions of any given file.
#
# The -d argument sets a limit on the delta chain depth.
# If 0 is passed then everything is undeltafied.  Limiting the delta
# depth is meaningful for subsequent access performance to old revisions.
# A value of 16 might be a good compromise between performance and good
# space saving.  Current default is unbounded.
#
# The --max-behind=30 argument is passed to git-mkdelta to keep the number
# of combinations and the memory usage bounded a bit.  If you have lots of
# memory and CPU power you may remove it (or set it to 0) to let git-mkdelta
# find the best delta match regardless of the number of revisions for a given
# file.  You can also make the value smaller to make it faster and less
# memory hungry.  A value of 5 ought to still give pretty good results.
# When set to 0 or omitted, the look-behind is unbounded.  Note that
# git-mkdelta might die with a segmentation fault in that case if it
# runs out of memory.  Note that the GIT repository will still be consistent
# even if git-mkdelta dies unexpectedly.
  25
  26set -e
  27
  28max_depth=
  29[ "$1" == "-d" ] && max_depth="--max-depth=$2" && shift 2
  30
  31overlap=30
  32max_behind="--max-behind=$overlap"
  33
  34function process_list() {
  35        if [ "$list" ]; then
  36                echo "Processing $curr_file"
  37                echo "$list" | xargs git-mkdelta $max_depth $max_behind -v
  38        fi
  39}
  40
  41rev_list=""
  42curr_file=""
  43
  44git-rev-list HEAD |
  45while true; do
  46        # Let's batch revisions into groups of 1000 to give it a chance to
  47        # scale with repositories containing long revision lists.  We also
  48        # overlap with the previous batch the size of mkdelta's look behind
  49        # value in order to account for the processing discontinuity.
  50        rev_list="$(echo -e -n "$rev_list" | tail --lines=$overlap)"
  51        for i in $(seq 1000); do
  52                read rev || break
  53                rev_list="$rev_list$rev\n"
  54        done
  55        echo -e -n "$rev_list" |
  56        git-diff-tree -r -t --stdin |
  57        awk '/^:/ { if ($5 == "M") printf "%s %s\n%s %s\n", $4, $6, $3, $6 }' |
  58        LC_ALL=C sort -s -k 2 | uniq |
  59        while read sha1 file; do
  60                if [ "$file" == "$curr_file" ]; then
  61                        list="$list $sha1"
  62                else
  63                        process_list
  64                        curr_file="$file"
  65                        list="$sha1"
  66                fi
  67        done
  68        [ "$rev" ] || break
  69done
  70process_list
  71
  72curr_file="root directory"
  73list="$(
  74        git-rev-list HEAD |
  75        while read commit; do
  76                git-cat-file commit $commit |
  77                sed -n 's/tree //p;Q'
  78        done
  79        )"
  80process_list
  81