git-subtree.sh — on commit "debug messages are off by default; use -d to enable." (942dce5)
   1#!/bin/bash
   2#
   3# git-subtree.sh: split/join git repositories in subdirectories of this one
   4#
   5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
   6#
   # With no arguments at all, behave as if -h had been given.
   if [ $# -eq 0 ]; then
       set -- -h
   fi

   # Option specification consumed by `git rev-parse --parseopt`: usage lines,
   # a `--` separator, then one option per line.  The text is runtime data
   # and must stay at column 0 (a leading space marks a group header).
   OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree split [options...] --prefix=<prefix> <commit...>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <refspec...>
--
h,help        show the help
q             quiet
d             show debug messages
prefix=       the name of the subdir to split out
 options for 'split'
annotate=     add a prefix to commit message of new commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
ignore-joins  ignore prior --rejoin commits
"
   # The command substitution is quoted so the generated code reaches eval
   # verbatim; unquoted, it would be word-split and glob-expanded first,
   # corrupting option values that contain whitespace runs or wildcards.
   eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
   # NOTE(review): die and require_work_tree are not defined in this file;
   # presumably they come from git-sh-setup.
   . git-sh-setup
   require_work_tree

   # Start every option from a known-empty state so that nothing is
   # inherited from the caller's environment (this includes prefix, which
   # is tested with -z after option parsing).
   quiet=
   debug=
   command=
   onto=
   rejoin=
   ignore_joins=
   annotate=
   prefix=
  37
  # Print all arguments on stderr as a single line, but only when the
  # global $debug flag is non-empty; otherwise do nothing.
  debug()
  {
      [ -z "$debug" ] || echo "$@" >&2
  }
  44
  # Print all arguments on stderr unless the global $quiet flag is set.
  # Arguments are handed to echo unchanged, so a leading -n suppresses the
  # trailing newline.
  say()
  {
      [ -n "$quiet" ] || echo "$@" >&2
  }
  51
  # Run the arguments as a command; if it fails, abort through die with a
  # message that repeats the failed command.
  assert()
  {
      "$@" || die "assertion failed: " "$@"
  }
  60
  61
  62#echo "Options: $*"
  63
  # Walk the positional parameters and record each recognised option in
  # its global variable.  Options that take a value consume the following
  # word.  Parsing stops at `--`, leaving the subcommand and its arguments
  # in "$@".
  while [ $# -gt 0 ]; do
      opt="$1"
      shift
      case "$opt" in
          -q) quiet=1 ;;
          -d) debug=1 ;;
          --annotate) annotate="$1"; shift ;;
          --no-annotate) annotate= ;;
          --prefix) prefix="$1"; shift ;;
          --no-prefix) prefix= ;;
          --onto) onto="$1"; shift ;;
          --no-onto) onto= ;;
          --rejoin) rejoin=1 ;;
          --no-rejoin) rejoin= ;;
          --ignore-joins) ignore_joins=1 ;;
          --no-ignore-joins) ignore_joins= ;;
          --) break ;;
          # Anything else means the option spec and this loop disagree;
          # fail loudly instead of silently dropping the word.
          *) die "Unexpected option: $opt" ;;
      esac
  done
  83
  # The first word after the options is the subcommand; the remaining
  # words are its arguments.
  command="$1"
  shift
  case "$command" in
      add|merge|pull) default= ;;
      # split falls back to HEAD when no revision is given
      split) default="--default HEAD" ;;
      *) die "Unknown command '$command'" ;;
  esac

  if [ -z "$prefix" ]; then
      die "You must provide the --prefix option."
  fi

  # Strip trailing slashes from the prefix.  subtree_for_commit compares
  # the entry name printed by `git ls-tree` with $dir, and the commit
  # messages append their own "/", so "sub/" must be treated as "sub".
  dir="$prefix"
  while [ "$dir" != "/" ] && [ "${dir%/}" != "$dir" ]; do
      dir="${dir%/}"
  done

  # pull hands its arguments straight to `git pull`; every other command
  # expects revisions only, so bare path arguments are rejected.
  if [ "$command" != "pull" ]; then
      # $default is deliberately unquoted: it is either empty or two words.
      revs=$(git rev-parse $default --revs-only "$@") || exit $?
      dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
      if [ -n "$dirs" ]; then
          die "Error: Use --prefix instead of bare filenames."
      fi
  fi

  debug "command: {$command}"
  debug "quiet: {$quiet}"
  debug "revs: {$revs}"
  debug "dir: {$dir}"
  debug "opts: {$*}"
  debug
 111
 # Create an empty per-process cache directory under
 # $GIT_DIR/subtree-cache (named after the shell's PID) and remember its
 # path in the global $cachedir.  Any leftover directory of the same name
 # is removed first.
 cache_setup()
 {
     cachedir="$GIT_DIR/subtree-cache/$$"
     if ! rm -rf "$cachedir"; then
         die "Can't delete old cachedir: $cachedir"
     fi
     if ! mkdir -p "$cachedir"; then
         die "Can't create new cachedir: $cachedir"
     fi
     debug "Using cachedir: $cachedir" >&2
 }
 119
 # For every revision named in the arguments, print the value cached for
 # it (first line of $cachedir/<rev>), one per line.  Revisions without a
 # readable cache file are skipped silently.
 cache_get()
 {
     # $* is left unquoted on purpose: callers pass space-separated lists.
     for oldrev in $*; do
         [ -r "$cachedir/$oldrev" ] || continue
         read newrev <"$cachedir/$oldrev"
         echo $newrev
     done
 }
 129
 # Record the mapping $1 -> $2 by writing $2 into $cachedir/$1.
 # Ordinary keys may be written only once (a second write dies); the two
 # bookkeeping keys latest_old and latest_new may be overwritten freely.
 cache_set()
 {
     oldrev="$1"
     newrev="$2"
     case "$oldrev" in
         latest_old|latest_new)
             ;;
         *)
             if [ -e "$cachedir/$oldrev" ]; then
                 die "cache for $oldrev already exists!"
             fi
             ;;
     esac
     echo "$newrev" >"$cachedir/$oldrev"
 }
 141
 142# if a commit doesn't have a parent, this might not work.  But we only want
 143# to remove the parent from the rev-list, and since it doesn't exist, it won't
 144# be there anyway, so do nothing in that case.
 # Print "^<rev>^" (an exclusion of <rev>'s first parent, for use in a
 # rev-list argument list) when that parent can be resolved; print
 # nothing, and still succeed, when it cannot.
 try_remove_previous()
 {
     git rev-parse "$1^" >/dev/null 2>&1 || return 0
     echo "^$1^"
 }
 151
 # Scan the log of $2 for commits whose message carries a
 # "git-subtree-dir: $1" trailer.  For each such commit that also names a
 # mainline and a split revision, cache the mainline -> split mapping and
 # print rev-list exclusions for the parents of both (via
 # try_remove_previous) on stdout.
 find_existing_splits()
 {
     debug "Looking for prior splits..."
     dir="$1"
     revs="$2"
     git log --grep="^git-subtree-dir: $dir\$" \
         --pretty=format:'%s%n%n%b%nEND' $revs |
     while read key value rest; do
         if [ "$key" = "git-subtree-mainline:" ]; then
             main="$value"
         elif [ "$key" = "git-subtree-split:" ]; then
             sub="$value"
         elif [ -n "$main" ] && [ -n "$sub" ]; then
             # first other line after both trailers were seen: emit the
             # pair and reset for the next commit in the log
             debug "  Prior: $main -> $sub"
             cache_set $main $sub
             try_remove_previous "$main"
             try_remove_previous "$sub"
             main=
             sub=
         fi
     done
 }
 176
 # Create a new commit that reuses the author, committer and message of
 # commit $1 but points at tree $2 with the parent arguments in $3
 # (a string such as " -p <rev> -p <rev>").  The global $annotate is
 # prepended to the message.  The new commit id is whatever
 # `git commit-tree` prints; dies if the copy fails.
 copy_commit()
 {
     # We're going to set some environment vars here, so
     # do it in a subshell to get rid of them safely later
     debug copy_commit "{$1}" "{$2}" "{$3}"
     git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
     (
         # -r keeps backslashes in names and addresses intact
         read -r GIT_AUTHOR_NAME
         read -r GIT_AUTHOR_EMAIL
         read -r GIT_AUTHOR_DATE
         read -r GIT_COMMITTER_NAME
         read -r GIT_COMMITTER_EMAIL
         read -r GIT_COMMITTER_DATE
         export  GIT_AUTHOR_NAME \
                 GIT_AUTHOR_EMAIL \
                 GIT_AUTHOR_DATE \
                 GIT_COMMITTER_NAME \
                 GIT_COMMITTER_EMAIL \
                 GIT_COMMITTER_DATE
         # printf emits the annotation verbatim even when it looks like
         # an echo option (e.g. "-e"); cat forwards the rest of stdin,
         # i.e. the original subject and body.
         (printf '%s' "$annotate"; cat) |
         # $3 is intentionally unquoted so it splits into -p arguments
         git commit-tree "$2" $3
     ) || die "Can't copy commit $1"
 }
 200
 # Print the commit message used when a subtree is added:
 # a subject line, a blank line, then the three git-subtree-* trailers.
 #   $1 - subtree directory
 #   $2 - mainline revision the subtree is added onto
 #   $3 - subtree revision being added
 # printf is used instead of a `<<-` here-document because `<<-` strips
 # only leading tabs and silently breaks if the indentation becomes spaces.
 add_msg()
 {
     dir="$1"
     latest_old="$2"
     latest_new="$3"
     printf '%s\n' \
         "Add '$dir/' from commit '$latest_new'" \
         "" \
         "git-subtree-dir: $dir" \
         "git-subtree-mainline: $latest_old" \
         "git-subtree-split: $latest_new"
 }
 214
 # Print the commit message used when a split is merged back:
 # a subject line, a blank line, then the three git-subtree-* trailers.
 #   $1 - subtree directory
 #   $2 - latest mainline revision that was split
 #   $3 - latest revision of the split-out history
 # printf is used instead of a `<<-` here-document because `<<-` strips
 # only leading tabs and silently breaks if the indentation becomes spaces.
 merge_msg()
 {
     dir="$1"
     latest_old="$2"
     latest_new="$3"
     printf '%s\n' \
         "Split '$dir/' into commit '$latest_new'" \
         "" \
         "git-subtree-dir: $dir" \
         "git-subtree-mainline: $latest_old" \
         "git-subtree-split: $latest_new"
 }
 228
 # Print the top-level tree id (%T) of commit $1.  If git fails, the
 # shell exits with git's status.
 toptree_for_commit()
 {
     commit="$1"
     # a bare `exit` propagates the status of the failed git command
     git log -1 --pretty=format:'%T' "$commit" -- || exit
 }
 234
 # Print the object id that `git ls-tree` reports for path $2 in commit
 # $1.  Only the first listed entry is considered, and its name must be
 # exactly $2.  Nothing is printed when ls-tree lists no entry.
 subtree_for_commit()
 {
     commit="$1"
     dir="$2"
     git ls-tree "$commit" -- "$dir" |
     while read entry_mode entry_type entry_id entry_name; do
         assert [ "$entry_name" = "$dir" ]
         echo $entry_id
         break
     done
 }
 246
 # Decide whether tree $1 differs from its parent commit's tree.
 # The remaining arguments are the parent commits.  Returns 0 ("changed")
 # unless there is exactly one parent and its top-level tree equals $1,
 # in which case it returns 1 ("not changed").
 tree_changed()
 {
     tree=$1
     shift
     # zero or several parents: weird, consider it changed
     [ $# -eq 1 ] || return 0
     ptree=$(toptree_for_commit $1)
     if [ "$ptree" = "$tree" ]; then
         return 1   # not changed
     fi
     return 0   # changed
 }
 262
 # Produce the rewritten commit for revision $1 whose subtree is $2,
 # given the already-rewritten parents in $3 (space separated).
 # If any parent already has exactly tree $2, that parent's id is
 # printed and no commit is created; otherwise copy_commit is invoked
 # with the de-duplicated parents and its output is the result.
 copy_or_skip()
 {
     rev="$1"
     tree="$2"
     newparents="$3"
     assert [ -n "$tree" ]

     identical=
     nonidentical=
     p=
     gotparents=
     for parent in $newparents; do
         ptree=$(toptree_for_commit $parent) || exit $?
         if [ -z "$ptree" ]; then
             continue
         fi
         case "$ptree" in
             "$tree")
                 # an identical parent could be used in place of this rev.
                 identical="$parent"
                 ;;
             *)
                 nonidentical="$parent"
                 ;;
         esac

         # sometimes both old parents map to the same newparent;
         # only record a parent the first time it is seen
         case " $gotparents " in
             *" $parent "*)
                 ;;
             *)
                 gotparents="$gotparents $parent"
                 p="$p -p $parent"
                 ;;
         esac
     done

     if [ -z "$identical" ]; then
         copy_commit $rev $tree "$p" || exit $?
     else
         echo $identical
     fi
 }
 305
 # Die unless both the working tree and the index match HEAD, as judged
 # by `git diff-index`.
 ensure_clean()
 {
     git diff-index HEAD --exit-code --quiet ||
         die "Working tree has modifications.  Cannot add."
     git diff-index --cached HEAD --exit-code --quiet ||
         die "Index has modifications.  Cannot add."
 }
 315
 # `git subtree add`: import the single revision in $revs as directory
 # $dir and record the result as a commit whose parents are the current
 # HEAD (when it differs from the revision) and the imported revision.
 # Dies if $dir already exists, if the tree or index is dirty, or if
 # $revs does not name exactly one revision.
 cmd_add()
 {
     if [ -e "$dir" ]; then
         die "'$dir' already exists.  Cannot add."
     fi
     ensure_clean

     # $revs is deliberately unquoted: split it into positional parameters
     set -- $revs
     if [ $# -ne 1 ]; then
         die "You must provide exactly one revision.  Got: '$revs'"
     fi
     rev="$1"

     debug "Adding $dir as '$rev'..."
     git read-tree --prefix="$dir" "$rev" || exit $?
     # `--` forces $dir to be read as a path even if a ref has that name
     git checkout -- "$dir" || exit $?
     tree=$(git write-tree) || exit $?

     headrev=$(git rev-parse HEAD) || exit $?
     if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
         headp="-p $headrev"
     else
         headp=
     fi
     # $headp is intentionally unquoted: it is empty or "-p <rev>"
     commit=$(add_msg "$dir" "$headrev" "$rev" |
              git commit-tree "$tree" $headp -p "$rev") || exit $?
     git reset "$commit" || exit $?
 }
 344
 # `git subtree split`: walk the history of $revs oldest-first and build
 # a parallel history that contains only the contents of $dir.  Mappings
 # from original to rewritten commits are kept in the cache directory.
 # With --onto the given history is pre-seeded into the cache; unless
 # --ignore-joins is set, earlier splits found in the log are reused.
 # With --rejoin the result is merged back into HEAD.  Prints the id of
 # the newest rewritten commit on stdout and exits the shell.
 cmd_split()
 {
     debug "Splitting $dir..."
     cache_setup || exit $?

     if [ -n "$onto" ]; then
         debug "Reading history for --onto=$onto..."
         git rev-list $onto |
         while read rev; do
             # the 'onto' history is already just the subdir, so
             # any parent we find there can be used verbatim
             debug "  cache: $rev"
             cache_set $rev $rev
         done
     fi

     if [ -n "$ignore_joins" ]; then
         unrevs=
     else
         unrevs="$(find_existing_splits "$dir" "$revs")"
     fi

     # We can't restrict rev-list to only $dir here, because some of our
     # parents have the $dir contents as the root, and those won't match.
     # (and rev-list --follow doesn't seem to solve this)
     grl='git rev-list --reverse --parents $revs $unrevs'
     revmax=$(eval "$grl" | wc -l)
     revcount=0
     createcount=0
     # Carriage return used to redraw the progress counter in place.
     # NOTE(review): the pasted source shows a raw line break inside the
     # progress string; with `say -n` a CR is assumed to be the intent.
     progress_eol=$(printf '\r')
     eval "$grl" |
     while read rev parents; do
         revcount=$(($revcount + 1))
         say -n "$revcount/$revmax ($createcount)$progress_eol"
         debug "Processing commit: $rev"
         exists=$(cache_get $rev)
         if [ -n "$exists" ]; then
             debug "  prior: $exists"
             continue
         fi
         createcount=$(($createcount + 1))
         debug "  parents: $parents"
         newparents=$(cache_get $parents)
         debug "  newparents: $newparents"

         tree=$(subtree_for_commit $rev "$dir")
         debug "  tree is: $tree"
         # commits in which $dir does not exist contribute nothing
         [ -z "$tree" ] && continue

         newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
         debug "  newrev is: $newrev"
         cache_set $rev $newrev
         cache_set latest_new $newrev
         cache_set latest_old $rev
     done || exit $?
     latest_new=$(cache_get latest_new)
     if [ -z "$latest_new" ]; then
         die "No new revisions were found"
     fi

     if [ -n "$rejoin" ]; then
         debug "Merging split branch into HEAD..."
         latest_old=$(cache_get latest_old)
         # stop with git's status if the merge fails rather than
         # reporting success for a join that did not happen
         git merge -s ours \
             -m "$(merge_msg "$dir" "$latest_old" "$latest_new")" \
             "$latest_new" >&2 || exit $?
     fi
     echo $latest_new
     exit 0
 }
 414
 # `git subtree merge`: merge the single revision in $revs into the
 # current branch with the subtree strategy.  Requires a clean tree and
 # index and exactly one revision.
 cmd_merge()
 {
     ensure_clean

     # unquoted on purpose: split $revs into positional parameters
     set -- $revs
     [ $# -eq 1 ] || die "You must provide exactly one revision.  Got: '$revs'"
     rev="$1"

     git merge -s subtree $rev
 }
 427
 # `git subtree pull`: after checking that the tree and index are clean,
 # hand all arguments (repository and refspecs) to `git pull` with the
 # subtree merge strategy.  The function's status is git's status.
 # No shell tracing is enabled here: diagnostic output is controlled by
 # the -d option only.
 cmd_pull()
 {
     ensure_clean
     git pull -s subtree "$@"
 }
 434
 435"cmd_$command" "$@"