c5c0201448eb455ef29a25f5a2950d85ea74363f
   1#!/bin/bash
   2#
   3# git-subtree.sh: split/join git repositories in subdirectories of this one
   4#
   5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
   6#
   7if [ $# -eq 0 ]; then
   8    set -- -h
   9fi
  10OPTS_SPEC="\
  11git subtree add   --prefix=<prefix> <commit>
  12git subtree merge --prefix=<prefix> <commit>
  13git subtree pull  --prefix=<prefix> <repository> <refspec...>
  14git subtree split --prefix=<prefix> <commit...>
  15--
  16h,help        show the help
  17q             quiet
  18d             show debug messages
  19prefix=       the name of the subdir to split out
  20 options for 'split'
  21annotate=     add a prefix to commit message of new commits
  22b,branch=     create a new branch from the split subtree
  23ignore-joins  ignore prior --rejoin commits
  24onto=         try connecting new tree to an existing one
  25rejoin        merge the new branch back into HEAD
  26 options for 'add', 'merge', and 'pull'
  27squash        merge subtree changes as a single commit
  28"
  29eval $(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)
  30. $(git --exec-path)/git-sh-setup
  31require_work_tree
  32
  33quiet=
  34branch=
  35debug=
  36command=
  37onto=
  38rejoin=
  39ignore_joins=
  40annotate=
  41squash=
  42
  43debug()
  44{
  45        if [ -n "$debug" ]; then
  46                echo "$@" >&2
  47        fi
  48}
  49
  50say()
  51{
  52        if [ -z "$quiet" ]; then
  53                echo "$@" >&2
  54        fi
  55}
  56
  57assert()
  58{
  59        if "$@"; then
  60                :
  61        else
  62                die "assertion failed: " "$@"
  63        fi
  64}
  65
  66
  67#echo "Options: $*"
  68
  69while [ $# -gt 0 ]; do
  70        opt="$1"
  71        shift
  72        case "$opt" in
  73                -q) quiet=1 ;;
  74                -d) debug=1 ;;
  75                --annotate) annotate="$1"; shift ;;
  76                --no-annotate) annotate= ;;
  77                -b) branch="$1"; shift ;;
  78                --prefix) prefix="$1"; shift ;;
  79                --no-prefix) prefix= ;;
  80                --onto) onto="$1"; shift ;;
  81                --no-onto) onto= ;;
  82                --rejoin) rejoin=1 ;;
  83                --no-rejoin) rejoin= ;;
  84                --ignore-joins) ignore_joins=1 ;;
  85                --no-ignore-joins) ignore_joins= ;;
  86                --squash) squash=1 ;;
  87                --no-squash) squash= ;;
  88                --) break ;;
  89                *) die "Unexpected option: $opt" ;;
  90        esac
  91done
  92
  93command="$1"
  94shift
  95case "$command" in
  96        add|merge|pull) default= ;;
  97        split) default="--default HEAD" ;;
  98        *) die "Unknown command '$command'" ;;
  99esac
 100
 101if [ -z "$prefix" ]; then
 102        die "You must provide the --prefix option."
 103fi
 104dir="$prefix"
 105
 106if [ "$command" != "pull" ]; then
 107        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 108        dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
 109        if [ -n "$dirs" ]; then
 110                die "Error: Use --prefix instead of bare filenames."
 111        fi
 112fi
 113
 114debug "command: {$command}"
 115debug "quiet: {$quiet}"
 116debug "revs: {$revs}"
 117debug "dir: {$dir}"
 118debug "opts: {$*}"
 119debug
 120
 121cache_setup()
 122{
 123        cachedir="$GIT_DIR/subtree-cache/$$"
 124        rm -rf "$cachedir" || die "Can't delete old cachedir: $cachedir"
 125        mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir"
 126        debug "Using cachedir: $cachedir" >&2
 127}
 128
 129cache_get()
 130{
 131        for oldrev in $*; do
 132                if [ -r "$cachedir/$oldrev" ]; then
 133                        read newrev <"$cachedir/$oldrev"
 134                        echo $newrev
 135                fi
 136        done
 137}
 138
 139cache_set()
 140{
 141        oldrev="$1"
 142        newrev="$2"
 143        if [ "$oldrev" != "latest_old" \
 144             -a "$oldrev" != "latest_new" \
 145             -a -e "$cachedir/$oldrev" ]; then
 146                die "cache for $oldrev already exists!"
 147        fi
 148        echo "$newrev" >"$cachedir/$oldrev"
 149}
 150
 151rev_exists()
 152{
 153        if git rev-parse "$1" >/dev/null 2>&1; then
 154                return 0
 155        else
 156                return 1
 157        fi
 158}
 159
 160# if a commit doesn't have a parent, this might not work.  But we only want
 161# to remove the parent from the rev-list, and since it doesn't exist, it won't
 162# be there anyway, so do nothing in that case.
 163try_remove_previous()
 164{
 165        if rev_exists "$1^"; then
 166                echo "^$1^"
 167        fi
 168}
 169
 170find_latest_squash()
 171{
 172        debug "Looking for latest squash ($dir)..."
 173        dir="$1"
 174        sq=
 175        main=
 176        sub=
 177        git log --grep="^git-subtree-dir: $dir\$" \
 178                --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
 179        while read a b junk; do
 180                debug "$a $b $junk"
 181                debug "{{$sq/$main/$sub}}"
 182                case "$a" in
 183                        START) sq="$b" ;;
 184                        git-subtree-mainline:) main="$b" ;;
 185                        git-subtree-split:) sub="$b" ;;
 186                        END)
 187                                if [ -n "$sub" ]; then
 188                                        if [ -n "$main" ]; then
 189                                                # a rejoin commit?
 190                                                # Pretend its sub was a squash.
 191                                                sq="$sub"
 192                                        fi
 193                                        debug "Squash found: $sq $sub"
 194                                        echo "$sq" "$sub"
 195                                        break
 196                                fi
 197                                sq=
 198                                main=
 199                                sub=
 200                                ;;
 201                esac
 202        done
 203}
 204
 205find_existing_splits()
 206{
 207        debug "Looking for prior splits..."
 208        dir="$1"
 209        revs="$2"
 210        main=
 211        sub=
 212        git log --grep="^git-subtree-dir: $dir\$" \
 213                --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
 214        while read a b junk; do
 215                case "$a" in
 216                        START) main="$b"; sq="$b" ;;
 217                        git-subtree-mainline:) main="$b" ;;
 218                        git-subtree-split:) sub="$b" ;;
 219                        END)
 220                                if [ -z "$main" -a -n "$sub" ]; then
 221                                        # squash commits refer to a subtree
 222                                        cache_set "$sq" "$sub"
 223                                fi
 224                                if [ -n "$main" -a -n "$sub" ]; then
 225                                        debug "  Prior: $main -> $sub"
 226                                        cache_set $main $sub
 227                                        try_remove_previous "$main"
 228                                        try_remove_previous "$sub"
 229                                fi
 230                                main=
 231                                sub=
 232                                ;;
 233                esac
 234        done
 235}
 236
 237copy_commit()
 238{
 239        # We're going to set some environment vars here, so
 240        # do it in a subshell to get rid of them safely later
 241        debug copy_commit "{$1}" "{$2}" "{$3}"
 242        git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
 243        (
 244                read GIT_AUTHOR_NAME
 245                read GIT_AUTHOR_EMAIL
 246                read GIT_AUTHOR_DATE
 247                read GIT_COMMITTER_NAME
 248                read GIT_COMMITTER_EMAIL
 249                read GIT_COMMITTER_DATE
 250                export  GIT_AUTHOR_NAME \
 251                        GIT_AUTHOR_EMAIL \
 252                        GIT_AUTHOR_DATE \
 253                        GIT_COMMITTER_NAME \
 254                        GIT_COMMITTER_EMAIL \
 255                        GIT_COMMITTER_DATE
 256                (echo -n "$annotate"; cat ) |
 257                git commit-tree "$2" $3  # reads the rest of stdin
 258        ) || die "Can't copy commit $1"
 259}
 260
 261add_msg()
 262{
 263        dir="$1"
 264        latest_old="$2"
 265        latest_new="$3"
 266        cat <<-EOF
 267                Add '$dir/' from commit '$latest_new'
 268                
 269                git-subtree-dir: $dir
 270                git-subtree-mainline: $latest_old
 271                git-subtree-split: $latest_new
 272        EOF
 273}
 274
 275rejoin_msg()
 276{
 277        dir="$1"
 278        latest_old="$2"
 279        latest_new="$3"
 280        cat <<-EOF
 281                Split '$dir/' into commit '$latest_new'
 282                
 283                git-subtree-dir: $dir
 284                git-subtree-mainline: $latest_old
 285                git-subtree-split: $latest_new
 286        EOF
 287}
 288
 289squash_msg()
 290{
 291        dir="$1"
 292        oldsub="$2"
 293        newsub="$3"
 294        newsub_short=$(git rev-parse --short "$newsub")
 295        
 296        if [ -n "$oldsub" ]; then
 297                oldsub_short=$(git rev-parse --short "$oldsub")
 298                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
 299                echo
 300                git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
 301                git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
 302        else
 303                echo "Squashed '$dir/' content from commit $newsub_short"
 304        fi
 305        
 306        echo
 307        echo "git-subtree-dir: $dir"
 308        echo "git-subtree-split: $newsub"
 309}
 310
 311toptree_for_commit()
 312{
 313        commit="$1"
 314        git log -1 --pretty=format:'%T' "$commit" -- || exit $?
 315}
 316
 317subtree_for_commit()
 318{
 319        commit="$1"
 320        dir="$2"
 321        git ls-tree "$commit" -- "$dir" |
 322        while read mode type tree name; do
 323                assert [ "$name" = "$dir" ]
 324                echo $tree
 325                break
 326        done
 327}
 328
 329tree_changed()
 330{
 331        tree=$1
 332        shift
 333        if [ $# -ne 1 ]; then
 334                return 0   # weird parents, consider it changed
 335        else
 336                ptree=$(toptree_for_commit $1)
 337                if [ "$ptree" != "$tree" ]; then
 338                        return 0   # changed
 339                else
 340                        return 1   # not changed
 341                fi
 342        fi
 343}
 344
 345new_squash_commit()
 346{
 347        old="$1"
 348        oldsub="$2"
 349        newsub="$3"
 350        tree=$(toptree_for_commit $newsub) || exit $?
 351        if [ -n "$old" ]; then
 352                squash_msg "$dir" "$oldsub" "$newsub" | 
 353                        git commit-tree "$tree" -p "$old" || exit $?
 354        else
 355                squash_msg "$dir" "" "$newsub" |
 356                        git commit-tree "$tree" || exit $?
 357        fi
 358}
 359
 360copy_or_skip()
 361{
 362        rev="$1"
 363        tree="$2"
 364        newparents="$3"
 365        assert [ -n "$tree" ]
 366
 367        identical=
 368        nonidentical=
 369        p=
 370        gotparents=
 371        for parent in $newparents; do
 372                ptree=$(toptree_for_commit $parent) || exit $?
 373                [ -z "$ptree" ] && continue
 374                if [ "$ptree" = "$tree" ]; then
 375                        # an identical parent could be used in place of this rev.
 376                        identical="$parent"
 377                else
 378                        nonidentical="$parent"
 379                fi
 380                
 381                # sometimes both old parents map to the same newparent;
 382                # eliminate duplicates
 383                is_new=1
 384                for gp in $gotparents; do
 385                        if [ "$gp" = "$parent" ]; then
 386                                is_new=
 387                                break
 388                        fi
 389                done
 390                if [ -n "$is_new" ]; then
 391                        gotparents="$gotparents $parent"
 392                        p="$p -p $parent"
 393                fi
 394        done
 395        
 396        if [ -n "$identical" ]; then
 397                echo $identical
 398        else
 399                copy_commit $rev $tree "$p" || exit $?
 400        fi
 401}
 402
 403ensure_clean()
 404{
 405        if ! git diff-index HEAD --exit-code --quiet; then
 406                die "Working tree has modifications.  Cannot add."
 407        fi
 408        if ! git diff-index --cached HEAD --exit-code --quiet; then
 409                die "Index has modifications.  Cannot add."
 410        fi
 411}
 412
 413cmd_add()
 414{
 415        if [ -e "$dir" ]; then
 416                die "'$dir' already exists.  Cannot add."
 417        fi
 418        ensure_clean
 419        
 420        set -- $revs
 421        if [ $# -ne 1 ]; then
 422                die "You must provide exactly one revision.  Got: '$revs'"
 423        fi
 424        rev="$1"
 425        
 426        debug "Adding $dir as '$rev'..."
 427        git read-tree --prefix="$dir" $rev || exit $?
 428        git checkout "$dir" || exit $?
 429        tree=$(git write-tree) || exit $?
 430        
 431        headrev=$(git rev-parse HEAD) || exit $?
 432        if [ -n "$headrev" -a "$headrev" != "$rev" ]; then
 433                headp="-p $headrev"
 434        else
 435                headp=
 436        fi
 437        
 438        if [ -n "$squash" ]; then
 439                rev=$(new_squash_commit "" "" "$rev") || exit $?
 440                commit=$(echo "Merge commit '$rev' as '$dir'" |
 441                         git commit-tree $tree $headp -p "$rev") || exit $?
 442        else
 443                commit=$(add_msg "$dir" "$headrev" "$rev" |
 444                         git commit-tree $tree $headp -p "$rev") || exit $?
 445        fi
 446        git reset "$commit" || exit $?
 447        
 448        say "Added dir '$dir'"
 449}
 450
 451cmd_split()
 452{
 453        if [ -n "$branch" ] && rev_exists "refs/heads/$branch"; then
 454                die "Branch '$branch' already exists."
 455        fi
 456
 457        debug "Splitting $dir..."
 458        cache_setup || exit $?
 459        
 460        if [ -n "$onto" ]; then
 461                debug "Reading history for --onto=$onto..."
 462                git rev-list $onto |
 463                while read rev; do
 464                        # the 'onto' history is already just the subdir, so
 465                        # any parent we find there can be used verbatim
 466                        debug "  cache: $rev"
 467                        cache_set $rev $rev
 468                done
 469        fi
 470        
 471        if [ -n "$ignore_joins" ]; then
 472                unrevs=
 473        else
 474                unrevs="$(find_existing_splits "$dir" "$revs")"
 475        fi
 476        
 477        # We can't restrict rev-list to only $dir here, because some of our
 478        # parents have the $dir contents the root, and those won't match.
 479        # (and rev-list --follow doesn't seem to solve this)
 480        grl='git rev-list --reverse --parents $revs $unrevs'
 481        revmax=$(eval "$grl" | wc -l)
 482        revcount=0
 483        createcount=0
 484        eval "$grl" |
 485        while read rev parents; do
 486                revcount=$(($revcount + 1))
 487                say -n "$revcount/$revmax ($createcount)
"
 488                debug "Processing commit: $rev"
 489                exists=$(cache_get $rev)
 490                if [ -n "$exists" ]; then
 491                        debug "  prior: $exists"
 492                        continue
 493                fi
 494                createcount=$(($createcount + 1))
 495                debug "  parents: $parents"
 496                newparents=$(cache_get $parents)
 497                debug "  newparents: $newparents"
 498                
 499                tree=$(subtree_for_commit $rev "$dir")
 500                debug "  tree is: $tree"
 501                
 502                # ugly.  is there no better way to tell if this is a subtree
 503                # vs. a mainline commit?  Does it matter?
 504                [ -z $tree ] && continue
 505
 506                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
 507                debug "  newrev is: $newrev"
 508                cache_set $rev $newrev
 509                cache_set latest_new $newrev
 510                cache_set latest_old $rev
 511        done || exit $?
 512        latest_new=$(cache_get latest_new)
 513        if [ -z "$latest_new" ]; then
 514                die "No new revisions were found"
 515        fi
 516        
 517        if [ -n "$rejoin" ]; then
 518                debug "Merging split branch into HEAD..."
 519                latest_old=$(cache_get latest_old)
 520                git merge -s ours \
 521                        -m "$(rejoin_msg $dir $latest_old $latest_new)" \
 522                        $latest_new >&2 || exit $?
 523        fi
 524        if [ -n "$branch" ]; then
 525                git update-ref -m 'subtree split' "refs/heads/$branch" \
 526                        $latest_new "" || exit $?
 527                say "Created branch '$branch'"
 528        fi
 529        echo $latest_new
 530        exit 0
 531}
 532
 533cmd_merge()
 534{
 535        ensure_clean
 536        
 537        set -- $revs
 538        if [ $# -ne 1 ]; then
 539                die "You must provide exactly one revision.  Got: '$revs'"
 540        fi
 541        rev="$1"
 542        
 543        if [ -n "$squash" ]; then
 544                first_split="$(find_latest_squash "$dir")"
 545                if [ -z "$first_split" ]; then
 546                        die "Can't squash-merge: '$dir' was never added."
 547                fi
 548                set $first_split
 549                old=$1
 550                sub=$2
 551                if [ "$sub" = "$rev" ]; then
 552                        say "Subtree is already at commit $rev."
 553                        exit 0
 554                fi
 555                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
 556                debug "New squash commit: $new"
 557                rev="$new"
 558        fi
 559        
 560        git merge -s subtree $rev
 561}
 562
 563cmd_pull()
 564{
 565        ensure_clean
 566        set -x
 567        git pull -s subtree "$@"
 568}
 569
 570"cmd_$command" "$@"