git-subtree.sh on commit "Jakub's changes broke the progress message slightly." (e2d0a45)
   1#!/bin/bash
   2#
   3# git-subtree.sh: split/join git repositories in subdirectories of this one
   4#
   5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
   6#
   7if [ $# -eq 0 ]; then
   8    set -- -h
   9fi
  10OPTS_SPEC="\
  11git subtree add   --prefix=<prefix> <commit>
  12git subtree merge --prefix=<prefix> <commit>
  13git subtree pull  --prefix=<prefix> <repository> <refspec...>
  14git subtree split --prefix=<prefix> <commit...>
  15--
  16h,help        show the help
  17q             quiet
  18d             show debug messages
  19P,prefix=     the name of the subdir to split out
  20m,message=    use the given message as the commit message for the merge commit
  21 options for 'split'
  22annotate=     add a prefix to commit message of new commits
  23b,branch=     create a new branch from the split subtree
  24ignore-joins  ignore prior --rejoin commits
  25onto=         try connecting new tree to an existing one
  26rejoin        merge the new branch back into HEAD
  27 options for 'add', 'merge', and 'pull'
  28squash        merge subtree changes as a single commit
  29"
  30eval $(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)
  31PATH=$(git --exec-path):$PATH
  32. git-sh-setup
  33require_work_tree
  34
  35quiet=
  36branch=
  37debug=
  38command=
  39onto=
  40rejoin=
  41ignore_joins=
  42annotate=
  43squash=
  44message=
  45
  46debug()
  47{
  48        if [ -n "$debug" ]; then
  49                echo "$@" >&2
  50        fi
  51}
  52
  53say()
  54{
  55        if [ -z "$quiet" ]; then
  56                echo "$@" >&2
  57        fi
  58}
  59
  60assert()
  61{
  62        if "$@"; then
  63                :
  64        else
  65                die "assertion failed: " "$@"
  66        fi
  67}
  68
  69
  70#echo "Options: $*"
  71
  72while [ $# -gt 0 ]; do
  73        opt="$1"
  74        shift
  75        case "$opt" in
  76                -q) quiet=1 ;;
  77                -d) debug=1 ;;
  78                --annotate) annotate="$1"; shift ;;
  79                --no-annotate) annotate= ;;
  80                -b) branch="$1"; shift ;;
  81                -P) prefix="$1"; shift ;;
  82                -m) message="$1"; shift ;;
  83                --no-prefix) prefix= ;;
  84                --onto) onto="$1"; shift ;;
  85                --no-onto) onto= ;;
  86                --rejoin) rejoin=1 ;;
  87                --no-rejoin) rejoin= ;;
  88                --ignore-joins) ignore_joins=1 ;;
  89                --no-ignore-joins) ignore_joins= ;;
  90                --squash) squash=1 ;;
  91                --no-squash) squash= ;;
  92                --) break ;;
  93                *) die "Unexpected option: $opt" ;;
  94        esac
  95done
  96
  97command="$1"
  98shift
  99case "$command" in
 100        add|merge|pull) default= ;;
 101        split) default="--default HEAD" ;;
 102        *) die "Unknown command '$command'" ;;
 103esac
 104
 105if [ -z "$prefix" ]; then
 106        die "You must provide the --prefix option."
 107fi
 108dir="$(dirname "$prefix/.")"
 109
 110if [ "$command" != "pull" ]; then
 111        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 112        dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
 113        if [ -n "$dirs" ]; then
 114                die "Error: Use --prefix instead of bare filenames."
 115        fi
 116fi
 117
 118debug "command: {$command}"
 119debug "quiet: {$quiet}"
 120debug "revs: {$revs}"
 121debug "dir: {$dir}"
 122debug "opts: {$*}"
 123debug
 124
 125cache_setup()
 126{
 127        cachedir="$GIT_DIR/subtree-cache/$$"
 128        rm -rf "$cachedir" || die "Can't delete old cachedir: $cachedir"
 129        mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir"
 130        debug "Using cachedir: $cachedir" >&2
 131}
 132
 133cache_get()
 134{
 135        for oldrev in $*; do
 136                if [ -r "$cachedir/$oldrev" ]; then
 137                        read newrev <"$cachedir/$oldrev"
 138                        echo $newrev
 139                fi
 140        done
 141}
 142
 143cache_set()
 144{
 145        oldrev="$1"
 146        newrev="$2"
 147        if [ "$oldrev" != "latest_old" \
 148             -a "$oldrev" != "latest_new" \
 149             -a -e "$cachedir/$oldrev" ]; then
 150                die "cache for $oldrev already exists!"
 151        fi
 152        echo "$newrev" >"$cachedir/$oldrev"
 153}
 154
 155rev_exists()
 156{
 157        if git rev-parse "$1" >/dev/null 2>&1; then
 158                return 0
 159        else
 160                return 1
 161        fi
 162}
 163
 164rev_is_descendant_of_branch()
 165{
 166        newrev="$1"
 167        branch="$2"
 168        branch_hash=$(git rev-parse $branch)
 169        match=$(git rev-list -1 $branch_hash ^$newrev)
 170
 171        if [ -z "$match" ]; then
 172                return 0
 173        else
 174                return 1
 175        fi
 176}
 177
 178# if a commit doesn't have a parent, this might not work.  But we only want
 179# to remove the parent from the rev-list, and since it doesn't exist, it won't
 180# be there anyway, so do nothing in that case.
 181try_remove_previous()
 182{
 183        if rev_exists "$1^"; then
 184                echo "^$1^"
 185        fi
 186}
 187
 188find_latest_squash()
 189{
 190        debug "Looking for latest squash ($dir)..."
 191        dir="$1"
 192        sq=
 193        main=
 194        sub=
 195        git log --grep="^git-subtree-dir: $dir/*\$" \
 196                --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
 197        while read a b junk; do
 198                debug "$a $b $junk"
 199                debug "{{$sq/$main/$sub}}"
 200                case "$a" in
 201                        START) sq="$b" ;;
 202                        git-subtree-mainline:) main="$b" ;;
 203                        git-subtree-split:) sub="$b" ;;
 204                        END)
 205                                if [ -n "$sub" ]; then
 206                                        if [ -n "$main" ]; then
 207                                                # a rejoin commit?
 208                                                # Pretend its sub was a squash.
 209                                                sq="$sub"
 210                                        fi
 211                                        debug "Squash found: $sq $sub"
 212                                        echo "$sq" "$sub"
 213                                        break
 214                                fi
 215                                sq=
 216                                main=
 217                                sub=
 218                                ;;
 219                esac
 220        done
 221}
 222
 223find_existing_splits()
 224{
 225        debug "Looking for prior splits..."
 226        dir="$1"
 227        revs="$2"
 228        main=
 229        sub=
 230        git log --grep="^git-subtree-dir: $dir/*\$" \
 231                --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
 232        while read a b junk; do
 233                case "$a" in
 234                        START) sq="$b" ;;
 235                        git-subtree-mainline:) main="$b" ;;
 236                        git-subtree-split:) sub="$b" ;;
 237                        END)
 238                                debug "  Main is: '$main'"
 239                                if [ -z "$main" -a -n "$sub" ]; then
 240                                        # squash commits refer to a subtree
 241                                        debug "  Squash: $sq from $sub"
 242                                        cache_set "$sq" "$sub"
 243                                fi
 244                                if [ -n "$main" -a -n "$sub" ]; then
 245                                        debug "  Prior: $main -> $sub"
 246                                        cache_set $main $sub
 247                                        try_remove_previous "$main"
 248                                        try_remove_previous "$sub"
 249                                fi
 250                                main=
 251                                sub=
 252                                ;;
 253                esac
 254        done
 255}
 256
 257copy_commit()
 258{
 259        # We're going to set some environment vars here, so
 260        # do it in a subshell to get rid of them safely later
 261        debug copy_commit "{$1}" "{$2}" "{$3}"
 262        git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
 263        (
 264                read GIT_AUTHOR_NAME
 265                read GIT_AUTHOR_EMAIL
 266                read GIT_AUTHOR_DATE
 267                read GIT_COMMITTER_NAME
 268                read GIT_COMMITTER_EMAIL
 269                read GIT_COMMITTER_DATE
 270                export  GIT_AUTHOR_NAME \
 271                        GIT_AUTHOR_EMAIL \
 272                        GIT_AUTHOR_DATE \
 273                        GIT_COMMITTER_NAME \
 274                        GIT_COMMITTER_EMAIL \
 275                        GIT_COMMITTER_DATE
 276                (echo -n "$annotate"; cat ) |
 277                git commit-tree "$2" $3  # reads the rest of stdin
 278        ) || die "Can't copy commit $1"
 279}
 280
 281add_msg()
 282{
 283        dir="$1"
 284        latest_old="$2"
 285        latest_new="$3"
 286        if [ -n "$message" ]; then
 287                commit_message="$message"
 288        else
 289                commit_message="Add '$dir/' from commit '$latest_new'"
 290        fi
 291        cat <<-EOF
 292                $commit_message
 293                
 294                git-subtree-dir: $dir
 295                git-subtree-mainline: $latest_old
 296                git-subtree-split: $latest_new
 297        EOF
 298}
 299
 300add_squashed_msg()
 301{
 302        if [ -n "$message" ]; then
 303                echo "$message"
 304        else
 305                echo "Merge commit '$1' as '$2'"
 306        fi
 307}
 308
 309rejoin_msg()
 310{
 311        dir="$1"
 312        latest_old="$2"
 313        latest_new="$3"
 314        if [ -n "$message" ]; then
 315                commit_message="$message"
 316        else
 317                commit_message="Split '$dir/' into commit '$latest_new'"
 318        fi
 319        cat <<-EOF
 320                $commit_message
 321                
 322                git-subtree-dir: $dir
 323                git-subtree-mainline: $latest_old
 324                git-subtree-split: $latest_new
 325        EOF
 326}
 327
 328squash_msg()
 329{
 330        dir="$1"
 331        oldsub="$2"
 332        newsub="$3"
 333        newsub_short=$(git rev-parse --short "$newsub")
 334        
 335        if [ -n "$oldsub" ]; then
 336                oldsub_short=$(git rev-parse --short "$oldsub")
 337                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
 338                echo
 339                git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
 340                git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
 341        else
 342                echo "Squashed '$dir/' content from commit $newsub_short"
 343        fi
 344        
 345        echo
 346        echo "git-subtree-dir: $dir"
 347        echo "git-subtree-split: $newsub"
 348}
 349
 350toptree_for_commit()
 351{
 352        commit="$1"
 353        git log -1 --pretty=format:'%T' "$commit" -- || exit $?
 354}
 355
 356subtree_for_commit()
 357{
 358        commit="$1"
 359        dir="$2"
 360        git ls-tree "$commit" -- "$dir" |
 361        while read mode type tree name; do
 362                assert [ "$name" = "$dir" ]
 363                assert [ "$type" = "tree" ]
 364                echo $tree
 365                break
 366        done
 367}
 368
 369tree_changed()
 370{
 371        tree=$1
 372        shift
 373        if [ $# -ne 1 ]; then
 374                return 0   # weird parents, consider it changed
 375        else
 376                ptree=$(toptree_for_commit $1)
 377                if [ "$ptree" != "$tree" ]; then
 378                        return 0   # changed
 379                else
 380                        return 1   # not changed
 381                fi
 382        fi
 383}
 384
 385new_squash_commit()
 386{
 387        old="$1"
 388        oldsub="$2"
 389        newsub="$3"
 390        tree=$(toptree_for_commit $newsub) || exit $?
 391        if [ -n "$old" ]; then
 392                squash_msg "$dir" "$oldsub" "$newsub" | 
 393                        git commit-tree "$tree" -p "$old" || exit $?
 394        else
 395                squash_msg "$dir" "" "$newsub" |
 396                        git commit-tree "$tree" || exit $?
 397        fi
 398}
 399
 400copy_or_skip()
 401{
 402        rev="$1"
 403        tree="$2"
 404        newparents="$3"
 405        assert [ -n "$tree" ]
 406
 407        identical=
 408        nonidentical=
 409        p=
 410        gotparents=
 411        for parent in $newparents; do
 412                ptree=$(toptree_for_commit $parent) || exit $?
 413                [ -z "$ptree" ] && continue
 414                if [ "$ptree" = "$tree" ]; then
 415                        # an identical parent could be used in place of this rev.
 416                        identical="$parent"
 417                else
 418                        nonidentical="$parent"
 419                fi
 420                
 421                # sometimes both old parents map to the same newparent;
 422                # eliminate duplicates
 423                is_new=1
 424                for gp in $gotparents; do
 425                        if [ "$gp" = "$parent" ]; then
 426                                is_new=
 427                                break
 428                        fi
 429                done
 430                if [ -n "$is_new" ]; then
 431                        gotparents="$gotparents $parent"
 432                        p="$p -p $parent"
 433                fi
 434        done
 435        
 436        if [ -n "$identical" ]; then
 437                echo $identical
 438        else
 439                copy_commit $rev $tree "$p" || exit $?
 440        fi
 441}
 442
 443ensure_clean()
 444{
 445        if ! git diff-index HEAD --exit-code --quiet; then
 446                die "Working tree has modifications.  Cannot add."
 447        fi
 448        if ! git diff-index --cached HEAD --exit-code --quiet; then
 449                die "Index has modifications.  Cannot add."
 450        fi
 451}
 452
 453cmd_add()
 454{
 455        if [ -e "$dir" ]; then
 456                die "'$dir' already exists.  Cannot add."
 457        fi
 458        ensure_clean
 459        
 460        set -- $revs
 461        if [ $# -ne 1 ]; then
 462                die "You must provide exactly one revision.  Got: '$revs'"
 463        fi
 464        rev="$1"
 465        
 466        debug "Adding $dir as '$rev'..."
 467        git read-tree --prefix="$dir" $rev || exit $?
 468        git checkout -- "$dir" || exit $?
 469        tree=$(git write-tree) || exit $?
 470        
 471        headrev=$(git rev-parse HEAD) || exit $?
 472        if [ -n "$headrev" -a "$headrev" != "$rev" ]; then
 473                headp="-p $headrev"
 474        else
 475                headp=
 476        fi
 477        
 478        if [ -n "$squash" ]; then
 479                rev=$(new_squash_commit "" "" "$rev") || exit $?
 480                commit=$(add_squashed_msg "$rev" "$dir" |
 481                         git commit-tree $tree $headp -p "$rev") || exit $?
 482        else
 483                commit=$(add_msg "$dir" "$headrev" "$rev" |
 484                         git commit-tree $tree $headp -p "$rev") || exit $?
 485        fi
 486        git reset "$commit" || exit $?
 487        
 488        say "Added dir '$dir'"
 489}
 490
 491cmd_split()
 492{
 493        debug "Splitting $dir..."
 494        cache_setup || exit $?
 495        
 496        if [ -n "$onto" ]; then
 497                debug "Reading history for --onto=$onto..."
 498                git rev-list $onto |
 499                while read rev; do
 500                        # the 'onto' history is already just the subdir, so
 501                        # any parent we find there can be used verbatim
 502                        debug "  cache: $rev"
 503                        cache_set $rev $rev
 504                done
 505        fi
 506        
 507        if [ -n "$ignore_joins" ]; then
 508                unrevs=
 509        else
 510                unrevs="$(find_existing_splits "$dir" "$revs")"
 511        fi
 512        
 513        # We can't restrict rev-list to only $dir here, because some of our
 514        # parents have the $dir contents the root, and those won't match.
 515        # (and rev-list --follow doesn't seem to solve this)
 516        grl='git rev-list --reverse --parents $revs $unrevs'
 517        revmax=$(eval "$grl" | wc -l)
 518        revcount=0
 519        createcount=0
 520        eval "$grl" |
 521        while read rev parents; do
 522                revcount=$(($revcount + 1))
 523                say -n "$revcount/$revmax ($createcount)
"
 524                debug "Processing commit: $rev"
 525                exists=$(cache_get $rev)
 526                if [ -n "$exists" ]; then
 527                        debug "  prior: $exists"
 528                        continue
 529                fi
 530                createcount=$(($createcount + 1))
 531                debug "  parents: $parents"
 532                newparents=$(cache_get $parents)
 533                debug "  newparents: $newparents"
 534                
 535                tree=$(subtree_for_commit $rev "$dir")
 536                debug "  tree is: $tree"
 537                
 538                # ugly.  is there no better way to tell if this is a subtree
 539                # vs. a mainline commit?  Does it matter?
 540                if [ -z $tree ]; then
 541                        cache_set $rev $rev
 542                        continue
 543                fi
 544
 545                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
 546                debug "  newrev is: $newrev"
 547                cache_set $rev $newrev
 548                cache_set latest_new $newrev
 549                cache_set latest_old $rev
 550        done || exit $?
 551        latest_new=$(cache_get latest_new)
 552        if [ -z "$latest_new" ]; then
 553                die "No new revisions were found"
 554        fi
 555        
 556        if [ -n "$rejoin" ]; then
 557                debug "Merging split branch into HEAD..."
 558                latest_old=$(cache_get latest_old)
 559                git merge -s ours \
 560                        -m "$(rejoin_msg $dir $latest_old $latest_new)" \
 561                        $latest_new >&2 || exit $?
 562        fi
 563        if [ -n "$branch" ]; then
 564                if rev_exists "refs/heads/$branch"; then
 565                        if ! rev_is_descendant_of_branch $latest_new $branch; then
 566                                die "Branch '$branch' is not an ancestor of commit '$latest_new'."
 567                        fi
 568                        action='Updated'
 569                else
 570                        action='Created'
 571                fi
 572                git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
 573                say "$action branch '$branch'"
 574        fi
 575        echo $latest_new
 576        exit 0
 577}
 578
 579cmd_merge()
 580{
 581        ensure_clean
 582        
 583        set -- $revs
 584        if [ $# -ne 1 ]; then
 585                die "You must provide exactly one revision.  Got: '$revs'"
 586        fi
 587        rev="$1"
 588        
 589        if [ -n "$squash" ]; then
 590                first_split="$(find_latest_squash "$dir")"
 591                if [ -z "$first_split" ]; then
 592                        die "Can't squash-merge: '$dir' was never added."
 593                fi
 594                set $first_split
 595                old=$1
 596                sub=$2
 597                if [ "$sub" = "$rev" ]; then
 598                        say "Subtree is already at commit $rev."
 599                        exit 0
 600                fi
 601                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
 602                debug "New squash commit: $new"
 603                rev="$new"
 604        fi
 605        
 606        git merge -s subtree --message="$message" $rev
 607}
 608
 609cmd_pull()
 610{
 611        ensure_clean
 612        git fetch "$@" || exit $?
 613        revs=FETCH_HEAD
 614        cmd_merge
 615}
 616
 617"cmd_$command" "$@"