contrib / subtree / git-subtree.sh on commit hashmap: add simplified hashmap_get_from_hash() API (ab73a9d)
   1#!/bin/sh
   2#
   3# git-subtree.sh: split/join git repositories in subdirectories of this one
   4#
   5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
   6#
   7if [ $# -eq 0 ]; then
   8    set -- -h
   9fi
  10OPTS_SPEC="\
  11git subtree add   --prefix=<prefix> <commit>
  12git subtree add   --prefix=<prefix> <repository> <ref>
  13git subtree merge --prefix=<prefix> <commit>
  14git subtree pull  --prefix=<prefix> <repository> <ref>
  15git subtree push  --prefix=<prefix> <repository> <ref>
  16git subtree split --prefix=<prefix> <commit...>
  17--
  18h,help        show the help
  19q             quiet
  20d             show debug messages
  21P,prefix=     the name of the subdir to split out
  22m,message=    use the given message as the commit message for the merge commit
  23 options for 'split'
  24annotate=     add a prefix to commit message of new commits
  25b,branch=     create a new branch from the split subtree
  26ignore-joins  ignore prior --rejoin commits
  27onto=         try connecting new tree to an existing one
  28rejoin        merge the new branch back into HEAD
  29 options for 'add', 'merge', 'pull' and 'push'
  30squash        merge subtree changes as a single commit
  31"
  32eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
  33
  34PATH=$PATH:$(git --exec-path)
  35. git-sh-setup
  36
  37require_work_tree
  38
  39quiet=
  40branch=
  41debug=
  42command=
  43onto=
  44rejoin=
  45ignore_joins=
  46annotate=
  47squash=
  48message=
  49prefix=
  50
  51debug()
  52{
  53        if [ -n "$debug" ]; then
  54                echo "$@" >&2
  55        fi
  56}
  57
  58say()
  59{
  60        if [ -z "$quiet" ]; then
  61                echo "$@" >&2
  62        fi
  63}
  64
  65assert()
  66{
  67        if "$@"; then
  68                :
  69        else
  70                die "assertion failed: " "$@"
  71        fi
  72}
  73
  74
  75#echo "Options: $*"
  76
  77while [ $# -gt 0 ]; do
  78        opt="$1"
  79        shift
  80        case "$opt" in
  81                -q) quiet=1 ;;
  82                -d) debug=1 ;;
  83                --annotate) annotate="$1"; shift ;;
  84                --no-annotate) annotate= ;;
  85                -b) branch="$1"; shift ;;
  86                -P) prefix="$1"; shift ;;
  87                -m) message="$1"; shift ;;
  88                --no-prefix) prefix= ;;
  89                --onto) onto="$1"; shift ;;
  90                --no-onto) onto= ;;
  91                --rejoin) rejoin=1 ;;
  92                --no-rejoin) rejoin= ;;
  93                --ignore-joins) ignore_joins=1 ;;
  94                --no-ignore-joins) ignore_joins= ;;
  95                --squash) squash=1 ;;
  96                --no-squash) squash= ;;
  97                --) break ;;
  98                *) die "Unexpected option: $opt" ;;
  99        esac
 100done
 101
 102command="$1"
 103shift
 104case "$command" in
 105        add|merge|pull) default= ;;
 106        split|push) default="--default HEAD" ;;
 107        *) die "Unknown command '$command'" ;;
 108esac
 109
 110if [ -z "$prefix" ]; then
 111        die "You must provide the --prefix option."
 112fi
 113
 114case "$command" in
 115        add) [ -e "$prefix" ] && 
 116                die "prefix '$prefix' already exists." ;;
 117        *)   [ -e "$prefix" ] || 
 118                die "'$prefix' does not exist; use 'git subtree add'" ;;
 119esac
 120
 121dir="$(dirname "$prefix/.")"
 122
 123if [ "$command" != "pull" -a "$command" != "add" -a "$command" != "push" ]; then
 124        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 125        dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
 126        if [ -n "$dirs" ]; then
 127                die "Error: Use --prefix instead of bare filenames."
 128        fi
 129fi
 130
 131debug "command: {$command}"
 132debug "quiet: {$quiet}"
 133debug "revs: {$revs}"
 134debug "dir: {$dir}"
 135debug "opts: {$*}"
 136debug
 137
 138cache_setup()
 139{
 140        cachedir="$GIT_DIR/subtree-cache/$$"
 141        rm -rf "$cachedir" || die "Can't delete old cachedir: $cachedir"
 142        mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir"
 143        mkdir -p "$cachedir/notree" || die "Can't create new cachedir: $cachedir/notree"
 144        debug "Using cachedir: $cachedir" >&2
 145}
 146
 147cache_get()
 148{
 149        for oldrev in $*; do
 150                if [ -r "$cachedir/$oldrev" ]; then
 151                        read newrev <"$cachedir/$oldrev"
 152                        echo $newrev
 153                fi
 154        done
 155}
 156
 157cache_miss()
 158{
 159        for oldrev in $*; do
 160                if [ ! -r "$cachedir/$oldrev" ]; then
 161                        echo $oldrev
 162                fi
 163        done
 164}
 165
 166check_parents()
 167{
 168        missed=$(cache_miss $*)
 169        for miss in $missed; do
 170                if [ ! -r "$cachedir/notree/$miss" ]; then
 171                        debug "  incorrect order: $miss"
 172                fi
 173        done
 174}
 175
 176set_notree()
 177{
 178        echo "1" > "$cachedir/notree/$1"
 179}
 180
 181cache_set()
 182{
 183        oldrev="$1"
 184        newrev="$2"
 185        if [ "$oldrev" != "latest_old" \
 186             -a "$oldrev" != "latest_new" \
 187             -a -e "$cachedir/$oldrev" ]; then
 188                die "cache for $oldrev already exists!"
 189        fi
 190        echo "$newrev" >"$cachedir/$oldrev"
 191}
 192
 193rev_exists()
 194{
 195        if git rev-parse "$1" >/dev/null 2>&1; then
 196                return 0
 197        else
 198                return 1
 199        fi
 200}
 201
 202rev_is_descendant_of_branch()
 203{
 204        newrev="$1"
 205        branch="$2"
 206        branch_hash=$(git rev-parse $branch)
 207        match=$(git rev-list -1 $branch_hash ^$newrev)
 208
 209        if [ -z "$match" ]; then
 210                return 0
 211        else
 212                return 1
 213        fi
 214}
 215
 216# if a commit doesn't have a parent, this might not work.  But we only want
 217# to remove the parent from the rev-list, and since it doesn't exist, it won't
 218# be there anyway, so do nothing in that case.
 219try_remove_previous()
 220{
 221        if rev_exists "$1^"; then
 222                echo "^$1^"
 223        fi
 224}
 225
 226find_latest_squash()
 227{
 228        debug "Looking for latest squash ($dir)..."
 229        dir="$1"
 230        sq=
 231        main=
 232        sub=
 233        git log --grep="^git-subtree-dir: $dir/*\$" \
 234                --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
 235        while read a b junk; do
 236                debug "$a $b $junk"
 237                debug "{{$sq/$main/$sub}}"
 238                case "$a" in
 239                        START) sq="$b" ;;
 240                        git-subtree-mainline:) main="$b" ;;
 241                        git-subtree-split:) sub="$b" ;;
 242                        END)
 243                                if [ -n "$sub" ]; then
 244                                        if [ -n "$main" ]; then
 245                                                # a rejoin commit?
 246                                                # Pretend its sub was a squash.
 247                                                sq="$sub"
 248                                        fi
 249                                        debug "Squash found: $sq $sub"
 250                                        echo "$sq" "$sub"
 251                                        break
 252                                fi
 253                                sq=
 254                                main=
 255                                sub=
 256                                ;;
 257                esac
 258        done
 259}
 260
 261find_existing_splits()
 262{
 263        debug "Looking for prior splits..."
 264        dir="$1"
 265        revs="$2"
 266        main=
 267        sub=
 268        git log --grep="^git-subtree-dir: $dir/*\$" \
 269                --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
 270        while read a b junk; do
 271                case "$a" in
 272                        START) sq="$b" ;;
 273                        git-subtree-mainline:) main="$b" ;;
 274                        git-subtree-split:) sub="$b" ;;
 275                        END)
 276                                debug "  Main is: '$main'"
 277                                if [ -z "$main" -a -n "$sub" ]; then
 278                                        # squash commits refer to a subtree
 279                                        debug "  Squash: $sq from $sub"
 280                                        cache_set "$sq" "$sub"
 281                                fi
 282                                if [ -n "$main" -a -n "$sub" ]; then
 283                                        debug "  Prior: $main -> $sub"
 284                                        cache_set $main $sub
 285                                        cache_set $sub $sub
 286                                        try_remove_previous "$main"
 287                                        try_remove_previous "$sub"
 288                                fi
 289                                main=
 290                                sub=
 291                                ;;
 292                esac
 293        done
 294}
 295
 296copy_commit()
 297{
 298        # We're going to set some environment vars here, so
 299        # do it in a subshell to get rid of them safely later
 300        debug copy_commit "{$1}" "{$2}" "{$3}"
 301        git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%B' "$1" |
 302        (
 303                read GIT_AUTHOR_NAME
 304                read GIT_AUTHOR_EMAIL
 305                read GIT_AUTHOR_DATE
 306                read GIT_COMMITTER_NAME
 307                read GIT_COMMITTER_EMAIL
 308                read GIT_COMMITTER_DATE
 309                export  GIT_AUTHOR_NAME \
 310                        GIT_AUTHOR_EMAIL \
 311                        GIT_AUTHOR_DATE \
 312                        GIT_COMMITTER_NAME \
 313                        GIT_COMMITTER_EMAIL \
 314                        GIT_COMMITTER_DATE
 315                (printf "%s" "$annotate"; cat ) |
 316                git commit-tree "$2" $3  # reads the rest of stdin
 317        ) || die "Can't copy commit $1"
 318}
 319
 320add_msg()
 321{
 322        dir="$1"
 323        latest_old="$2"
 324        latest_new="$3"
 325        if [ -n "$message" ]; then
 326                commit_message="$message"
 327        else
 328                commit_message="Add '$dir/' from commit '$latest_new'"
 329        fi
 330        cat <<-EOF
 331                $commit_message
 332                
 333                git-subtree-dir: $dir
 334                git-subtree-mainline: $latest_old
 335                git-subtree-split: $latest_new
 336        EOF
 337}
 338
 339add_squashed_msg()
 340{
 341        if [ -n "$message" ]; then
 342                echo "$message"
 343        else
 344                echo "Merge commit '$1' as '$2'"
 345        fi
 346}
 347
 348rejoin_msg()
 349{
 350        dir="$1"
 351        latest_old="$2"
 352        latest_new="$3"
 353        if [ -n "$message" ]; then
 354                commit_message="$message"
 355        else
 356                commit_message="Split '$dir/' into commit '$latest_new'"
 357        fi
 358        cat <<-EOF
 359                $commit_message
 360                
 361                git-subtree-dir: $dir
 362                git-subtree-mainline: $latest_old
 363                git-subtree-split: $latest_new
 364        EOF
 365}
 366
 367squash_msg()
 368{
 369        dir="$1"
 370        oldsub="$2"
 371        newsub="$3"
 372        newsub_short=$(git rev-parse --short "$newsub")
 373        
 374        if [ -n "$oldsub" ]; then
 375                oldsub_short=$(git rev-parse --short "$oldsub")
 376                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
 377                echo
 378                git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
 379                git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
 380        else
 381                echo "Squashed '$dir/' content from commit $newsub_short"
 382        fi
 383        
 384        echo
 385        echo "git-subtree-dir: $dir"
 386        echo "git-subtree-split: $newsub"
 387}
 388
 389toptree_for_commit()
 390{
 391        commit="$1"
 392        git log -1 --pretty=format:'%T' "$commit" -- || exit $?
 393}
 394
 395subtree_for_commit()
 396{
 397        commit="$1"
 398        dir="$2"
 399        git ls-tree "$commit" -- "$dir" |
 400        while read mode type tree name; do
 401                assert [ "$name" = "$dir" ]
 402                assert [ "$type" = "tree" -o "$type" = "commit" ]
 403                [ "$type" = "commit" ] && continue  # ignore submodules
 404                echo $tree
 405                break
 406        done
 407}
 408
 409tree_changed()
 410{
 411        tree=$1
 412        shift
 413        if [ $# -ne 1 ]; then
 414                return 0   # weird parents, consider it changed
 415        else
 416                ptree=$(toptree_for_commit $1)
 417                if [ "$ptree" != "$tree" ]; then
 418                        return 0   # changed
 419                else
 420                        return 1   # not changed
 421                fi
 422        fi
 423}
 424
 425new_squash_commit()
 426{
 427        old="$1"
 428        oldsub="$2"
 429        newsub="$3"
 430        tree=$(toptree_for_commit $newsub) || exit $?
 431        if [ -n "$old" ]; then
 432                squash_msg "$dir" "$oldsub" "$newsub" | 
 433                        git commit-tree "$tree" -p "$old" || exit $?
 434        else
 435                squash_msg "$dir" "" "$newsub" |
 436                        git commit-tree "$tree" || exit $?
 437        fi
 438}
 439
 440copy_or_skip()
 441{
 442        rev="$1"
 443        tree="$2"
 444        newparents="$3"
 445        assert [ -n "$tree" ]
 446
 447        identical=
 448        nonidentical=
 449        p=
 450        gotparents=
 451        for parent in $newparents; do
 452                ptree=$(toptree_for_commit $parent) || exit $?
 453                [ -z "$ptree" ] && continue
 454                if [ "$ptree" = "$tree" ]; then
 455                        # an identical parent could be used in place of this rev.
 456                        identical="$parent"
 457                else
 458                        nonidentical="$parent"
 459                fi
 460                
 461                # sometimes both old parents map to the same newparent;
 462                # eliminate duplicates
 463                is_new=1
 464                for gp in $gotparents; do
 465                        if [ "$gp" = "$parent" ]; then
 466                                is_new=
 467                                break
 468                        fi
 469                done
 470                if [ -n "$is_new" ]; then
 471                        gotparents="$gotparents $parent"
 472                        p="$p -p $parent"
 473                fi
 474        done
 475        
 476        if [ -n "$identical" ]; then
 477                echo $identical
 478        else
 479                copy_commit $rev $tree "$p" || exit $?
 480        fi
 481}
 482
 483ensure_clean()
 484{
 485        if ! git diff-index HEAD --exit-code --quiet 2>&1; then
 486                die "Working tree has modifications.  Cannot add."
 487        fi
 488        if ! git diff-index --cached HEAD --exit-code --quiet 2>&1; then
 489                die "Index has modifications.  Cannot add."
 490        fi
 491}
 492
 493ensure_valid_ref_format()
 494{
 495        git check-ref-format "refs/heads/$1" ||
 496            die "'$1' does not look like a ref"
 497}
 498
 499cmd_add()
 500{
 501        if [ -e "$dir" ]; then
 502                die "'$dir' already exists.  Cannot add."
 503        fi
 504
 505        ensure_clean
 506        
 507        if [ $# -eq 1 ]; then
 508            git rev-parse -q --verify "$1^{commit}" >/dev/null ||
 509            die "'$1' does not refer to a commit"
 510
 511            "cmd_add_commit" "$@"
 512        elif [ $# -eq 2 ]; then
 513            # Technically we could accept a refspec here but we're
 514            # just going to turn around and add FETCH_HEAD under the
 515            # specified directory.  Allowing a refspec might be
 516            # misleading because we won't do anything with any other
 517            # branches fetched via the refspec.
 518            ensure_valid_ref_format "$2"
 519
 520            "cmd_add_repository" "$@"
 521        else
 522            say "error: parameters were '$@'"
 523            die "Provide either a commit or a repository and commit."
 524        fi
 525}
 526
 527cmd_add_repository()
 528{
 529        echo "git fetch" "$@"
 530        repository=$1
 531        refspec=$2
 532        git fetch "$@" || exit $?
 533        revs=FETCH_HEAD
 534        set -- $revs
 535        cmd_add_commit "$@"
 536}
 537
 538cmd_add_commit()
 539{
 540        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 541        set -- $revs
 542        rev="$1"
 543        
 544        debug "Adding $dir as '$rev'..."
 545        git read-tree --prefix="$dir" $rev || exit $?
 546        git checkout -- "$dir" || exit $?
 547        tree=$(git write-tree) || exit $?
 548        
 549        headrev=$(git rev-parse HEAD) || exit $?
 550        if [ -n "$headrev" -a "$headrev" != "$rev" ]; then
 551                headp="-p $headrev"
 552        else
 553                headp=
 554        fi
 555        
 556        if [ -n "$squash" ]; then
 557                rev=$(new_squash_commit "" "" "$rev") || exit $?
 558                commit=$(add_squashed_msg "$rev" "$dir" |
 559                         git commit-tree $tree $headp -p "$rev") || exit $?
 560        else
 561                revp=$(peel_committish "$rev") &&
 562                commit=$(add_msg "$dir" "$headrev" "$rev" |
 563                         git commit-tree $tree $headp -p "$revp") || exit $?
 564        fi
 565        git reset "$commit" || exit $?
 566        
 567        say "Added dir '$dir'"
 568}
 569
 570cmd_split()
 571{
 572        debug "Splitting $dir..."
 573        cache_setup || exit $?
 574        
 575        if [ -n "$onto" ]; then
 576                debug "Reading history for --onto=$onto..."
 577                git rev-list $onto |
 578                while read rev; do
 579                        # the 'onto' history is already just the subdir, so
 580                        # any parent we find there can be used verbatim
 581                        debug "  cache: $rev"
 582                        cache_set $rev $rev
 583                done
 584        fi
 585        
 586        if [ -n "$ignore_joins" ]; then
 587                unrevs=
 588        else
 589                unrevs="$(find_existing_splits "$dir" "$revs")"
 590        fi
 591        
 592        # We can't restrict rev-list to only $dir here, because some of our
 593        # parents have the $dir contents the root, and those won't match.
 594        # (and rev-list --follow doesn't seem to solve this)
 595        grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
 596        revmax=$(eval "$grl" | wc -l)
 597        revcount=0
 598        createcount=0
 599        eval "$grl" |
 600        while read rev parents; do
 601                revcount=$(($revcount + 1))
 602                say -n "$revcount/$revmax ($createcount)
"
 603                debug "Processing commit: $rev"
 604                exists=$(cache_get $rev)
 605                if [ -n "$exists" ]; then
 606                        debug "  prior: $exists"
 607                        continue
 608                fi
 609                createcount=$(($createcount + 1))
 610                debug "  parents: $parents"
 611                newparents=$(cache_get $parents)
 612                debug "  newparents: $newparents"
 613                
 614                tree=$(subtree_for_commit $rev "$dir")
 615                debug "  tree is: $tree"
 616
 617                check_parents $parents
 618                
 619                # ugly.  is there no better way to tell if this is a subtree
 620                # vs. a mainline commit?  Does it matter?
 621                if [ -z $tree ]; then
 622                        set_notree $rev
 623                        if [ -n "$newparents" ]; then
 624                                cache_set $rev $rev
 625                        fi
 626                        continue
 627                fi
 628
 629                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
 630                debug "  newrev is: $newrev"
 631                cache_set $rev $newrev
 632                cache_set latest_new $newrev
 633                cache_set latest_old $rev
 634        done || exit $?
 635        latest_new=$(cache_get latest_new)
 636        if [ -z "$latest_new" ]; then
 637                die "No new revisions were found"
 638        fi
 639        
 640        if [ -n "$rejoin" ]; then
 641                debug "Merging split branch into HEAD..."
 642                latest_old=$(cache_get latest_old)
 643                git merge -s ours \
 644                        -m "$(rejoin_msg $dir $latest_old $latest_new)" \
 645                        $latest_new >&2 || exit $?
 646        fi
 647        if [ -n "$branch" ]; then
 648                if rev_exists "refs/heads/$branch"; then
 649                        if ! rev_is_descendant_of_branch $latest_new $branch; then
 650                                die "Branch '$branch' is not an ancestor of commit '$latest_new'."
 651                        fi
 652                        action='Updated'
 653                else
 654                        action='Created'
 655                fi
 656                git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
 657                say "$action branch '$branch'"
 658        fi
 659        echo $latest_new
 660        exit 0
 661}
 662
 663cmd_merge()
 664{
 665        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 666        ensure_clean
 667        
 668        set -- $revs
 669        if [ $# -ne 1 ]; then
 670                die "You must provide exactly one revision.  Got: '$revs'"
 671        fi
 672        rev="$1"
 673        
 674        if [ -n "$squash" ]; then
 675                first_split="$(find_latest_squash "$dir")"
 676                if [ -z "$first_split" ]; then
 677                        die "Can't squash-merge: '$dir' was never added."
 678                fi
 679                set $first_split
 680                old=$1
 681                sub=$2
 682                if [ "$sub" = "$rev" ]; then
 683                        say "Subtree is already at commit $rev."
 684                        exit 0
 685                fi
 686                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
 687                debug "New squash commit: $new"
 688                rev="$new"
 689        fi
 690
 691        version=$(git version)
 692        if [ "$version" \< "git version 1.7" ]; then
 693                if [ -n "$message" ]; then
 694                        git merge -s subtree --message="$message" $rev
 695                else
 696                        git merge -s subtree $rev
 697                fi
 698        else
 699                if [ -n "$message" ]; then
 700                        git merge -Xsubtree="$prefix" --message="$message" $rev
 701                else
 702                        git merge -Xsubtree="$prefix" $rev
 703                fi
 704        fi
 705}
 706
 707cmd_pull()
 708{
 709        if [ $# -ne 2 ]; then
 710            die "You must provide <repository> <ref>"
 711        fi
 712        ensure_clean
 713        ensure_valid_ref_format "$2"
 714        git fetch "$@" || exit $?
 715        revs=FETCH_HEAD
 716        set -- $revs
 717        cmd_merge "$@"
 718}
 719
 720cmd_push()
 721{
 722        if [ $# -ne 2 ]; then
 723            die "You must provide <repository> <ref>"
 724        fi
 725        ensure_valid_ref_format "$2"
 726        if [ -e "$dir" ]; then
 727            repository=$1
 728            refspec=$2
 729            echo "git push using: " $repository $refspec
 730            localrev=$(git subtree split --prefix="$prefix") || die
 731            git push $repository $localrev:refs/heads/$refspec
 732        else
 733            die "'$dir' must already exist. Try 'git subtree add'."
 734        fi
 735}
 736
 737"cmd_$command" "$@"