8baa376fe5b99dff91fa6cfc9f8fedab69644acb
   1#!/bin/bash
   2#
   3# git-subtree.sh: split/join git repositories in subdirectories of this one
   4#
   5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
   6#
   7if [ $# -eq 0 ]; then
   8    set -- -h
   9fi
  10OPTS_SPEC="\
  11git subtree add   --prefix=<prefix> <commit>
  12git subtree merge --prefix=<prefix> <commit>
  13git subtree pull  --prefix=<prefix> <repository> <refspec...>
  14git subtree split --prefix=<prefix> <commit...>
  15--
  16h,help        show the help
  17q             quiet
  18d             show debug messages
  19prefix=       the name of the subdir to split out
  20 options for 'split'
  21annotate=     add a prefix to commit message of new commits
  22b,branch=     create a new branch from the split subtree
  23ignore-joins  ignore prior --rejoin commits
  24onto=         try connecting new tree to an existing one
  25rejoin        merge the new branch back into HEAD
  26 options for 'add', 'merge', and 'pull'
  27squash        merge subtree changes as a single commit
  28"
  29eval $(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)
  30PATH=$(git --exec-path):$PATH
  31. git-sh-setup
  32require_work_tree
  33
  34quiet=
  35branch=
  36debug=
  37command=
  38onto=
  39rejoin=
  40ignore_joins=
  41annotate=
  42squash=
  43
  44debug()
  45{
  46        if [ -n "$debug" ]; then
  47                echo "$@" >&2
  48        fi
  49}
  50
  51say()
  52{
  53        if [ -z "$quiet" ]; then
  54                echo "$@" >&2
  55        fi
  56}
  57
  58assert()
  59{
  60        if "$@"; then
  61                :
  62        else
  63                die "assertion failed: " "$@"
  64        fi
  65}
  66
  67
  68#echo "Options: $*"
  69
  70while [ $# -gt 0 ]; do
  71        opt="$1"
  72        shift
  73        case "$opt" in
  74                -q) quiet=1 ;;
  75                -d) debug=1 ;;
  76                --annotate) annotate="$1"; shift ;;
  77                --no-annotate) annotate= ;;
  78                -b) branch="$1"; shift ;;
  79                --prefix) prefix="$1"; shift ;;
  80                --no-prefix) prefix= ;;
  81                --onto) onto="$1"; shift ;;
  82                --no-onto) onto= ;;
  83                --rejoin) rejoin=1 ;;
  84                --no-rejoin) rejoin= ;;
  85                --ignore-joins) ignore_joins=1 ;;
  86                --no-ignore-joins) ignore_joins= ;;
  87                --squash) squash=1 ;;
  88                --no-squash) squash= ;;
  89                --) break ;;
  90                *) die "Unexpected option: $opt" ;;
  91        esac
  92done
  93
  94command="$1"
  95shift
  96case "$command" in
  97        add|merge|pull) default= ;;
  98        split) default="--default HEAD" ;;
  99        *) die "Unknown command '$command'" ;;
 100esac
 101
 102if [ -z "$prefix" ]; then
 103        die "You must provide the --prefix option."
 104fi
 105dir="$(dirname "$prefix/.")"
 106
 107if [ "$command" != "pull" ]; then
 108        revs=$(git rev-parse $default --revs-only "$@") || exit $?
 109        dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
 110        if [ -n "$dirs" ]; then
 111                die "Error: Use --prefix instead of bare filenames."
 112        fi
 113fi
 114
 115debug "command: {$command}"
 116debug "quiet: {$quiet}"
 117debug "revs: {$revs}"
 118debug "dir: {$dir}"
 119debug "opts: {$*}"
 120debug
 121
 122cache_setup()
 123{
 124        cachedir="$GIT_DIR/subtree-cache/$$"
 125        rm -rf "$cachedir" || die "Can't delete old cachedir: $cachedir"
 126        mkdir -p "$cachedir" || die "Can't create new cachedir: $cachedir"
 127        debug "Using cachedir: $cachedir" >&2
 128}
 129
 130cache_get()
 131{
 132        for oldrev in $*; do
 133                if [ -r "$cachedir/$oldrev" ]; then
 134                        read newrev <"$cachedir/$oldrev"
 135                        echo $newrev
 136                fi
 137        done
 138}
 139
 140cache_set()
 141{
 142        oldrev="$1"
 143        newrev="$2"
 144        if [ "$oldrev" != "latest_old" \
 145             -a "$oldrev" != "latest_new" \
 146             -a -e "$cachedir/$oldrev" ]; then
 147                die "cache for $oldrev already exists!"
 148        fi
 149        echo "$newrev" >"$cachedir/$oldrev"
 150}
 151
 152rev_exists()
 153{
 154        if git rev-parse "$1" >/dev/null 2>&1; then
 155                return 0
 156        else
 157                return 1
 158        fi
 159}
 160
 161# if a commit doesn't have a parent, this might not work.  But we only want
 162# to remove the parent from the rev-list, and since it doesn't exist, it won't
 163# be there anyway, so do nothing in that case.
 164try_remove_previous()
 165{
 166        if rev_exists "$1^"; then
 167                echo "^$1^"
 168        fi
 169}
 170
 171find_latest_squash()
 172{
 173        debug "Looking for latest squash ($dir)..."
 174        dir="$1"
 175        sq=
 176        main=
 177        sub=
 178        git log --grep="^git-subtree-dir: $dir/*\$" \
 179                --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
 180        while read a b junk; do
 181                debug "$a $b $junk"
 182                debug "{{$sq/$main/$sub}}"
 183                case "$a" in
 184                        START) sq="$b" ;;
 185                        git-subtree-mainline:) main="$b" ;;
 186                        git-subtree-split:) sub="$b" ;;
 187                        END)
 188                                if [ -n "$sub" ]; then
 189                                        if [ -n "$main" ]; then
 190                                                # a rejoin commit?
 191                                                # Pretend its sub was a squash.
 192                                                sq="$sub"
 193                                        fi
 194                                        debug "Squash found: $sq $sub"
 195                                        echo "$sq" "$sub"
 196                                        break
 197                                fi
 198                                sq=
 199                                main=
 200                                sub=
 201                                ;;
 202                esac
 203        done
 204}
 205
 206find_existing_splits()
 207{
 208        debug "Looking for prior splits..."
 209        dir="$1"
 210        revs="$2"
 211        main=
 212        sub=
 213        git log --grep="^git-subtree-dir: $dir/*\$" \
 214                --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
 215        while read a b junk; do
 216                case "$a" in
 217                        START) sq="$b" ;;
 218                        git-subtree-mainline:) main="$b" ;;
 219                        git-subtree-split:) sub="$b" ;;
 220                        END)
 221                                debug "  Main is: '$main'"
 222                                if [ -z "$main" -a -n "$sub" ]; then
 223                                        # squash commits refer to a subtree
 224                                        debug "  Squash: $sq from $sub"
 225                                        cache_set "$sq" "$sub"
 226                                fi
 227                                if [ -n "$main" -a -n "$sub" ]; then
 228                                        debug "  Prior: $main -> $sub"
 229                                        cache_set $main $sub
 230                                        try_remove_previous "$main"
 231                                        try_remove_previous "$sub"
 232                                fi
 233                                main=
 234                                sub=
 235                                ;;
 236                esac
 237        done
 238}
 239
 240copy_commit()
 241{
 242        # We're going to set some environment vars here, so
 243        # do it in a subshell to get rid of them safely later
 244        debug copy_commit "{$1}" "{$2}" "{$3}"
 245        git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
 246        (
 247                read GIT_AUTHOR_NAME
 248                read GIT_AUTHOR_EMAIL
 249                read GIT_AUTHOR_DATE
 250                read GIT_COMMITTER_NAME
 251                read GIT_COMMITTER_EMAIL
 252                read GIT_COMMITTER_DATE
 253                export  GIT_AUTHOR_NAME \
 254                        GIT_AUTHOR_EMAIL \
 255                        GIT_AUTHOR_DATE \
 256                        GIT_COMMITTER_NAME \
 257                        GIT_COMMITTER_EMAIL \
 258                        GIT_COMMITTER_DATE
 259                (echo -n "$annotate"; cat ) |
 260                git commit-tree "$2" $3  # reads the rest of stdin
 261        ) || die "Can't copy commit $1"
 262}
 263
 264add_msg()
 265{
 266        dir="$1"
 267        latest_old="$2"
 268        latest_new="$3"
 269        cat <<-EOF
 270                Add '$dir/' from commit '$latest_new'
 271                
 272                git-subtree-dir: $dir
 273                git-subtree-mainline: $latest_old
 274                git-subtree-split: $latest_new
 275        EOF
 276}
 277
 278rejoin_msg()
 279{
 280        dir="$1"
 281        latest_old="$2"
 282        latest_new="$3"
 283        cat <<-EOF
 284                Split '$dir/' into commit '$latest_new'
 285                
 286                git-subtree-dir: $dir
 287                git-subtree-mainline: $latest_old
 288                git-subtree-split: $latest_new
 289        EOF
 290}
 291
 292squash_msg()
 293{
 294        dir="$1"
 295        oldsub="$2"
 296        newsub="$3"
 297        newsub_short=$(git rev-parse --short "$newsub")
 298        
 299        if [ -n "$oldsub" ]; then
 300                oldsub_short=$(git rev-parse --short "$oldsub")
 301                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
 302                echo
 303                git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
 304                git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
 305        else
 306                echo "Squashed '$dir/' content from commit $newsub_short"
 307        fi
 308        
 309        echo
 310        echo "git-subtree-dir: $dir"
 311        echo "git-subtree-split: $newsub"
 312}
 313
 314toptree_for_commit()
 315{
 316        commit="$1"
 317        git log -1 --pretty=format:'%T' "$commit" -- || exit $?
 318}
 319
 320subtree_for_commit()
 321{
 322        commit="$1"
 323        dir="$2"
 324        git ls-tree "$commit" -- "$dir" |
 325        while read mode type tree name; do
 326                assert [ "$name" = "$dir" ]
 327                assert [ "$type" = "tree" ]
 328                echo $tree
 329                break
 330        done
 331}
 332
 333tree_changed()
 334{
 335        tree=$1
 336        shift
 337        if [ $# -ne 1 ]; then
 338                return 0   # weird parents, consider it changed
 339        else
 340                ptree=$(toptree_for_commit $1)
 341                if [ "$ptree" != "$tree" ]; then
 342                        return 0   # changed
 343                else
 344                        return 1   # not changed
 345                fi
 346        fi
 347}
 348
 349new_squash_commit()
 350{
 351        old="$1"
 352        oldsub="$2"
 353        newsub="$3"
 354        tree=$(toptree_for_commit $newsub) || exit $?
 355        if [ -n "$old" ]; then
 356                squash_msg "$dir" "$oldsub" "$newsub" | 
 357                        git commit-tree "$tree" -p "$old" || exit $?
 358        else
 359                squash_msg "$dir" "" "$newsub" |
 360                        git commit-tree "$tree" || exit $?
 361        fi
 362}
 363
 364copy_or_skip()
 365{
 366        rev="$1"
 367        tree="$2"
 368        newparents="$3"
 369        assert [ -n "$tree" ]
 370
 371        identical=
 372        nonidentical=
 373        p=
 374        gotparents=
 375        for parent in $newparents; do
 376                ptree=$(toptree_for_commit $parent) || exit $?
 377                [ -z "$ptree" ] && continue
 378                if [ "$ptree" = "$tree" ]; then
 379                        # an identical parent could be used in place of this rev.
 380                        identical="$parent"
 381                else
 382                        nonidentical="$parent"
 383                fi
 384                
 385                # sometimes both old parents map to the same newparent;
 386                # eliminate duplicates
 387                is_new=1
 388                for gp in $gotparents; do
 389                        if [ "$gp" = "$parent" ]; then
 390                                is_new=
 391                                break
 392                        fi
 393                done
 394                if [ -n "$is_new" ]; then
 395                        gotparents="$gotparents $parent"
 396                        p="$p -p $parent"
 397                fi
 398        done
 399        
 400        if [ -n "$identical" ]; then
 401                echo $identical
 402        else
 403                copy_commit $rev $tree "$p" || exit $?
 404        fi
 405}
 406
 407ensure_clean()
 408{
 409        if ! git diff-index HEAD --exit-code --quiet; then
 410                die "Working tree has modifications.  Cannot add."
 411        fi
 412        if ! git diff-index --cached HEAD --exit-code --quiet; then
 413                die "Index has modifications.  Cannot add."
 414        fi
 415}
 416
 417cmd_add()
 418{
 419        if [ -e "$dir" ]; then
 420                die "'$dir' already exists.  Cannot add."
 421        fi
 422        ensure_clean
 423        
 424        set -- $revs
 425        if [ $# -ne 1 ]; then
 426                die "You must provide exactly one revision.  Got: '$revs'"
 427        fi
 428        rev="$1"
 429        
 430        debug "Adding $dir as '$rev'..."
 431        git read-tree --prefix="$dir" $rev || exit $?
 432        git checkout -- "$dir" || exit $?
 433        tree=$(git write-tree) || exit $?
 434        
 435        headrev=$(git rev-parse HEAD) || exit $?
 436        if [ -n "$headrev" -a "$headrev" != "$rev" ]; then
 437                headp="-p $headrev"
 438        else
 439                headp=
 440        fi
 441        
 442        if [ -n "$squash" ]; then
 443                rev=$(new_squash_commit "" "" "$rev") || exit $?
 444                commit=$(echo "Merge commit '$rev' as '$dir'" |
 445                         git commit-tree $tree $headp -p "$rev") || exit $?
 446        else
 447                commit=$(add_msg "$dir" "$headrev" "$rev" |
 448                         git commit-tree $tree $headp -p "$rev") || exit $?
 449        fi
 450        git reset "$commit" || exit $?
 451        
 452        say "Added dir '$dir'"
 453}
 454
 455cmd_split()
 456{
 457        if [ -n "$branch" ] && rev_exists "refs/heads/$branch"; then
 458                die "Branch '$branch' already exists."
 459        fi
 460
 461        debug "Splitting $dir..."
 462        cache_setup || exit $?
 463        
 464        if [ -n "$onto" ]; then
 465                debug "Reading history for --onto=$onto..."
 466                git rev-list $onto |
 467                while read rev; do
 468                        # the 'onto' history is already just the subdir, so
 469                        # any parent we find there can be used verbatim
 470                        debug "  cache: $rev"
 471                        cache_set $rev $rev
 472                done
 473        fi
 474        
 475        if [ -n "$ignore_joins" ]; then
 476                unrevs=
 477        else
 478                unrevs="$(find_existing_splits "$dir" "$revs")"
 479        fi
 480        
 481        # We can't restrict rev-list to only $dir here, because some of our
 482        # parents have the $dir contents the root, and those won't match.
 483        # (and rev-list --follow doesn't seem to solve this)
 484        grl='git rev-list --reverse --parents $revs $unrevs'
 485        revmax=$(eval "$grl" | wc -l)
 486        revcount=0
 487        createcount=0
 488        eval "$grl" |
 489        while read rev parents; do
 490                revcount=$(($revcount + 1))
 491                say -n "$revcount/$revmax ($createcount)
"
 492                debug "Processing commit: $rev"
 493                exists=$(cache_get $rev)
 494                if [ -n "$exists" ]; then
 495                        debug "  prior: $exists"
 496                        continue
 497                fi
 498                createcount=$(($createcount + 1))
 499                debug "  parents: $parents"
 500                newparents=$(cache_get $parents)
 501                debug "  newparents: $newparents"
 502                
 503                tree=$(subtree_for_commit $rev "$dir")
 504                debug "  tree is: $tree"
 505                
 506                # ugly.  is there no better way to tell if this is a subtree
 507                # vs. a mainline commit?  Does it matter?
 508                [ -z $tree ] && continue
 509
 510                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
 511                debug "  newrev is: $newrev"
 512                cache_set $rev $newrev
 513                cache_set latest_new $newrev
 514                cache_set latest_old $rev
 515        done || exit $?
 516        latest_new=$(cache_get latest_new)
 517        if [ -z "$latest_new" ]; then
 518                die "No new revisions were found"
 519        fi
 520        
 521        if [ -n "$rejoin" ]; then
 522                debug "Merging split branch into HEAD..."
 523                latest_old=$(cache_get latest_old)
 524                git merge -s ours \
 525                        -m "$(rejoin_msg $dir $latest_old $latest_new)" \
 526                        $latest_new >&2 || exit $?
 527        fi
 528        if [ -n "$branch" ]; then
 529                git update-ref -m 'subtree split' "refs/heads/$branch" \
 530                        $latest_new "" || exit $?
 531                say "Created branch '$branch'"
 532        fi
 533        echo $latest_new
 534        exit 0
 535}
 536
 537cmd_merge()
 538{
 539        ensure_clean
 540        
 541        set -- $revs
 542        if [ $# -ne 1 ]; then
 543                die "You must provide exactly one revision.  Got: '$revs'"
 544        fi
 545        rev="$1"
 546        
 547        if [ -n "$squash" ]; then
 548                first_split="$(find_latest_squash "$dir")"
 549                if [ -z "$first_split" ]; then
 550                        die "Can't squash-merge: '$dir' was never added."
 551                fi
 552                set $first_split
 553                old=$1
 554                sub=$2
 555                if [ "$sub" = "$rev" ]; then
 556                        say "Subtree is already at commit $rev."
 557                        exit 0
 558                fi
 559                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
 560                debug "New squash commit: $new"
 561                rev="$new"
 562        fi
 563        
 564        git merge -s subtree $rev
 565}
 566
 567cmd_pull()
 568{
 569        ensure_clean
 570        git fetch "$@" || exit $?
 571        revs=FETCH_HEAD
 572        cmd_merge
 573}
 574
 575"cmd_$command" "$@"