1#!/bin/sh
2#
3# git-subtree.sh: split/join git repositories in subdirectories of this one
4#
5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6#
# Invoked without any argument: behave as if help had been requested.
[ $# -gt 0 ] || set -- -h

# Option specification in "git rev-parse --parseopt" format: usage lines,
# a "--" separator, then one option per line.  Lines that begin with a
# space are group headings in the generated help text.
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree add --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <ref>
git subtree push --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help show the help
q quiet
d show debug messages
P,prefix= the name of the subdir to split out
m,message= use the given message as the commit message for the merge commit
 options for 'split'
annotate= add a prefix to commit message of new commits
b,branch= create a new branch from the split subtree
ignore-joins ignore prior --rejoin commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
 options for 'add', 'merge', 'pull' and 'push'
squash merge subtree changes as a single commit
"
# Evaluate whatever parseopt prints; when parseopt itself fails, the
# substituted text is "exit <status>", which terminates the script.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Put git's exec path on PATH so that git-sh-setup can be sourced.
# NOTE(review): die and require_work_tree are used in this file but not
# defined here; presumably git-sh-setup provides them.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree
38
# Start every option/state variable from a known empty value so that
# nothing is inherited from the caller's environment.  "prefix" and
# "revs" are included: both are read later in the script (the --prefix
# check and the debug dump) even on paths that never assign them.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
message=
prefix=
revs=
49
# debug [<word>...]
# When the global $debug flag is non-empty, write the arguments to stderr
# as one line, joined by the first character of IFS (a space by default).
# With no arguments an empty line is written.
# printf is used instead of echo so that text starting with "-n"/"-e" or
# containing backslashes (e.g. commit message lines) is printed verbatim.
debug () {
	if [ -n "$debug" ]; then
		printf '%s\n' "$*" >&2
	fi
}
56
# say [<echo-args>...]
# Progress/status output: unless the global $quiet flag is set, hand all
# arguments unchanged to echo and send the result to stderr.  Because the
# arguments go straight to echo, a leading -n is interpreted by shells
# whose echo supports it.
say () {
	[ -n "$quiet" ] || echo "$@" >&2
}
63
# assert <command> [<arg>...]
# Run the given command; if it fails, call die with "assertion failed: "
# followed by the command words.
assert () {
	"$@" || die "assertion failed: " "$@"
}
72
73
# Consume the normalized options up to the "--" separator.  Options that
# take a value find it in the following word.
while [ $# -gt 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q) quiet=1 ;;
	-d) debug=1 ;;
	--annotate) annotate="$1"; shift ;;
	--no-annotate) annotate= ;;
	-b) branch="$1"; shift ;;
	-P) prefix="$1"; shift ;;
	-m) message="$1"; shift ;;
	--no-prefix) prefix= ;;
	--onto) onto="$1"; shift ;;
	--no-onto) onto= ;;
	--rejoin) rejoin=1 ;;
	--no-rejoin) rejoin= ;;
	--ignore-joins) ignore_joins=1 ;;
	--no-ignore-joins) ignore_joins= ;;
	--squash) squash=1 ;;
	--no-squash) squash= ;;
	--) break ;;
	*) die "Unexpected option: $opt" ;;
	esac
done

# The first remaining word is the sub-command.  Only shift when there is
# something to shift: an out-of-range "shift" is an error (fatal in some
# shells) and would pre-empt the "Unknown command" message below.
command="$1"
if [ $# -gt 0 ]; then
	shift
fi

# $default holds extra "git rev-parse" arguments used when the caller
# names no revision.
case "$command" in
add|merge|pull) default= ;;
split|push) default="--default HEAD" ;;
*) die "Unknown command '$command'" ;;
esac

if [ -z "$prefix" ]; then
	die "You must provide the --prefix option."
fi

# "add" needs the prefix to be absent; every other command needs it present.
case "$command" in
add)
	[ -e "$prefix" ] &&
		die "prefix '$prefix' already exists."
	;;
*)
	[ -e "$prefix" ] ||
		die "'$prefix' does not exist; use 'git subtree add'"
	;;
esac

# dirname of "<prefix>/." gives the prefix without trailing slashes.
dir="$(dirname "$prefix/.")"

# Only the commands that take revisions directly (i.e. not pull, add or
# push) have their arguments resolved here; bare paths are rejected.
case "$command" in
pull|add|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	if [ -n "$dirs" ]; then
		die "Error: Use --prefix instead of bare filenames."
	fi
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
136
# cache_setup
# Create a fresh, empty cache directory for this process under
# $GIT_DIR/subtree-cache/<pid>, including its "notree" subdirectory, and
# record its path in the global $cachedir.  Dies if the old directory
# cannot be removed or a new one cannot be created.
cache_setup () {
	cachedir="$GIT_DIR/subtree-cache/$$"
	rm -rf "$cachedir" ||
		die "Can't delete old cachedir: $cachedir"
	for new_dir in "$cachedir" "$cachedir/notree"; do
		mkdir -p "$new_dir" ||
			die "Can't create new cachedir: $new_dir"
	done
	debug "Using cachedir: $cachedir" >&2
}
145
# cache_get <key>...
# For every key that has a readable entry in $cachedir, print the first
# line stored there; keys without an entry are silently skipped.
cache_get () {
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
155
# cache_miss <key>...
# Print, one per line, every key that has no readable entry in $cachedir.
cache_miss () {
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] && continue
		echo $oldrev
	done
}
164
# check_parents <rev>...
# Diagnostic only: for each given revision that is missing from the
# cache and also has no marker under $cachedir/notree, emit a debug
# message about incorrect ordering.
check_parents () {
	for miss in $(cache_miss $*); do
		[ -r "$cachedir/notree/$miss" ] ||
			debug " incorrect order: $miss"
	done
}
174
# set_notree <rev>
# Write a marker file containing "1" for <rev> under $cachedir/notree
# (check_parents looks for these markers).
set_notree () {
	printf '%s\n' 1 >"$cachedir/notree/$1"
}
179
# cache_set <key> <value>
# Store <value> under <key> in $cachedir.  Ordinary keys may be written
# only once (a second write dies); the two bookkeeping keys "latest_old"
# and "latest_new" may be overwritten freely.
cache_set () {
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old | latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
191
# rev_exists <rev>
# Return 0 when "git rev-parse <rev>" succeeds and 1 otherwise; all
# output of git is discarded.
rev_exists () {
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
200
# rev_is_descendant_of_branch <newrev> <branch>
# Return 0 when no commit reachable from <branch> is unreachable from
# <newrev> (i.e. "git rev-list -1 <branch> ^<newrev>" prints nothing),
# and 1 otherwise.
# If <branch> cannot be resolved, or rev-list fails, return 1 instead of
# letting the resulting empty output pass for "is a descendant".
# Note: assigns the globals $newrev, $branch, $branch_hash and $match.
rev_is_descendant_of_branch () {
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse --verify "$branch") || return 1
	match=$(git rev-list -1 "$branch_hash" "^$newrev") || return 1

	[ -z "$match" ]
}
214
# try_remove_previous <rev>
# Print "^<rev>^" (an exclusion of <rev>'s first parent, suitable for a
# rev-list argument) when that parent exists.  For a commit without a
# parent nothing is printed: there is nothing to exclude in that case.
# Always returns 0.
try_remove_previous () {
	rev_exists "$1^" || return 0
	echo "^$1^"
}
224
# find_latest_squash <dir>
# Scan the history of HEAD for the newest commit whose message carries a
# "git-subtree-dir: <dir>" line together with a "git-subtree-split:"
# line, and print "<commit> <split>" for it.  When that commit also has
# a "git-subtree-mainline:" line (a rejoin), the split hash is printed in
# both positions.  Prints nothing when no such commit is found.
# Lines are read with "read -r" so that a message line ending in a
# backslash is not joined with the line that follows it.
# Note: assigns the global $dir.
find_latest_squash () {
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read -r a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START) sq="$b" ;;
		git-subtree-mainline:) main="$b" ;;
		git-subtree-split:) sub="$b" ;;
		END)
			if [ -n "$sub" ]; then
				if [ -n "$main" ]; then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			# record without a split line: reset and keep looking
			sq=
			main=
			sub=
			;;
		esac
	done
}
259
# find_existing_splits <dir> <revs>
# Walk the history of <revs> looking for commits that carry a
# "git-subtree-dir: <dir>" line and seed the cache from them:
#   - split line only (a squash): cache  <commit> -> <split>
#   - mainline and split lines (a rejoin): cache <mainline> -> <split>
#     and <split> -> <split>, and print the try_remove_previous output
#     for both so the caller can exclude older history.
# Lines are read with "read -r" so that a message line ending in a
# backslash is not joined with the line that follows it.
# Note: assigns the globals $dir and $revs.
find_existing_splits () {
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read -r a b junk; do
		case "$a" in
		START) sq="$b" ;;
		git-subtree-mainline:) main="$b" ;;
		git-subtree-split:) sub="$b" ;;
		END)
			debug " Main is: '$main'"
			if [ -z "$main" -a -n "$sub" ]; then
				# squash commits refer to a subtree
				debug " Squash: $sq from $sub"
				cache_set "$sq" "$sub"
			fi
			if [ -n "$main" -a -n "$sub" ]; then
				debug " Prior: $main -> $sub"
				cache_set "$main" "$sub"
				cache_set "$sub" "$sub"
				try_remove_previous "$main"
				try_remove_previous "$sub"
			fi
			main=
			sub=
			;;
		esac
	done
}
294
# copy_commit <rev> <tree> <parent-args>
# Create a new commit object for <tree> that reuses the author,
# committer, dates and message of <rev>, with $annotate prepended to the
# message.  <parent-args> is a string of "-p <hash>" words and is
# deliberately expanded unquoted.  The output is that of
# "git commit-tree".  Dies when the copy fails.
#
# The identity fields are read with "read -r" so backslashes survive,
# and the dates are requested as %aD/%cD (RFC 2822) rather than %ad/%cd
# so they do not depend on the user's configured log date format.
copy_commit () {
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		(printf "%s" "$annotate"; cat ) |
		git commit-tree "$2" $3 # reads the rest of stdin
	) || die "Can't copy commit $1"
}
318
# add_msg <dir> <mainline-rev> <split-rev>
# Print the commit message for "add": the user's $message if one was
# given, otherwise a default subject, followed by a blank line and the
# git-subtree-dir / -mainline / -split lines.
# Note: assigns the globals $dir, $latest_old and $latest_new.
add_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if [ -z "$commit_message" ]; then
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	printf '%s\n\ngit-subtree-dir: %s\ngit-subtree-mainline: %s\ngit-subtree-split: %s\n' \
		"$commit_message" "$dir" "$latest_old" "$latest_new"
}
337
# add_squashed_msg <commit> <dir>
# Print the message for the merge commit created by a squashed "add":
# the user's $message when given, otherwise "Merge commit '<commit>' as
# '<dir>'".  printf is used so that a message such as "-n", or one
# containing backslashes, is emitted verbatim.
add_squashed_msg () {
	if [ -n "$message" ]; then
		printf '%s\n' "$message"
	else
		printf '%s\n' "Merge commit '$1' as '$2'"
	fi
}
346
# rejoin_msg <dir> <mainline-rev> <split-rev>
# Print the commit message for a --rejoin merge: the user's $message if
# one was given, otherwise a default subject, followed by a blank line
# and the git-subtree-dir / -mainline / -split lines.
# Note: assigns the globals $dir, $latest_old and $latest_new.
rejoin_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if [ -z "$commit_message" ]; then
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	printf '%s\n\ngit-subtree-dir: %s\ngit-subtree-mainline: %s\ngit-subtree-split: %s\n' \
		"$commit_message" "$dir" "$latest_old" "$latest_new"
}
365
# squash_msg <dir> <oldsub> <newsub>
# Print the message for a squash commit.  With an empty <oldsub> this is
# a "content from commit" subject; otherwise a "changes from old..new"
# subject followed by one line per commit gained (old..new) and one
# "REVERT:" line per commit lost (new..old).  The message always ends
# with the git-subtree-dir and git-subtree-split lines.
# Note: assigns the globals $dir, $oldsub and $newsub.
squash_msg () {
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}
387
# toptree_for_commit <commit>
# Print the hash of the top-level tree of <commit>.  If git fails, exit
# (not return) with git's status.
# Note: assigns the global $commit.
toptree_for_commit () {
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		return 0
	else
		exit $?
	fi
}
393
# subtree_for_commit <commit> <dir>
# Print the tree hash recorded for <dir> in <commit>.  Prints nothing
# when <dir> is absent from the commit or is a submodule (a "commit"
# entry).  Asserts that the entry is named exactly <dir> and is either a
# tree or a commit.
# Note: assigns the globals $commit and $dir.
subtree_for_commit () {
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name; do
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" -o "$type" = "commit" ]
		case "$type" in
		commit)
			# ignore submodules
			continue
			;;
		esac
		echo $tree
		break
	done
}
407
# tree_changed <tree> <parent>...
# Return 0 ("changed") unless exactly one parent is given and that
# parent's top-level tree equals <tree>, in which case return 1.
# Note: assigns the globals $tree and $ptree.
tree_changed () {
	tree=$1
	shift
	# weird parents (none, or more than one): consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	[ "$ptree" != "$tree" ]
}
423
# new_squash_commit <old> <oldsub> <newsub>
# Create a squash commit whose tree is the top-level tree of <newsub>
# and print its hash (the output of "git commit-tree").  When <old> is
# non-empty it becomes the single parent and the message describes the
# oldsub..newsub change; otherwise the commit has no parent and the
# message is written as for initial content.  Uses the global $dir.
# Exits with the failing command's status on error.
new_squash_commit () {
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	case "$old" in
	'')
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
		;;
	*)
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
		;;
	esac
}
438
# copy_or_skip <rev> <tree> <newparents>
# Decide what <rev> maps to in the split history and print that hash.
# If any of the already-rewritten parents has exactly <tree> as its
# top-level tree, that parent is reused; otherwise a new commit is
# created with copy_commit, using the de-duplicated parents as "-p" args.
copy_or_skip () {
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		[ -n "$ptree" ] || continue
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# only add a parent that has not been seen yet
		case "$gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
481
# ensure_clean
# Die unless both the working tree and the index are identical to HEAD,
# as reported by "git diff-index".
ensure_clean () {
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications. Cannot add."
}
491
# ensure_valid_ref_format <name>
# Die unless "refs/heads/<name>" passes "git check-ref-format".
ensure_valid_ref_format () {
	if ! git check-ref-format "refs/heads/$1"; then
		die "'$1' does not look like a ref"
	fi
}
497
# cmd_add <commit>
# cmd_add <repository> <ref>
# Entry point for "git subtree add".  Requires that $dir does not exist
# yet and that the work tree is clean, then dispatches on the number of
# arguments: one argument must name a commit and is added directly; two
# arguments are fetched as <repository> <ref> first.  Any other count is
# an error.
cmd_add () {
	if [ -e "$dir" ]; then
		die "'$dir' already exists. Cannot add."
	fi

	ensure_clean

	case $# in
	1)
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
			die "'$1' does not refer to a commit"

		cmd_add_commit "$@"
		;;
	2)
		# Technically we could accept a refspec here but we're
		# just going to turn around and add FETCH_HEAD under the
		# specified directory. Allowing a refspec might be
		# misleading because we won't do anything with any other
		# branches fetched via the refspec.
		ensure_valid_ref_format "$2"

		cmd_add_repository "$@"
		;;
	*)
		# "$*" keeps the whole message a single word; "$@" inside a
		# string would split it at every parameter.
		say "error: parameters were '$*'"
		die "Provide either a commit or a repository and commit."
		;;
	esac
}
525
# cmd_add_repository <repository> <ref>
# Announce and run "git fetch" with the given arguments, then add the
# fetched commit (FETCH_HEAD) via cmd_add_commit.  Exits with git's
# status when the fetch fails.
# Note: assigns the globals $repository, $refspec and $revs.
cmd_add_repository () {
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	if git fetch "$@"; then
		revs=FETCH_HEAD
		cmd_add_commit "$revs"
	else
		exit $?
	fi
}
536
# cmd_add_commit <commit>
# Read the tree of <commit> into the index under $dir, check it out, and
# record the result as a commit with HEAD (when it differs from
# <commit>) and <commit> as parents; finally reset to that new commit.
# With $squash set, <commit> is first replaced by a parentless squash
# commit of its content.  Any failing git step exits with its status.
cmd_add_commit () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# HEAD becomes a parent only if it is known and differs from $rev
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
567
# cmd_split
# Entry point for "git subtree split".  Walks the commits selected by
# $revs (minus history already covered by earlier splits, unless
# $ignore_joins is set) in topological order, oldest first, and maps
# each one to a commit in the split history, recording old -> new
# mappings in the on-disk cache.  Progress goes through say(), the hash
# of the newest split commit is printed on stdout, and the function
# exits the shell with status 0.
#   $onto   - seed the cache with the commits of this existing history
#   $rejoin - merge the split result back into HEAD ("ours" strategy)
#   $branch - create or fast-forward refs/heads/$branch to the result
cmd_split () {
	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set $rev $rev
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	# The progress line ends in a carriage return so each update
	# overwrites the previous one.  It is produced with printf rather
	# than embedded as a raw control character in this file.
	cr=$(printf '\r')
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		say -n "$revcount/$revmax ($createcount)$cr"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		check_parents $parents

		# ugly. is there no better way to tell if this is a subtree
		# vs. a mainline commit? Does it matter?
		if [ -z "$tree" ]; then
			set_notree $rev
			if [ -n "$newparents" ]; then
				cache_set $rev $rev
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		if rev_exists "refs/heads/$branch"; then
			if ! rev_is_descendant_of_branch "$latest_new" "$branch"; then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
		say "$action branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
660
# cmd_merge <commit>
# Entry point for "git subtree merge".  Resolves the single revision to
# merge, requires a clean tree, and merges it into $prefix.  With
# $squash set, the revision is first turned into a squash commit on top
# of the latest recorded squash; if the subtree is already at that
# revision the script exits 0.  git reporting a version that sorts
# before "git version 1.7" gets the "-s subtree" strategy, anything else
# gets -Xsubtree=<prefix>.  $message, when set, is passed as --message.
cmd_merge () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Collect the merge options in the positional parameters.
	version=$(git version)
	if [ "$version" \< "git version 1.7" ]; then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if [ -n "$message" ]; then
		set -- "$@" --message="$message"
	fi
	git merge "$@" $rev
}
704
# cmd_pull <repository> <ref>
# Entry point for "git subtree pull": after validating the arguments and
# requiring a clean tree, fetch <ref> from <repository> and merge the
# fetched commit (FETCH_HEAD) via cmd_merge.  Exits with git's status
# when the fetch fails.
# Note: assigns the global $revs.
cmd_pull () {
	[ $# -eq 2 ] || die "You must provide <repository> <ref>"
	ensure_clean
	ensure_valid_ref_format "$2"
	if git fetch "$@"; then
		revs=FETCH_HEAD
		cmd_merge "$revs"
	else
		exit $?
	fi
}
717
# cmd_push <repository> <ref>
# Entry point for "git subtree push": split the history of $prefix with
# "git subtree split" and push the resulting commit to
# refs/heads/<ref> in <repository>.  Dies when the argument count is
# wrong, $dir does not exist, or the split fails.
# The repository and ref are quoted when handed to git so that a
# repository path containing whitespace stays one argument.
# Note: assigns the globals $repository, $refspec and $localrev.
cmd_push () {
	if [ $# -ne 2 ]; then
		die "You must provide <repository> <ref>"
	fi
	ensure_valid_ref_format "$2"
	if [ -e "$dir" ]; then
		repository=$1
		refspec=$2
		echo "git push using: " "$repository" "$refspec"
		localrev=$(git subtree split --prefix="$prefix") || die
		git push "$repository" "$localrev:refs/heads/$refspec"
	else
		die "'$dir' must already exist. Try 'git subtree add'."
	fi
}
734
735"cmd_$command" "$@"