1#!/bin/sh
2#
3# git-subtree.sh: split/join git repositories in subdirectories of this one
4#
5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6#
# Invoked without any arguments: act as if -h had been given.
test $# -gt 0 || set -- -h
11OPTS_SPEC="\
12git subtree add --prefix=<prefix> <commit>
13git subtree add --prefix=<prefix> <repository> <ref>
14git subtree merge --prefix=<prefix> <commit>
15git subtree pull --prefix=<prefix> <repository> <ref>
16git subtree push --prefix=<prefix> <repository> <ref>
17git subtree split --prefix=<prefix> <commit...>
18--
19h,help show the help
20q quiet
21d show debug messages
22P,prefix= the name of the subdir to split out
23m,message= use the given message as the commit message for the merge commit
24 options for 'split'
25annotate= add a prefix to commit message of new commits
26b,branch= create a new branch from the split subtree
27ignore-joins ignore prior --rejoin commits
28onto= try connecting new tree to an existing one
29rejoin merge the new branch back into HEAD
30 options for 'add', 'merge', and 'pull'
31squash merge subtree changes as a single commit
32"
33eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
34
# Put git's exec path on PATH so the shared shell library can be sourced.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree

# Option and state variables; all start out empty and are filled in by the
# option loop and the command selection below.
quiet= branch= debug= command=
onto= rejoin= ignore_joins= annotate=
squash= message= prefix=
51
# debug <words...>
# Write the arguments, joined into a single line, to stderr -- but only
# when $debug is non-empty (set by the -d option).
debug () {
	test -z "$debug" || printf "%s\n" "$*" >&2
}
58
# say <words...>
# Write the arguments, joined into a single line, to stderr unless $quiet
# is non-empty (set by the -q option).
say () {
	test -n "$quiet" || printf "%s\n" "$*" >&2
}
65
# progress <words...>
# Like say, but the line ends in a carriage return instead of a newline so
# that the next progress message overwrites it on a terminal.
progress () {
	if test -n "$quiet"
	then
		return 0
	fi
	printf "%s\r" "$*" >&2
}
72
# assert <command> [<args>...]
# Run the given command; if it fails, die with a message that repeats the
# command line.
assert () {
	"$@" || die "assertion failed: " "$@"
}
79
80
# Consume the options one by one until the "--" separator is reached; the
# words left in "$@" afterwards are the subcommand and its arguments.
while test $# -gt 0
do
	opt="$1"
	shift

	case "$opt" in
	# simple switches and their --no- forms
	-q) quiet=1 ;;
	-d) debug=1 ;;
	--rejoin) rejoin=1 ;;
	--no-rejoin) rejoin= ;;
	--ignore-joins) ignore_joins=1 ;;
	--no-ignore-joins) ignore_joins= ;;
	--squash) squash=1 ;;
	--no-squash) squash= ;;
	--no-annotate) annotate= ;;
	--no-prefix) prefix= ;;
	--no-onto) onto= ;;
	# options that take the following word as their value
	--annotate) annotate="$1"; shift ;;
	-b) branch="$1"; shift ;;
	-P) prefix="${1%/}"; shift ;; # one trailing slash is dropped
	-m) message="$1"; shift ;;
	--onto) onto="$1"; shift ;;
	# end of options
	--) break ;;
	*) die "Unexpected option: $opt" ;;
	esac
done
148
# The first remaining word is the subcommand; the rest are its arguments.
command="$1"
shift

# split and push fall back to HEAD when no revision is given; the other
# commands have no default revision.
case "$command" in
split|push)
	default="--default HEAD"
	;;
add|merge|pull)
	default=
	;;
*)
	die "Unknown command '$command'"
	;;
esac

test -n "$prefix" || die "You must provide the --prefix option."

# 'add' requires that the prefix does not exist yet; every other command
# requires that it does.
if test "$command" = "add"
then
	test -e "$prefix" &&
	die "prefix '$prefix' already exists."
else
	test -e "$prefix" ||
	die "'$prefix' does not exist; use 'git subtree add'"
fi

dir="$(dirname "$prefix/.")"

# pull, add and push interpret their own arguments; for the remaining
# commands the arguments must be revisions only.
case "$command" in
pull|add|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs=$(git rev-parse --no-revs --no-flags "$@") || exit $?
	test -z "$dirs" ||
	die "Error: Use --prefix instead of bare filenames."
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
200
# cache_setup
# Point $cachedir at a per-process directory below $GIT_DIR/subtree-cache,
# remove anything left there and recreate it together with its "notree"
# subdirectory. Dies if any of these steps fails.
cache_setup () {
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"
	then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"
	then
		die "Can't create new cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir/notree"
	then
		die "Can't create new cachedir: $cachedir/notree"
	fi
	debug "Using cachedir: $cachedir" >&2
}
211
# cache_get <oldrev>...
# For every argument that has a readable cache entry, print the first line
# stored in it. Arguments without an entry produce no output.
cache_get () {
	for oldrev in "$@"
	do
		test -r "$cachedir/$oldrev" || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
222
# cache_miss <oldrev>...
# Print every argument that has no readable cache entry, one per line.
cache_miss () {
	for oldrev in "$@"
	do
		test -r "$cachedir/$oldrev" && continue
		echo $oldrev
	done
}
232
# check_parents <parents> <indent>
# Make sure every parent has been processed before the commit that
# depends on it.
#   parents - space-separated list of parent commits
#   indent  - recursion depth of the caller; process_split_commit is
#             invoked with indent + 1
# A parent is (re)processed when it has neither a cache entry nor a
# "notree" marker.
check_parents () {
	local indent=$(($2 + 1))
	local missed miss
	# $1 is deliberately left unquoted: it holds a whitespace-separated
	# list and every parent has to be looked up in the cache on its own.
	# Passing it as one word makes the lookup miss for any commit with
	# more than one parent.
	missed=$(cache_miss $1)
	for miss in $missed
	do
		if ! test -r "$cachedir/notree/$miss"
		then
			debug " incorrect order: $miss"
			process_split_commit "$miss" "" "$indent"
		fi
	done
}
245
# set_notree <rev>
# Leave a marker file for <rev> in the cache's "notree" directory.
set_notree () {
	printf '%s\n' 1 >"$cachedir/notree/$1"
}
249
# cache_set <oldrev> <newrev>
# Store <newrev> as the cache entry for <oldrev>. An existing entry is an
# error, except for the two bookkeeping keys latest_old and latest_new,
# which may be overwritten.
cache_set () {
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		test -e "$cachedir/$oldrev" &&
		die "cache for $oldrev already exists!"
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
261
# rev_exists <rev>
# Return 0 when git rev-parse accepts <rev>, 1 otherwise. Nothing is
# printed.
rev_exists () {
	git rev-parse "$1" >/dev/null 2>&1 || return 1
}
270
# rev_is_descendant_of_branch <newrev> <branch>
# Return 0 when rev-list finds no commit that is reachable from <branch>
# but excluded by ^<newrev>, 1 otherwise.
rev_is_descendant_of_branch () {
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse "$branch")
	match=$(git rev-list -1 "$branch_hash" "^$newrev")

	# an empty result means the branch has nothing that newrev lacks
	test -z "$match"
}
284
# try_remove_previous <rev>
# Print "^<rev>^", i.e. an exclusion of <rev>'s first parent, when that
# parent exists. A commit without a parent has nothing that could show up
# in the rev-list, so nothing is printed for it.
try_remove_previous () {
	rev_exists "$1^" || return 0
	echo "^$1^"
}
294
# find_latest_squash <dir>
# Walk the log of HEAD, newest first, looking for the most recent commit
# whose message carries a "git-subtree-dir: <dir>" trailer together with a
# "git-subtree-split:" trailer. On the first hit, print
# "<commit> <split-hash>" and stop. For a commit that also has a
# "git-subtree-mainline:" trailer (a rejoin), the split hash is printed in
# place of the commit. Prints nothing when no such commit exists.
find_latest_squash () {
	# assign first so that the debug line reports the directory that is
	# actually being searched, not whatever $dir held before the call
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read a b junk
	do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$(git rev-parse "$b^0")" ||
			die "could not rev-parse split hash $b from commit $sq"
			;;
		END)
			if test -n "$sub"
			then
				if test -n "$main"
				then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			# no split trailer in this commit: reset and keep looking
			sq=
			main=
			sub=
			;;
		esac
	done
}
338
# find_existing_splits <dir> <revs>
# Scan the log of <revs> for commits recording an earlier squash or
# split/rejoin of <dir> and seed the cache from them:
#   - split trailer only (squash): cache <commit> -> <split>
#   - mainline and split trailers: cache <mainline> -> <split> and
#     <split> -> <split>, and print rev-list exclusions for both
# With $ignore_joins set, only commits whose message matches the
# "Add '<dir>/' from commit '" pattern are considered.
find_existing_splits () {
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	local grep_format
	if test -n "$ignore_joins"
	then
		grep_format="^Add '$dir/' from commit '"
	else
		grep_format="^git-subtree-dir: $dir/*\$"
	fi
	git log --grep="$grep_format" \
		--no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read key value junk
	do
		case "$key" in
		START)
			sq="$value"
			;;
		git-subtree-mainline:)
			main="$value"
			;;
		git-subtree-split:)
			sub="$(git rev-parse "$value^0")" ||
			die "could not rev-parse split hash $value from commit $sq"
			;;
		END)
			debug " Main is: '$main'"
			if test -n "$sub"
			then
				if test -z "$main"
				then
					# squash commits refer to a subtree
					debug " Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				else
					debug " Prior: $main -> $sub"
					cache_set $main $sub
					cache_set $sub $sub
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
			fi
			main=
			sub=
			;;
		esac
	done
}
387
# copy_commit <rev> <tree> <parent-args>
# Create a new commit that reuses the author identity, committer identity,
# dates and message of <rev> but points at <tree>. $annotate is put in
# front of the message. <parent-args> is a string of "-p <parent>" words
# that is expanded unquoted. The output of git commit-tree goes to stdout;
# dies if the copy fails.
copy_commit () {
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --no-show-signature --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		# -r keeps backslashes in the identity fields intact; a plain
		# read would treat them as escape characters and drop them
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		(
			printf "%s" "$annotate"
			cat
		) |
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
413
# add_msg <dir> <latest_old> <latest_new>
# Print the commit message for 'add': $message if one was given, otherwise
# a default subject, followed by a blank line and the git-subtree-dir,
# git-subtree-mainline and git-subtree-split trailers.
add_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if test -z "$commit_message"
	then
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	printf '%s\n\n' "$commit_message"
	printf 'git-subtree-dir: %s\n' "$dir"
	printf 'git-subtree-mainline: %s\n' "$latest_old"
	printf 'git-subtree-split: %s\n' "$latest_new"
}
432
# add_squashed_msg <commit> <dir>
# Print the message for the merge commit created by 'add --squash':
# $message if one was given, otherwise a default naming both arguments.
add_squashed_msg () {
	case "$message" in
	"")
		echo "Merge commit '$1' as '$2'"
		;;
	*)
		echo "$message"
		;;
	esac
}
441
# rejoin_msg <dir> <latest_old> <latest_new>
# Print the commit message for 'split --rejoin': $message if one was given,
# otherwise a default subject, followed by a blank line and the
# git-subtree-dir, git-subtree-mainline and git-subtree-split trailers.
rejoin_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if test -z "$commit_message"
	then
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	printf '%s\n\n' "$commit_message"
	printf 'git-subtree-dir: %s\n' "$dir"
	printf 'git-subtree-mainline: %s\n' "$latest_old"
	printf 'git-subtree-split: %s\n' "$latest_new"
}
460
# squash_msg <dir> <oldsub> <newsub>
# Print the message for a squash commit. With an empty <oldsub> only a
# one-line subject is produced; otherwise the subject names the abbreviated
# range and is followed by one line per commit in <oldsub>..<newsub> plus
# one "REVERT:" line per commit in <newsub>..<oldsub>. The message always
# ends with the git-subtree-dir and git-subtree-split trailers.
squash_msg () {
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if test -z "$oldsub"
	then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --no-show-signature --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --no-show-signature --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}
482
# toptree_for_commit <commit>
# Print the hash of <commit>'s top-level tree. Exits with git's status if
# the tree cannot be resolved.
toptree_for_commit () {
	commit="$1"
	git rev-parse --verify "$commit^{tree}" && return
	exit $?
}
487
# subtree_for_commit <commit> <dir>
# Print the hash of the tree stored at <dir> in <commit>. Prints nothing
# when ls-tree lists no entry for <dir> or when the entry is of type
# "commit" (a submodule).
subtree_for_commit () {
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read entry_mode entry_type entry_hash entry_path
	do
		assert test "$entry_path" = "$dir"
		assert test "$entry_type" = "tree" -o "$entry_type" = "commit"
		if test "$entry_type" != "commit" # ignore submodules
		then
			echo $entry_hash
			break
		fi
	done
}
501
# tree_changed <tree> <parent>...
# Return 0 ("changed") unless exactly one parent is given and that
# parent's top-level tree equals <tree>, in which case 1 is returned.
tree_changed () {
	tree=$1
	shift
	# weird parents, consider it changed
	test $# -eq 1 || return 0

	ptree=$(toptree_for_commit $1)
	# a differing tree means changed
	test "$ptree" = "$tree" || return 0
	return 1 # not changed
}
518
# new_squash_commit <old> <oldsub> <newsub>
# Create a squash commit holding the top-level tree of <newsub> and print
# its hash. With a non-empty <old>, that commit becomes the parent and the
# message describes <oldsub>..<newsub>; otherwise the commit has no parent
# and the message describes <newsub> alone. Uses the global $dir.
new_squash_commit () {
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	case "$old" in
	"")
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
		;;
	*)
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
		;;
	esac
}
533
# copy_or_skip <rev> <tree> <newparents>
# Decide whether <rev> needs its own commit in the split history.
#   rev        - the original commit
#   tree       - the subtree it should point to (must not be empty)
#   newparents - space-separated list of already-rewritten parents
# If some parent already has exactly <tree> and no other history has to be
# preserved, that parent's hash is printed and no commit is made. Otherwise
# the commit is copied via copy_commit with the de-duplicated parents.
copy_or_skip () {
	rev="$1"
	tree="$2"
	newparents="$3"
	assert test -n "$tree"

	identical=
	nonidentical=
	p=
	gotparents=
	copycommit=
	for parent in $newparents
	do
		ptree=$(toptree_for_commit $parent) || exit $?
		test -n "$ptree" || continue

		if test "$ptree" != "$tree"
		then
			nonidentical="$parent"
		elif test -z "$identical"
		then
			# first identical parent detected; it could be used in
			# place of this rev
			identical="$parent"
		else
			# a previous identical parent was found, check whether
			# one is already an ancestor of the other
			mergebase=$(git merge-base $identical $parent)
			if test "$identical" = "$mergebase"
			then
				# current identical commit is an ancestor of parent
				identical="$parent"
			elif test "$parent" != "$mergebase"
			then
				# no common history; commit must be copied
				copycommit=1
			fi
		fi

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case "$gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if test -n "$identical" && test -n "$nonidentical"
	then
		extras=$(git rev-list --count $identical..$nonidentical)
		if test "$extras" -ne 0
		then
			# we need to preserve history along the other branch
			copycommit=1
		fi
	fi

	if test -z "$identical" || test -n "$copycommit"
	then
		copy_commit "$rev" "$tree" "$p" || exit $?
	else
		echo $identical
	fi
}
608
# ensure_clean
# Die unless both the working tree and the index are identical to HEAD.
ensure_clean () {
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications.  Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications.  Cannot add."
}
619
# ensure_valid_ref_format <name>
# Die unless refs/heads/<name> passes git check-ref-format.
ensure_valid_ref_format () {
	if ! git check-ref-format "refs/heads/$1"
	then
		die "'$1' does not look like a ref"
	fi
}
624
# process_split_commit <rev> <parents> <indent>
# Map one commit of the original history to its counterpart in the split
# history and record the mapping in the cache.
#   rev     - the commit to process
#   parents - its parents as given by the caller; replaced by a fresh
#             lookup unless indent is 0
#   indent  - 0 when called from the main loop of cmd_split, non-zero when
#             called from check_parents
# Updates the global counters revcount/extracount/createcount and the
# cache keys latest_old and latest_new.
process_split_commit () {
	local rev="$1"
	local parents="$2"
	local indent=$3

	if test $indent -eq 0
	then
		revcount=$(($revcount + 1))
	else
		# processing commit without normal parent information;
		# fetch from repo
		parents=$(git rev-parse "$rev^@")
		extracount=$(($extracount + 1))
	fi

	progress "$revcount/$revmax ($createcount) [$extracount]"

	debug "Processing commit: $rev"
	# a commit that already has a cache entry needs no further work
	exists=$(cache_get "$rev")
	if test -n "$exists"
	then
		debug " prior: $exists"
		return
	fi
	createcount=$(($createcount + 1))
	debug " parents: $parents"
	# make sure the parents are handled first, then translate them
	check_parents "$parents" "$indent"
	newparents=$(cache_get $parents)
	debug " newparents: $newparents"

	tree=$(subtree_for_commit "$rev" "$dir")
	debug " tree is: $tree"

	# ugly.  is there no better way to tell if this is a subtree
	# vs. a mainline commit?  Does it matter?
	if test -z "$tree"
	then
		# no subtree in this commit: mark it, and if it has rewritten
		# parents map it to itself
		set_notree "$rev"
		if test -n "$newparents"
		then
			cache_set "$rev" "$rev"
		fi
		return
	fi

	newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
	debug " newrev is: $newrev"
	cache_set "$rev" "$newrev"
	# remember the most recent pair for the caller
	cache_set latest_new "$newrev"
	cache_set latest_old "$rev"
}
676
# cmd_add <commit>
# cmd_add <repository> <ref>
# Entry point for 'git subtree add'. Requires that $dir does not exist and
# that the work tree is clean, then hands over to cmd_add_commit (one
# argument) or cmd_add_repository (two arguments). Any other number of
# arguments is an error.
cmd_add () {
	test -e "$dir" &&
	die "'$dir' already exists.  Cannot add."

	ensure_clean

	case $# in
	1)
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
		die "'$1' does not refer to a commit"

		cmd_add_commit "$@"
		;;
	2)
		# Technically we could accept a refspec here but we're
		# just going to turn around and add FETCH_HEAD under the
		# specified directory.  Allowing a refspec might be
		# misleading because we won't do anything with any other
		# branches fetched via the refspec.
		ensure_valid_ref_format "$2"

		cmd_add_repository "$@"
		;;
	*)
		say "error: parameters were '$@'"
		die "Provide either a commit or a repository and commit."
		;;
	esac
}
707
# cmd_add_repository <repository> <ref>
# Fetch <ref> from <repository> and add the fetched commit (FETCH_HEAD) via
# cmd_add_commit. Exits with git fetch's status if the fetch fails.
cmd_add_repository () {
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_add_commit "$revs"
}
717
# cmd_add_commit <commit>
# Read the tree of <commit> into the index under $dir, check it out and
# record the result as a new commit on top of HEAD, then reset to it.
# With $squash set, the subtree history is first collapsed into a single
# squash commit which becomes the second parent; otherwise the peeled
# <commit> itself is the second parent. Any failing git step ends the
# script with that step's status.
cmd_add_commit () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	# HEAD becomes a parent unless it is the very commit being added
	headrev=$(git rev-parse HEAD) || exit $?
	headp=
	if test -n "$headrev" && test "$headrev" != "$rev"
	then
		headp="-p $headrev"
	fi

	case "$squash" in
	"")
		revp=$(peel_committish "$rev") &&
		commit=$(add_msg "$dir" $headrev "$rev" |
			git commit-tree "$tree" $headp -p "$revp") || exit $?
		;;
	*)
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree "$tree" $headp -p "$rev") || exit $?
		;;
	esac
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
750
# cmd_split
# Entry point for 'git subtree split'. Rewrites the history selected by
# the global $revs so that it only contains the contents of $dir, and
# prints the hash of the newest rewritten commit. Honors $onto, $rejoin
# and $branch. Always ends the script: exit 0 on success, die or a failing
# command's status otherwise.
cmd_split () {
	debug "Splitting $dir..."
	cache_setup || exit $?

	if test -n "$onto"
	then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev
		do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set "$rev" "$rev"
		done
	fi

	# seeds the cache from earlier splits; its output is a list of
	# exclusions that is appended to the rev-list command below
	unrevs="$(find_existing_splits "$dir" "$revs")"

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents at the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	# The command is kept as a string and eval'd twice: once to count the
	# commits, once to process them.
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	extracount=0
	eval "$grl" |
	while read rev parents
	do
		process_split_commit "$rev" "$parents" 0
	done || exit $?

	# the loop above ran as part of a pipeline, so its result is read back
	# from the cache rather than from variables
	latest_new=$(cache_get latest_new)
	if test -z "$latest_new"
	then
		die "No new revisions were found"
	fi

	if test -n "$rejoin"
	then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			--allow-unrelated-histories \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			"$latest_new" >&2 || exit $?
	fi
	if test -n "$branch"
	then
		if rev_exists "refs/heads/$branch"
		then
			# an existing branch is only moved when the check against
			# the new commit passes
			if ! rev_is_descendant_of_branch "$latest_new" "$branch"
			then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' \
			"refs/heads/$branch" "$latest_new" || exit $?
		say "$action branch '$branch'"
	fi
	# the only thing written to stdout: the new tip of the split history
	echo "$latest_new"
	exit 0
}
818
# cmd_merge <commit>
# Entry point for 'git subtree merge'. Merges exactly one revision into
# $prefix. With $squash set, the revision is first replaced by a new
# squash commit on top of the latest recorded squash; if the subtree is
# already at that revision the script exits successfully. A git reporting
# a version string below "git version 1.7" gets "-s subtree", anything
# else gets "-Xsubtree=<prefix>". $message, when set, becomes the merge
# message.
cmd_merge () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	test $# -eq 1 ||
		die "You must provide exactly one revision.  Got: '$revs'"
	rev="$1"

	if test -n "$squash"
	then
		first_split="$(find_latest_squash "$dir")"
		test -n "$first_split" ||
			die "Can't squash-merge: '$dir' was never added."
		set $first_split
		old=$1
		sub=$2
		if test "$sub" = "$rev"
		then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# collect the merge options in the positional parameters
	version=$(git version)
	if test "$version" \< "git version 1.7"
	then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if test -n "$message"
	then
		set -- "$@" --message="$message"
	fi
	git merge "$@" "$rev"
}
869
# cmd_pull <repository> <ref>
# Entry point for 'git subtree pull': fetch <ref> from <repository> and
# merge the fetched commit (FETCH_HEAD) via cmd_merge. Requires exactly two
# arguments, a clean work tree and a well-formed <ref>.
cmd_pull () {
	test $# -eq 2 ||
		die "You must provide <repository> <ref>"
	ensure_clean
	ensure_valid_ref_format "$2"
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_merge "$revs"
}
882
# cmd_push <repository> <ref>
# Entry point for 'git subtree push': split $prefix with a nested
# 'git subtree split' and push the resulting commit to refs/heads/<ref> in
# <repository>. Requires exactly two arguments, a well-formed <ref> and an
# existing $dir.
cmd_push () {
	test $# -eq 2 ||
		die "You must provide <repository> <ref>"
	ensure_valid_ref_format "$2"
	test -e "$dir" ||
		die "'$dir' must already exist. Try 'git subtree add'."

	repository=$1
	refspec=$2
	echo "git push using: " "$repository" "$refspec"
	localrev=$(git subtree split --prefix="$prefix") || die
	git push "$repository" "$localrev":"refs/heads/$refspec"
}
900
901"cmd_$command" "$@"