1#!/bin/sh
2#
3# git-subtree.sh: split/join git repositories in subdirectories of this one
4#
5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6#
# Called without any argument: behave as if -h had been given.
if test $# -eq 0
then
	set -- -h
fi

# Option specification fed to "git rev-parse --parseopt": usage lines,
# a "--" separator, then one line per option.
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree add --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <ref>
git subtree push --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help show the help
q quiet
d show debug messages
P,prefix= the name of the subdir to split out
m,message= use the given message as the commit message for the merge commit
 options for 'split'
annotate= add a prefix to commit message of new commits
b,branch= create a new branch from the split subtree
ignore-joins ignore prior --rejoin commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash merge subtree changes as a single commit
"

# Evaluate whatever parseopt prints for this command line.  If rev-parse
# itself fails, "exit <its status>" is appended to the evaluated text so
# the script stops with that status.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
33
# Append git's exec path to PATH before sourcing git-sh-setup
# (presumably that is where git-sh-setup is installed -- confirm).
PATH="$PATH:$(git --exec-path)"
. git-sh-setup

require_work_tree

# Option state.  Everything starts out empty; the option loop below fills
# in whatever was given on the command line.
command=
prefix=
message=
quiet=
debug=

# 'split' options
annotate=
branch=
ignore_joins=
onto=
rejoin=

# 'add', 'merge' and 'pull' options
squash=
50
# Print all arguments as a single line on stderr, but only when $debug is
# non-empty (set by -d).
debug()
{
	[ -z "$debug" ] && return 0
	printf "%s\n" "$*" >&2
}
57
# Print all arguments as a single line on stderr unless $quiet is set
# (set by -q).
say()
{
	[ -n "$quiet" ] || printf "%s\n" "$*" >&2
}
64
# Like say, but terminates the text with a carriage return instead of a
# newline, so the next message written to stderr overwrites it.
progress()
{
	case "$quiet" in
	"")
		printf "%s\r" "$*" >&2
		;;
	esac
}
71
# Run the arguments as a command; if it fails, die with a message that
# includes the command.
assert()
{
	"$@" && return 0
	die "assertion failed: " "$@"
}
80
81
82#echo "Options: $*"
83
# Walk the option words until the "--" separator; whatever remains after
# it is the subcommand and its arguments.
while [ $# -gt 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	--)
		break
		;;

	# general options
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	-P)
		# drop one trailing slash from the prefix
		prefix="${1%/}"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	-m)
		message="$1"
		shift
		;;

	# options for 'split'
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;

	# options for 'add', 'merge' and 'pull'
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;

	*)
		die "Unexpected option: $opt"
		;;
	esac
done
108
# The first remaining word is the subcommand.  Only shift when there is
# something to drop: shifting with no positional parameters left is an
# error that may make a non-interactive POSIX shell abort right here,
# before the "Unknown command" message below can be printed.
command="$1"
if [ $# -gt 0 ]; then
	shift
fi

# $default is passed (unquoted, so it splits into two words) to
# "git rev-parse"; split and push use it to fall back to HEAD.
case "$command" in
add|merge|pull)
	default=
	;;
split|push)
	default="--default HEAD"
	;;
*)
	die "Unknown command '$command'"
	;;
esac

if [ -z "$prefix" ]; then
	die "You must provide the --prefix option."
fi

# 'add' needs the prefix to be free; every other command needs it to exist.
case "$command" in
add)
	if [ -e "$prefix" ]; then
		die "prefix '$prefix' already exists."
	fi
	;;
*)
	if [ ! -e "$prefix" ]; then
		die "'$prefix' does not exist; use 'git subtree add'"
	fi
	;;
esac

dir="$(dirname "$prefix/.")"

# add, pull and push take care of their own arguments; for the remaining
# commands the arguments must be revisions only, never bare paths.
case "$command" in
add|pull|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	if [ -n "$dirs" ]; then
		die "Error: Use --prefix instead of bare filenames."
	fi
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
144
# Create an empty per-process cache directory (named after this shell's
# PID) below $GIT_DIR/subtree-cache, including its "notree" subdirectory.
# Sets $cachedir.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir/notree"; then
		die "Can't create new cachedir: $cachedir/notree"
	fi
	debug "Using cachedir: $cachedir" >&2
}
153
# For each revision named in the arguments, print the value stored for it
# in the cache.  Revisions without a readable cache entry are skipped.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
163
# Print every revision from the arguments that has no readable entry in
# the cache.
cache_miss()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || echo $oldrev
	done
}
172
# Emit a debug note for every given parent that is neither cached nor
# marked in the "notree" directory.
check_parents()
{
	missed=$(cache_miss $*)
	for miss in $missed; do
		[ -r "$cachedir/notree/$miss" ] ||
			debug " incorrect order: $miss"
	done
}
182
# Mark revision $1 in the cache's "notree" directory.
set_notree()
{
	printf '%s\n' 1 >"$cachedir/notree/$1"
}
187
# Store $2 as the cached value for key $1.  The keys "latest_old" and
# "latest_new" may be overwritten; any other key must not exist yet.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
199
# Return 0 if "git rev-parse" accepts $1, 1 otherwise.  All output of
# rev-parse is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 && return 0
	return 1
}
208
# Return 0 when "git rev-list" finds no commit that is reachable from
# branch $2 but not from revision $1; return 1 otherwise.
rev_is_descendant_of_branch()
{
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse $branch)
	match=$(git rev-list -1 $branch_hash ^$newrev)

	case "$match" in
	"")
		return 0
		;;
	*)
		return 1
		;;
	esac
}
222
# Print "^<rev>^" (an exclusion of the first parent of $1) when that
# parent exists.  A commit without a parent prints nothing: there is
# nothing to remove from the rev-list in that case.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
232
# Scan the history of HEAD for the newest commit whose message carries a
# "git-subtree-dir: <dir>" line together with a "git-subtree-split:" line
# and print "<commit> <split>" for it.  For a rejoin commit (one that
# also has "git-subtree-mainline:") the split revision is printed in both
# positions.  Prints nothing when no such commit is found.
#
# $1 - subtree directory to look for
find_latest_squash()
{
	# Assign first so that the debug line reports the directory that
	# is actually searched, not whatever $dir held before the call.
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	# -r: commit message lines are data, keep backslashes untouched
	while read -r a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			if [ -n "$sub" ]; then
				if [ -n "$main" ]; then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			# this commit carried no split line; start over
			sq=
			main=
			sub=
			;;
		esac
	done
}
267
# Walk the history of $2 looking for commits that mention subtree
# directory $1 and seed the cache from them:
#   - a commit with only "git-subtree-split:" (a squash) is cached as
#     <commit> -> <split>;
#   - a commit that also has "git-subtree-mainline:" caches
#     <mainline> -> <split> and <split> -> <split>, and prints whatever
#     try_remove_previous yields for both revisions.
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk; do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			debug " Main is: '$main'"
			if [ -n "$sub" ]; then
				if [ -z "$main" ]; then
					# squash commits refer to a subtree
					debug " Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				else
					debug " Prior: $main -> $sub"
					cache_set $main $sub
					cache_set $sub $sub
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
			fi
			main=
			sub=
			;;
		esac
	done
}
302
# Create a new commit that reuses the author, committer and message of
# commit $1, with tree $2 and the parent options given in $3 (a string
# such as " -p <rev> -p <rev>", split into words on purpose).  The value
# of $annotate is prepended to the message.  Whatever "git commit-tree"
# prints goes to stdout.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		# read -r: the identity lines are data; without -r a
		# backslash in a name would be silently dropped.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# the rest of stdin is the commit message (%B)
		(printf "%s" "$annotate"; cat ) |
		git commit-tree "$2" $3
	) || die "Can't copy commit $1"
}
326
# Print the commit message for 'add': a subject line ($message if given,
# otherwise a generated one), a blank line, and the git-subtree-dir /
# -mainline / -split lines.
#
# $1 - subtree directory
# $2 - mainline revision
# $3 - subtree revision being added
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	# printf rather than a "<<-" here-document: "<<-" only strips
	# leading tabs, so the message (and even the end marker) would
	# depend on this file's indentation being literal tabs.
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
345
# Print the message for the merge commit of a squashed 'add': $message
# when one was given, otherwise a line built from commit $1 and
# directory $2.
add_squashed_msg()
{
	if [ -z "$message" ]; then
		echo "Merge commit '$1' as '$2'"
		return
	fi
	echo "$message"
}
354
# Print the commit message for a 'split --rejoin' merge: a subject line
# ($message if given, otherwise a generated one), a blank line, and the
# git-subtree-dir / -mainline / -split lines.
#
# $1 - subtree directory
# $2 - mainline revision
# $3 - split revision
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	# printf rather than a "<<-" here-document: "<<-" only strips
	# leading tabs, so the message (and even the end marker) would
	# depend on this file's indentation being literal tabs.
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
373
# Print the message of a squash commit for directory $1 that brings the
# subtree to revision $3.  With a previous subtree revision in $2 the
# message lists the commits in $2..$3, followed by "REVERT:" lines for
# the commits in $3..$2; without one it is a single "content from"
# line.  Both variants end with the git-subtree-dir and
# git-subtree-split lines.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}
395
# Print the top-level tree (%T) of commit $1.  If "git log" fails, exit
# with its status.
toptree_for_commit()
{
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		return 0
	else
		exit $?
	fi
}
401
# Print the tree object recorded for directory $2 in commit $1.  Nothing
# is printed when the directory is absent from that commit or is a
# submodule (entry of type "commit").
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name; do
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" -o "$type" = "commit" ]
		case "$type" in
		commit)
			# ignore submodules
			continue
			;;
		esac
		echo $tree
		break
	done
}
415
# Usage: tree_changed <tree> <parent>...
# Return 1 only when there is exactly one parent and its top-level tree
# equals <tree>; return 0 ("changed") in every other case.
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	[ "$ptree" != "$tree" ]
}
431
# Create a squash commit whose tree is the top-level tree of $3 and
# print what "git commit-tree" prints.  With a previous squash commit in
# $1 it becomes the parent and $2 is passed to squash_msg as the old
# subtree revision; otherwise the commit has no parent.  Reads the
# global $dir.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
	fi
}
446
# Decide how revision $1, whose subtree is tree $2, appears in the split
# history, given its already-rewritten parents $3 (space separated):
# print an existing parent when one has the very same tree and no
# history would be lost by reusing it, otherwise create a copy of the
# commit on top of the de-duplicated parents and print that.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		if [ "$ptree" != "$tree" ]; then
			nonidentical="$parent"
		else
			# an identical parent could be used in place of
			# this rev.
			identical="$parent"
		fi

		# sometimes both old parents map to the same newparent;
		# only add a parent that is not in the list yet
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	copycommit=
	if [ -n "$identical" ] && [ -n "$nonidentical" ]; then
		extras=$(git rev-list --count $identical..$nonidentical)
		if [ "$extras" -ne 0 ]; then
			# we need to preserve history along the other branch
			copycommit=1
		fi
	fi

	if [ -z "$identical" ] || [ -n "$copycommit" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
497
# Die unless both the working tree and the index match HEAD according to
# "git diff-index".
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications. Cannot add."
}
507
# Die unless "refs/heads/$1" passes "git check-ref-format".
ensure_valid_ref_format()
{
	if ! git check-ref-format "refs/heads/$1"; then
		die "'$1' does not look like a ref"
	fi
}
513
# 'add' entry point.  With one argument it must name a commit, which is
# added directly; with two arguments (<repository> <ref>) the ref is
# fetched first.  Any other argument count is an error.
cmd_add()
{
	if [ -e "$dir" ]; then
		die "'$dir' already exists. Cannot add."
	fi

	ensure_clean

	case $# in
	1)
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
		die "'$1' does not refer to a commit"

		cmd_add_commit "$@"
		;;
	2)
		# Technically we could accept a refspec here but we're
		# just going to turn around and add FETCH_HEAD under the
		# specified directory. Allowing a refspec might be
		# misleading because we won't do anything with any other
		# branches fetched via the refspec.
		ensure_valid_ref_format "$2"

		cmd_add_repository "$@"
		;;
	*)
		say "error: parameters were '$*'"
		die "Provide either a commit or a repository and commit."
		;;
	esac
}
541
# Fetch <ref> ($2) from <repository> ($1), echoing the fetch command
# first, and add the resulting FETCH_HEAD.
cmd_add_repository()
{
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_add_commit "$revs"
}
552
# Read the tree of the given commit into the index below $dir, check it
# out, and record the result as a commit with HEAD and the added commit
# as parents (with --squash: a fresh squash commit of it instead), then
# reset to that new commit.
cmd_add_commit()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# HEAD is left out as a parent when it is the commit being added
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		revp=$(peel_committish "$rev") || exit $?
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree $tree $headp -p "$revp") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
584
# 'split' entry point: rewrite the history in $revs so that only the
# contents of $dir remain, and print the newest resulting commit.
#
# Reads the globals $dir, $revs, $onto, $ignore_joins, $rejoin and
# $branch.  With --rejoin the result is merged back into HEAD using the
# "ours" strategy; with --branch the named branch is created or updated.
# Always ends the script: exit 0 on success, non-zero on any failure.
cmd_split()
{
	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set $rev $rev
		done || exit $?	# a die inside the loop only ends the
				# pipeline's subshell; pass it on
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		# The command substitution runs in a subshell, so a failure
		# in there has to be propagated explicitly.
		unrevs="$(find_existing_splits "$dir" "$revs")" || exit $?
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents at the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		progress "$revcount/$revmax ($createcount)"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		check_parents $parents

		# ugly. is there no better way to tell if this is a subtree
		# vs. a mainline commit? Does it matter?
		if [ -z "$tree" ]; then
			set_notree $rev
			if [ -n "$newparents" ]; then
				cache_set $rev $rev
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?

	# The loop ran in a subshell; its results are only visible through
	# the cache.
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(rejoin_msg "$dir" $latest_old $latest_new)" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		if rev_exists "refs/heads/$branch"; then
			if ! rev_is_descendant_of_branch $latest_new $branch; then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
		say "$action branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
677
# 'merge' entry point: merge exactly one revision into the subtree at
# $prefix.  With --squash the revision is first turned into a squash
# commit on top of the latest squash found in history.  A git older than
# 1.7 is driven with "-s subtree", newer ones with "-Xsubtree=<prefix>".
cmd_merge()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Collect the merge options in the positional parameters, which
	# are no longer needed at this point.
	version=$(git version)
	if [ "$version" \< "git version 1.7" ]; then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if [ -n "$message" ]; then
		set -- "$@" --message="$message"
	fi
	git merge "$@" $rev
}
721
# 'pull' entry point: fetch <ref> ($2) from <repository> ($1) and merge
# the resulting FETCH_HEAD into the subtree.
cmd_pull()
{
	[ $# -eq 2 ] || die "You must provide <repository> <ref>"
	ensure_clean
	ensure_valid_ref_format "$2"
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_merge "$revs"
}
734
# 'push' entry point: split the subtree at $prefix and push the
# resulting commit to refs/heads/<ref> ($2) of <repository> ($1).
cmd_push()
{
	if [ $# -ne 2 ]; then
		die "You must provide <repository> <ref>"
	fi
	ensure_valid_ref_format "$2"
	[ -e "$dir" ] ||
		die "'$dir' must already exist. Try 'git subtree add'."

	repository=$1
	refspec=$2
	echo "git push using: " $repository $refspec
	localrev=$(git subtree split --prefix="$prefix") || die
	git push "$repository" $localrev:refs/heads/$refspec
}
751
# Hand the remaining arguments to the function implementing the
# subcommand (cmd_add, cmd_merge, cmd_pull, cmd_push or cmd_split).
"cmd_${command}" "$@"