1#!/bin/sh
2#
3# git-subtree.sh: split/join git repositories in subdirectories of this one
4#
5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6#
# With no arguments at all, behave as if the user had asked for help.
[ $# -ne 0 ] || set -- -h
# Option specification in the format understood by
# 'git rev-parse --parseopt': usage lines, a '--' separator, then one
# option per line.  Lines that start with a space are group headers.
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree add --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <ref>
git subtree push --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help show the help
q quiet
d show debug messages
P,prefix= the name of the subdir to split out
m,message= use the given message as the commit message for the merge commit
 options for 'split'
annotate= add a prefix to commit message of new commits
b,branch= create a new branch from the split subtree
ignore-joins ignore prior --rejoin commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
 options for 'add', 'merge', 'pull' and 'push'
squash merge subtree changes as a single commit
"
# Let git normalize the command line; its output is shell code that we
# evaluate.  If the parser itself fails, evaluate "exit <status>" instead.
eval "$(
	printf '%s\n' "$OPTS_SPEC" |
	git rev-parse --parseopt -- "$@" ||
	echo exit $?
)"
33
# Make git's helper scripts reachable, then load the shared shell library.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree

# Option state; everything starts out empty and is filled in by the
# option-parsing loop.
quiet= branch= debug= command=
onto= rejoin= ignore_joins= annotate=
squash= message= prefix=
50
# Print the arguments on stderr, but only when $debug is non-empty.
debug()
{
	[ -z "$debug" ] || echo "$@" >&2
}
57
# Print the arguments on stderr unless $quiet is non-empty.
say()
{
	[ -n "$quiet" ] || echo "$@" >&2
}
64
# Run the given command; if it fails, abort through die, reporting the
# command that was expected to succeed.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
73
74
75#echo "Options: $*"
76
# Consume the normalized options up to and including the "--" separator.
# Options that carry a value take it from the following argument.
while [ $# -gt 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	-P)
		prefix="$1"
		shift
		;;
	-m)
		message="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
101
# The first remaining argument is the subcommand.  'split' and 'push'
# fall back to HEAD when no revision is given.
command="$1"
shift
case "$command" in
add|merge|pull)
	default=
	;;
split|push)
	default="--default HEAD"
	;;
*)
	die "Unknown command '$command'"
	;;
esac

[ -n "$prefix" ] || die "You must provide the --prefix option."

# 'add' needs the prefix to be absent; every other command needs it present.
if [ "$command" = "add" ]; then
	[ -e "$prefix" ] &&
		die "prefix '$prefix' already exists."
else
	[ -e "$prefix" ] ||
		die "'$prefix' does not exist; use 'git subtree add'"
fi

dir="$(dirname "$prefix/.")"

# Only commands that take revisions directly get them parsed here;
# pull, add and push handle their own arguments.
case "$command" in
pull|add|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	[ -z "$dirs" ] ||
		die "Error: Use --prefix instead of bare filenames."
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
137
# Create a fresh, empty per-process cache directory under
# $GIT_DIR/subtree-cache (named after this shell's PID), including its
# 'notree' subdirectory, and record its path in $cachedir.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir/notree"; then
		die "Can't create new cachedir: $cachedir/notree"
	fi
	debug "Using cachedir: $cachedir" >&2
}
146
# For each revision given, print the value cached for it (one per line).
# Revisions without a readable cache entry are skipped silently.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
156
# Print, one per line, those of the given revisions that have no readable
# entry in the cache directory.
cache_miss()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] && continue
		echo $oldrev
	done
}
165
# Emit a debug note for every given parent that is neither cached nor
# marked in the 'notree' directory.
check_parents()
{
	missed=$(cache_miss $*)
	for miss in $missed; do
		[ -r "$cachedir/notree/$miss" ] ||
			debug " incorrect order: $miss"
	done
}
175
# Mark revision $1 in the cache's 'notree' directory by writing a marker
# file named after it.
set_notree()
{
	echo 1 >"$cachedir/notree/$1"
}
180
# Record the mapping $1 -> $2 in the cache.  An existing entry is a fatal
# error, except for the special keys latest_old and latest_new, which
# may be overwritten.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
192
# Succeed (0) when 'git rev-parse' accepts $1, otherwise return 1.
# All output of git is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
201
# Succeed when 'git rev-list' finds no commit that is reachable from
# branch $2 but not from revision $1.
rev_is_descendant_of_branch()
{
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse $branch)
	match=$(git rev-list -1 $branch_hash ^$newrev)

	# An empty result means everything on the branch is also in $newrev.
	[ -z "$match" ]
}
215
# Print "^<rev>^" (an exclusion of the parent of $1) when that parent
# resolves.  A commit without a parent has nothing to exclude from the
# rev-list, so nothing is printed in that case.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
225
# Scan the history of HEAD for the most recent commit whose message
# carries a "git-subtree-dir:" trailer for directory $1 together with a
# "git-subtree-split:" trailer, and print "<commit> <split>" for it.
# When the commit also has a "git-subtree-mainline:" trailer (a rejoin
# commit), the split commit itself is reported in place of the commit.
# Prints nothing when no such commit is found.
find_latest_squash()
{
	# Assign dir first so the debug line reports the directory we were
	# actually asked about.
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	# read -r: without it, a message line ending in a backslash would be
	# joined with the next line and could swallow a trailer.
	while read -r a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			if [ -n "$sub" ]; then
				if [ -n "$main" ]; then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			sq=
			main=
			sub=
			;;
		esac
	done
}
260
# Scan the history of the revisions in $2 for commits carrying a
# "git-subtree-dir:" trailer for directory $1 and seed the cache from
# them:
#   - split trailer only (a squash commit): cache <commit> -> <split>
#   - mainline and split trailers (a rejoin): cache <mainline> -> <split>
#     and <split> -> <split>, and print exclusions ("^<rev>^") for the
#     parents of both so the caller can leave that history out.
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	# read -r: without it, a message line ending in a backslash would be
	# joined with the next line and could swallow a trailer, making a
	# rejoin commit look like a squash.
	while read -r a b junk; do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			debug " Main is: '$main'"
			if [ -z "$main" ] && [ -n "$sub" ]; then
				# squash commits refer to a subtree
				debug " Squash: $sq from $sub"
				cache_set "$sq" "$sub"
			fi
			if [ -n "$main" ] && [ -n "$sub" ]; then
				debug " Prior: $main -> $sub"
				cache_set "$main" "$sub"
				cache_set "$sub" "$sub"
				try_remove_previous "$main"
				try_remove_previous "$sub"
			fi
			main=
			sub=
			;;
		esac
	done
}
295
# Create a new commit that reuses the author/committer identity, dates
# and message of commit $1, but with tree $2 and the parent arguments
# given in $3 (a string such as "-p <rev> -p <rev>").  The message is
# prefixed with $annotate.  Whatever 'git commit-tree' prints goes to
# stdout.  Dies if the copy fails.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		# read -r keeps backslashes in names and e-mail addresses
		# intact instead of treating them as escape characters.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# $3 is left unquoted on purpose: it holds several words.
		(printf "%s" "$annotate"; cat ) |
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
319
# Print the commit message for an 'add' commit: $message if set,
# otherwise a default subject, followed by a blank line and the
# git-subtree-dir/-mainline/-split trailers built from $1, $2 and $3.
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -z "$message" ]; then
		commit_message="Add '$dir/' from commit '$latest_new'"
	else
		commit_message="$message"
	fi
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
338
# Print the message for the merge commit created by a squashed 'add':
# $message when set, otherwise a default naming commit $1 and dir $2.
add_squashed_msg()
{
	if [ -z "$message" ]; then
		echo "Merge commit '$1' as '$2'"
	else
		echo "$message"
	fi
}
347
# Print the commit message for a '--rejoin' merge: $message if set,
# otherwise a default subject, followed by a blank line and the
# git-subtree-dir/-mainline/-split trailers built from $1, $2 and $3.
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -z "$message" ]; then
		commit_message="Split '$dir/' into commit '$latest_new'"
	else
		commit_message="$message"
	fi
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
366
# Print the message for a squash commit of directory $1 that moves the
# subtree from commit $2 (may be empty for the first squash) to commit $3.
# With a previous commit, the message lists the commits gained and, as
# "REVERT:" lines, the commits that are only in the old side.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	# Trailers that later runs search for.
	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}
388
# Print the top-level tree id (%T) of commit $1.  Exits with git's status
# when 'git log' fails.
toptree_for_commit()
{
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		:
	else
		exit $?
	fi
}
394
# Print the tree id that commit $1 records for path $2.  Nothing is
# printed when the path is absent or is a submodule (type "commit").
# Any other entry type trips an assertion.
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name; do
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" -o "$type" = "commit" ]
		if [ "$type" = "commit" ]; then
			continue # ignore submodules
		fi
		echo $tree
		break
	done
}
408
# Usage: tree_changed <tree> <parent>...
# Succeeds (0) when the tree counts as changed: either there is not
# exactly one parent, or that parent's top-level tree differs from
# <tree>.  Returns 1 when the single parent has the same tree.
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0

	ptree=$(toptree_for_commit $1)
	[ "$ptree" = "$tree" ] || return 0 # changed
	return 1 # not changed
}
424
# Create a squash commit whose tree is the top-level tree of commit $3.
# $1 is the previous squash commit (used as the parent; may be empty)
# and $2 the subtree commit it corresponded to.  Uses the global $dir
# for the message.  Output is whatever 'git commit-tree' prints.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		# First squash: no parent, no previous subtree commit.
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
	fi
}
439
# Decide what revision $1 (with subtree $2) becomes in the new history,
# given its already-rewritten parents in $3 (space separated).
# If one of the parents has exactly the tree $2, that parent is printed
# and no commit is created.  Otherwise the commit is copied with the
# de-duplicated parents, and the output of copy_commit is printed.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		if [ "$ptree" != "$tree" ]; then
			nonidentical="$parent"
		else
			# an identical parent could be used in place of this rev.
			identical="$parent"
		fi

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case "$gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
482
# Die unless both the working tree and the index match HEAD, as reported
# by 'git diff-index'.
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications. Cannot add."
}
492
# Die unless "refs/heads/$1" passes 'git check-ref-format'.
ensure_valid_ref_format()
{
	if ! git check-ref-format "refs/heads/$1"; then
		die "'$1' does not look like a ref"
	fi
}
498
# 'git subtree add': with one argument, add the given commit; with two,
# treat them as <repository> <ref>, fetch, and add what was fetched.
# Requires that $dir does not exist yet and that the tree is clean.
cmd_add()
{
	[ ! -e "$dir" ] ||
		die "'$dir' already exists. Cannot add."

	ensure_clean

	case $# in
	1)
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
			die "'$1' does not refer to a commit"

		"cmd_add_commit" "$@"
		;;
	2)
		# Technically we could accept a refspec here but we're
		# just going to turn around and add FETCH_HEAD under the
		# specified directory. Allowing a refspec might be
		# misleading because we won't do anything with any other
		# branches fetched via the refspec.
		ensure_valid_ref_format "$2"

		"cmd_add_repository" "$@"
		;;
	*)
		say "error: parameters were '$@'"
		die "Provide either a commit or a repository and commit."
		;;
	esac
}
526
# Fetch from <repository> <ref> (echoing the fetch command first) and
# hand FETCH_HEAD to cmd_add_commit.
cmd_add_repository()
{
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	if ! git fetch "$@"; then
		exit $?
	fi
	revs=FETCH_HEAD
	cmd_add_commit $revs
}
537
# Read the tree of the given commit into the index under $dir, check it
# out, and record a commit on top of HEAD that has the added commit (or,
# with --squash, a fresh squash commit) as an extra parent.  Finally
# move HEAD to the new commit with 'git reset'.
cmd_add_commit()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# HEAD becomes a parent unless it is empty or is the added commit.
	headp=
	if [ -n "$headrev" -a "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		revp=$(peel_committish "$rev") &&
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree $tree $headp -p "$revp") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
569
# 'git subtree split': walk the history of $revs and build a new history
# that contains only the contents of $dir.
#
# Reads the globals $dir, $revs, $onto, $ignore_joins, $rejoin, $branch
# and $quiet.  The id of the newest rewritten commit is printed on
# stdout; progress and debug output go to stderr.  With --rejoin the new
# history is merged back into HEAD; with --branch the named branch is
# created or updated.  The function exits the script when done.
cmd_split()
{
	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set $rev $rev
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		# Progress line, rewritten in place via a carriage return.
		# printf is used because 'echo -n' is not portable under
		# /bin/sh.
		if [ -z "$quiet" ]; then
			printf '%s\r' "$revcount/$revmax ($createcount)" >&2
		fi
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		check_parents $parents

		# ugly.  is there no better way to tell if this is a subtree
		# vs. a mainline commit?  Does it matter?
		if [ -z "$tree" ]; then
			set_notree $rev
			if [ -n "$newparents" ]; then
				cache_set $rev $rev
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		# $dir is quoted so a prefix containing whitespace reaches
		# rejoin_msg as a single argument.
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		if rev_exists "refs/heads/$branch"; then
			if ! rev_is_descendant_of_branch $latest_new $branch; then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
		say "$action branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
662
# 'git subtree merge': merge exactly one revision into the subtree at
# $prefix.  With --squash, a squash commit on top of the latest recorded
# squash is created first and merged instead.  Older git versions
# (before 1.7) use the 'subtree' merge strategy, newer ones the
# -Xsubtree option.
cmd_merge()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Collect the merge options in the positional parameters.
	version=$(git version)
	if [ "$version" \< "git version 1.7" ]; then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if [ -n "$message" ]; then
		set -- "$@" --message="$message"
	fi
	git merge "$@" $rev
}
706
# 'git subtree pull': fetch <repository> <ref> and merge FETCH_HEAD into
# the subtree via cmd_merge.
cmd_pull()
{
	[ $# -eq 2 ] || die "You must provide <repository> <ref>"
	ensure_clean
	ensure_valid_ref_format "$2"
	if ! git fetch "$@"; then
		exit $?
	fi
	revs=FETCH_HEAD
	cmd_merge $revs
}
719
# 'git subtree push': split out the history of $prefix and push the
# resulting commit to refs/heads/<ref> in <repository>.
# Arguments: $1 - repository, $2 - ref (branch name on the remote).
# Dies when the argument count is wrong, the ref name is invalid, $dir
# does not exist, or the split fails.
cmd_push()
{
	if [ $# -ne 2 ]; then
		die "You must provide <repository> <ref>"
	fi
	ensure_valid_ref_format "$2"
	if [ -e "$dir" ]; then
		repository=$1
		refspec=$2
		echo "git push using: " "$repository" "$refspec"
		localrev=$(git subtree split --prefix="$prefix") || die
		# Quoted so a repository path containing whitespace is passed
		# to git as a single argument.
		git push "$repository" "$localrev:refs/heads/$refspec"
	else
		die "'$dir' must already exist. Try 'git subtree add'."
	fi
}
736
# Run the handler function for the selected subcommand, passing along
# the remaining arguments.
"cmd_${command}" "$@"