1#!/bin/bash
2#
3# git-subtree.sh: split/join git repositories in subdirectories of this one
4#
5# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6#
# With no arguments at all, behave as if -h had been given.
if [ $# -eq 0 ]; then
	set -- -h
fi

# Option specification consumed by 'git rev-parse --parseopt'.
# Lines before '--' are usage text; lines after it declare the options.
# A line starting with a space is a group heading.
OPTS_SPEC="\
git subtree add --prefix=<prefix> <commit>
git subtree merge --prefix=<prefix> <commit>
git subtree pull --prefix=<prefix> <repository> <refspec...>
git subtree split --prefix=<prefix> <commit...>
--
h,help show the help
q quiet
d show debug messages
prefix= the name of the subdir to split out
 options for 'split'
annotate= add a prefix to commit message of new commits
b,branch= create a new branch from the split subtree
ignore-joins ignore prior --rejoin commits
onto= try connecting new tree to an existing one
rejoin merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash merge subtree changes as a single commit
"

# parseopt prints shell code (a 'set -- ...' line, or a here-document with
# the usage text) that must be evaluated verbatim.  The command substitution
# is quoted so that its newlines and the whitespace inside quoted arguments
# are not collapsed by word splitting before eval sees them.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Make git's helper scripts reachable, then load the common shell library
# (this script relies on it for 'die' and 'require_work_tree').
PATH=$(git --exec-path):$PATH
. git-sh-setup
require_work_tree
33
# Option state.  Each variable is reset explicitly so that a same-named
# variable inherited from the environment cannot pass for a command-line
# option.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
# 'prefix' is only ever assigned by --prefix; without this reset an exported
# 'prefix' variable would satisfy the "--prefix is required" check.
prefix=
43
# Write the arguments to stderr as one line, but only when $debug is
# non-empty (set by -d).  Silent otherwise.
debug()
{
	[ -z "$debug" ] || echo "$@" >&2
}
50
# Write the arguments to stderr unless $quiet is non-empty (set by -q).
# Arguments go straight to 'echo', so a leading -n suppresses the newline.
say()
{
	[ -n "$quiet" ] || echo "$@" >&2
}
57
# Run the arguments as a command; if it fails, abort through 'die' with a
# message that echoes the failed command.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
66
67
68#echo "Options: $*"
69
# Walk the option list until the '--' separator, recording each option in
# its state variable.  Options that take a value consume the next
# positional parameter as well.
while [ $# -gt 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	--prefix)
		prefix="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		# end of options; what remains is the subcommand and its arguments
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
93
# The first remaining word is the subcommand.  Only 'split' falls back to
# HEAD when no revision is given.
command="$1"
shift
case "$command" in
	add|merge|pull) default= ;;
	split) default="--default HEAD" ;;
	*) die "Unknown command '$command'" ;;
esac

if [ -z "$prefix" ]; then
	die "You must provide the --prefix option."
fi

# Drop trailing slashes from the prefix (but keep a lone "/").  The rest of
# the script compares $dir for exact equality with the name printed by
# 'git ls-tree' and with recorded 'git-subtree-dir:' lines, neither of which
# matches when the user typed "--prefix=foo/".
dir="$prefix"
while [ "$dir" != "${dir%/}" ] && [ -n "${dir%/}" ]; do
	dir="${dir%/}"
done

# 'pull' hands its arguments (repository, refspecs) to git unchanged; every
# other subcommand takes revisions only and rejects bare path arguments.
if [ "$command" != "pull" ]; then
	# $default is deliberately unquoted: it is empty or two separate words.
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	if [ -n "$dirs" ]; then
		die "Error: Use --prefix instead of bare filenames."
	fi
fi

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
121
# Create an empty per-process cache directory under $GIT_DIR and store its
# path in $cachedir.  Any directory left over under the same PID is removed
# first.  Dies if the directory cannot be removed or created.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	debug "Using cachedir: $cachedir" >&2
}
129
# For every key given, print the value stored in the cache, one per line.
# Keys with no readable cache file are skipped silently.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
139
# Store value $2 under key $1 in the cache.  Ordinary keys may be written
# only once (a second write dies); the bookkeeping keys 'latest_old' and
# 'latest_new' may be overwritten freely.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
151
# Return 0 if 'git rev-parse' accepts $1, 1 otherwise.  All output from git
# is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 && return 0
	return 1
}
160
# Print "^<rev>^" -- a rev-list exclusion for the first parent of $1 -- when
# that parent resolves.  A commit without a parent has nothing to exclude,
# so in that case nothing is printed.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
170
# Scan the log of HEAD for the newest commit that records a
# 'git-subtree-split:' line for directory $1 and print "<commit> <split>".
# If that commit also carries a 'git-subtree-mainline:' line (a rejoin), the
# split hash is printed in both positions.  Prints nothing if no such commit
# is found.
find_latest_squash()
{
	debug "Looking for latest squash ($dir)..."
	dir="$1"
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read key value rest; do
		debug "$key $value $rest"
		debug "{{$sq/$main/$sub}}"
		case "$key" in
		START)
			sq="$value"
			;;
		git-subtree-mainline:)
			main="$value"
			;;
		git-subtree-split:)
			sub="$value"
			;;
		END)
			if [ -z "$sub" ]; then
				# no split recorded in this commit; start over
				sq=
				main=
				sub=
				continue
			fi
			# a rejoin commit?  Pretend its sub was a squash.
			[ -z "$main" ] || sq="$sub"
			debug "Squash found: $sq $sub"
			echo "$sq" "$sub"
			break
			;;
		esac
	done
}
205
# Read the history of revisions $2 for commits that record an earlier split
# of directory $1, and prime the cache from them:
#   - a commit with a 'git-subtree-split:' line but no
#     'git-subtree-mainline:' line (a squash) maps itself to the split;
#   - a commit with both lines (an add or rejoin) maps the mainline commit
#     to the split, and rev-list exclusions for the parents of both are
#     printed on stdout.
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	# $revs is deliberately unquoted: it may hold several revisions.
	git log --grep="^git-subtree-dir: $dir\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk; do
		case "$a" in
			# Remember only the commit itself here.  'main' must stay
			# empty until a mainline line is actually seen, otherwise
			# the squash branch below can never be taken.
			START) sq="$b" ;;
			git-subtree-mainline:) main="$b" ;;
			git-subtree-split:) sub="$b" ;;
			END)
				if [ -z "$main" ] && [ -n "$sub" ]; then
					# squash commits refer to a subtree
					cache_set "$sq" "$sub"
				fi
				if [ -n "$main" ] && [ -n "$sub" ]; then
					debug " Prior: $main -> $sub"
					cache_set "$main" "$sub"
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
				main=
				sub=
				;;
		esac
	done
}
237
# Create a new commit that reuses the author, committer and message of
# commit $1, with tree $2 and the parent arguments in $3 (a string of
# "-p <rev>" words, possibly empty).  $annotate is prepended to the
# message.  git commit-tree prints the new commit id on stdout.  Dies if
# the copy fails.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%s%n%n%b' "$1" |
	(
		# -r keeps backslashes in names and addresses intact
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# printf emits the annotation literally, even when it looks
		# like an echo option such as "-n" or "-e".
		(printf '%s' "$annotate"; cat) |
			git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
261
# Print the commit message for an 'add' merge: a subject line, a blank
# line, then the git-subtree-dir / -mainline / -split trailer lines.
#   $1 - subtree directory
#   $2 - mainline commit
#   $3 - subtree commit being added
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	printf '%s\n' \
		"Add '$dir/' from commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
275
# Print the commit message for a '--rejoin' merge: a subject line, a blank
# line, then the git-subtree-dir / -mainline / -split trailer lines.
#   $1 - subtree directory
#   $2 - mainline commit the split was made from
#   $3 - resulting split commit
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	printf '%s\n' \
		"Split '$dir/' into commit '$latest_new'" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
289
# Print the commit message for a squash commit of directory $1.
#   $2 - previously squashed subtree commit, or empty for the first squash
#   $3 - subtree commit being squashed in
# With a previous commit, the message lists the commits gained and those
# reverted relative to it.  The message always ends with the
# git-subtree-dir / git-subtree-split trailer lines.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		# commits reachable from the new tip only
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		# commits reachable from the old tip only
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	printf '\n%s\n%s\n' "git-subtree-dir: $dir" "git-subtree-split: $newsub"
}
311
# Print the top-level tree id of commit $1.  If git fails, exit with git's
# status.
toptree_for_commit()
{
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		return 0
	else
		exit $?
	fi
}
317
# Print the object id that 'git ls-tree' lists for path $2 in commit $1.
# Prints nothing if the path has no entry there.  Only the first line of
# output is used, and its name must equal $2 exactly.
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	{
		if read mode type tree name; then
			assert [ "$name" = "$dir" ]
			echo $tree
		fi
	}
}
329
# Return 0 ("changed") unless exactly one parent is given and its top-level
# tree equals $1, in which case return 1.
#   $1  - tree id to compare against
#   $2… - parent commits
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	[ "$ptree" = "$tree" ] || return 0
	return 1
}
345
# Create a squash commit holding the top-level tree of subtree commit $3
# and print its id.
#   $1 - previous squash commit to use as parent, or empty for none
#   $2 - subtree commit the previous squash was made from
#   $3 - subtree commit to squash
# The message comes from squash_msg for the global $dir.  Exits on failure.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		# first squash: no parent, no commit range to describe
		squash_msg "$dir" "" "$newsub" |
			git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
			git commit-tree "$tree" -p "$old" || exit $?
	fi
}
360
# Decide how revision $1 (with subtree tree $2) appears in the split
# history, given its already-rewritten parents $3 (space separated).
# If some parent has exactly the same tree, that parent's id is printed and
# no commit is made; otherwise the commit is copied with the de-duplicated
# parents and the new id is printed.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
403
# Die unless both the working tree and the index match HEAD.
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet ||
		die "Working tree has modifications. Cannot add."
	git diff-index --cached HEAD --exit-code --quiet ||
		die "Index has modifications. Cannot add."
}
413
# 'add' subcommand: import the tree of the single revision in $revs as
# directory $dir and record it as a merge commit on HEAD.
# With $squash set, the imported history is first collapsed into one squash
# commit and that commit is merged instead.
# Dies if $dir already exists, if the work tree or index is dirty, or if
# $revs does not hold exactly one revision.
cmd_add()
{
	if [ -e "$dir" ]; then
		die "'$dir' already exists. Cannot add."
	fi
	ensure_clean

	set -- $revs
	if [ $# -ne 1 ]; then
		die "You must provide exactly one revision. Got: '$revs'"
	fi
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" "$rev" || exit $?
	# '--' forces $dir to be read as a path.  Without it, a prefix that
	# is also a branch name would make git switch branches instead.
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	else
		headp=
	fi

	# $headp is deliberately unquoted below: it is empty or two words.
	if [ -n "$squash" ]; then
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(echo "Merge commit '$rev' as '$dir'" |
			git commit-tree "$tree" $headp -p "$rev") || exit $?
	else
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree "$tree" $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
451
# 'split' subcommand: rewrite the history of $revs so that the contents of
# $dir become the root of the tree, and print the id of the newest
# rewritten commit on stdout.
# Reads the globals $branch, $onto, $ignore_joins and $rejoin:
#   - $onto seeds the cache with an existing split history;
#   - unless $ignore_joins is set, earlier splits found in history are
#     reused;
#   - $rejoin merges the result back into HEAD with the 'ours' strategy;
#   - $branch creates refs/heads/$branch at the result.
# Always terminates the shell: exit 0 on success, non-zero on failure.
cmd_split()
{
	if [ -n "$branch" ] && rev_exists "refs/heads/$branch"; then
		die "Branch '$branch' already exists."
	fi

	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set "$rev" "$rev"
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents at the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	# The command is kept as a string and run through eval so that $revs
	# and $unrevs are split into separate arguments.
	grl='git rev-list --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	# arithmetic expansion drops any padding 'wc' puts around the number
	revmax=$((revmax + 0))
	# carriage return: each progress update overwrites the previous one
	cr=$(printf '\r')
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		say -n "$revcount/$revmax ($createcount)$cr"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug " prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug " parents: $parents"
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug " tree is: $tree"

		# ugly. is there no better way to tell if this is a subtree
		# vs. a mainline commit? Does it matter?
		[ -z "$tree" ] && continue

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set "$rev" "$newrev"
		cache_set latest_new "$newrev"
		cache_set latest_old "$rev"
	done || exit $?
	# The loop ran in a pipeline subshell, so its results are read back
	# from the cache rather than from shell variables.
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			"$latest_new" >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		# the empty old-value makes update-ref fail if the ref exists
		git update-ref -m 'subtree split' "refs/heads/$branch" \
			"$latest_new" "" || exit $?
		say "Created branch '$branch'"
	fi
	echo "$latest_new"
	exit 0
}
533
# 'merge' subcommand: merge the single revision in $revs into HEAD using
# the subtree strategy.
# With $squash set, a squash commit is first built on top of the latest
# recorded squash for $dir and merged in place of the revision; if the
# subtree is already at that revision, a note is printed and the script
# exits 0.
# Dies if the tree is dirty, if $revs does not hold exactly one revision,
# or if squashing is requested and no earlier squash for $dir is found.
cmd_merge()
{
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision. Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		# $first_split is "<squash commit> <subtree commit>"
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	git merge -s subtree $rev
}
563
# 'pull' subcommand: fetch from the repository/refspecs given as arguments
# and merge the result into HEAD with the subtree strategy.
# Without $squash this is a plain 'git pull -s subtree'.  With $squash set,
# the commit is fetched first and then handed to cmd_merge, so that
# --squash (documented for 'pull' in the usage text) takes effect.
# Shell tracing is not enabled here; a 'set -x' would stay on for the rest
# of the script.
cmd_pull()
{
	ensure_clean
	if [ -n "$squash" ]; then
		git fetch "$@" || exit $?
		# Resolve to a commit id: cmd_merge compares the revision with
		# recorded ids and writes it into the squash commit message.
		revs=$(git rev-parse --verify "FETCH_HEAD^{commit}") || exit $?
		cmd_merge
	else
		git pull -s subtree "$@"
	fi
}
570
# Run the handler function named after the chosen subcommand, passing on
# the remaining command-line arguments.
"cmd_${command}" "$@"