Merge branch 'tr/filter-branch'
author Junio C Hamano <gitster@pobox.com>
Wed, 3 Sep 2008 00:47:13 +0000 (17:47 -0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 3 Sep 2008 00:47:13 +0000 (17:47 -0700)
* tr/filter-branch:
revision --simplify-merges: make it a no-op without pathspec
revision --simplify-merges: do not leave commits unprocessed
revision --simplify-merges: use decoration instead of commit->util field
Documentation: rev-list-options: move --simplify-merges documentation
filter-branch: use --simplify-merges
filter-branch: fix ref rewriting with --subdirectory-filter
filter-branch: Extend test to show rewriting bug
Topo-sort before --simplify-merges
revision traversal: show full history with merge simplification
revision.c: whitespace fix

1  2 
Documentation/rev-list-options.txt
git-filter-branch.sh
revision.c
revision.h
t/t7003-filter-branch.sh
index 735cf07b20e17e29d96f701d97768ae610aea590,15752b92a59a87d5bbd197e9e00b94b2f19e0269..0ce916a1887b0846bfc5a6e2233242601e0dde79
@@@ -96,7 -96,6 +96,7 @@@ you would get an output like this
  This implies the '--topo-order' option by default, but the
  '--date-order' option may also be specified.
  
 +ifndef::git-rev-list[]
  Diff Formatting
  ~~~~~~~~~~~~~~~
  
@@@ -126,7 -125,6 +126,7 @@@ options may be given. See linkgit:git-d
  -t::
  
        Show the tree objects in the diff output. This implies '-r'.
 +endif::git-rev-list[]
  
  Commit Limiting
  ~~~~~~~~~~~~~~~
@@@ -409,6 -407,48 +409,48 @@@ Note that without '\--full-history', th
  one of the parents is TREESAME, we follow only that one, so the other
  sides of the merge are never walked.
  
+ Finally, there is a fourth simplification mode available:
+ --simplify-merges::
+       First, build a history graph in the same way that
+       '\--full-history' with parent rewriting does (see above).
+ +
+ Then simplify each commit `C` to its replacement `C'` in the final
+ history according to the following rules:
+ +
+ --
+ * Set `C'` to `C`.
+ +
+ * Replace each parent `P` of `C'` with its simplification `P'`.  In
+   the process, drop parents that are ancestors of other parents, and
+   remove duplicates.
+ +
+ * If after this parent rewriting, `C'` is a root or merge commit (has
+   zero or >1 parents), a boundary commit, or !TREESAME, it remains.
+   Otherwise, it is replaced with its only parent.
+ --
+ +
+ The effect of this is best shown by way of comparing to
+ '\--full-history' with parent rewriting.  The example turns into:
+ +
+ -----------------------------------------------------------------------
+         .-A---M---N---O
+        /     /       /
+       I     B       D
+        \   /       /
+         `---------'
+ -----------------------------------------------------------------------
+ +
+ Note the major differences in `N` and `P` over '\--full-history':
+ +
+ --
+ * `N`'s parent list had `I` removed, because it is an ancestor of the
+   other parent `M`.  Still, `N` remained because it is !TREESAME.
+ +
+ * `P`'s parent list similarly had `I` removed.  `P` was then
+   removed completely, because it had one parent and is TREESAME.
+ --
  
  ifdef::git-rev-list[]
  Bisection Helpers
diff --combined git-filter-branch.sh
index 2871a59e3280584a2620a67f53e2e28dc808dac0,2688254af399ddc452b120734d41b41292b0ed69..81392add0b852f51f63a470727c33e0c306260d8
@@@ -232,11 -232,11 +232,11 @@@ mkdir ../map || die "Could not create m
  case "$filter_subdir" in
  "")
        git rev-list --reverse --topo-order --default HEAD \
-               --parents "$@"
+               --parents --simplify-merges "$@"
        ;;
  *)
        git rev-list --reverse --topo-order --default HEAD \
-               --parents "$@" -- "$filter_subdir"
+               --parents --simplify-merges "$@" -- "$filter_subdir"
  esac > ../revs || die "Could not get the commits"
  commits=$(wc -l <../revs | tr -d " ")
  
@@@ -317,24 -317,20 +317,20 @@@ done <../rev
  
  # In case of a subdirectory filter, it is possible that a specified head
  # is not in the set of rewritten commits, because it was pruned by the
- # revision walker.  Fix it by mapping these heads to the next rewritten
- # ancestor(s), i.e. the boundaries in the set of rewritten commits.
+ # revision walker.  Fix it by mapping these heads to the unique nearest
+ # ancestor that survived the pruning.
  
- # NEEDSWORK: we should sort the unmapped refs topologically first
- while read ref
- do
-       sha1=$(git rev-parse "$ref"^0)
-       test -f "$workdir"/../map/$sha1 && continue
-       # Assign the boundarie(s) in the set of rewritten commits
-       # as the replacement commit(s).
-       # (This would look a bit nicer if --not --stdin worked.)
-       for p in $( (cd "$workdir"/../map; ls | sed "s/^/^/") |
-               git rev-list $ref --boundary --stdin |
-               sed -n "s/^-//p")
+ if test "$filter_subdir"
+ then
+       while read ref
        do
-               map $p >> "$workdir"/../map/$sha1
-       done
- done < "$tempdir"/heads
+               sha1=$(git rev-parse "$ref"^0)
+               test -f "$workdir"/../map/$sha1 && continue
+               ancestor=$(git rev-list --simplify-merges -1 \
+                               $ref -- "$filter_subdir")
+               test "$ancestor" && echo $(map $ancestor) >> "$workdir"/../map/$sha1
+       done < "$tempdir"/heads
+ fi
  
  # Finally update the refs
  
@@@ -416,17 -412,15 +412,17 @@@ if [ "$filter_tag_name" ]; the
                echo "$ref -> $new_ref ($sha1 -> $new_sha1)"
  
                if [ "$type" = "tag" ]; then
 -                      new_sha1=$(git cat-file tag "$ref" |
 +                      new_sha1=$( ( printf 'object %s\ntype commit\ntag %s\n' \
 +                                              "$new_sha1" "$new_ref"
 +                              git cat-file tag "$ref" |
                                sed -n \
                                    -e "1,/^$/{
 -                                        s/^object .*/object $new_sha1/
 -                                        s/^type .*/type commit/
 -                                        s/^tag .*/tag $new_ref/
 +                                        /^object /d
 +                                        /^type /d
 +                                        /^tag /d
                                        }" \
                                    -e '/^-----BEGIN PGP SIGNATURE-----/q' \
 -                                  -e 'p' |
 +                                  -e 'p' |
                                git mktag) ||
                                die "Could not create new tag object for $ref"
                        if git cat-file tag "$ref" | \
diff --combined revision.c
index 36291b6b864a1213841aba91d58693324d1c88c7,db2ab2b11a39c295373488f6ff28c1340a9a666d..bcbc7bd7e4257088a8504c12b60ef33d2fac9514
@@@ -489,7 -489,7 +489,7 @@@ static int add_parents_to_list(struct r
                        p->object.flags |= SEEN;
                        insert_by_date_cached(p, list, cached_base, cache_ptr);
                }
-               if(revs->first_parent_only)
+               if (revs->first_parent_only)
                        break;
        }
        return 0;
@@@ -782,10 -782,6 +782,10 @@@ void init_revisions(struct rev_info *re
  
        revs->commit_format = CMIT_FMT_DEFAULT;
  
 +      revs->grep_filter.status_only = 1;
 +      revs->grep_filter.pattern_tail = &(revs->grep_filter.pattern_list);
 +      revs->grep_filter.regflags = REG_NEWLINE;
 +
        diff_setup(&revs->diffopt);
        if (prefix && !revs->diffopt.prefix) {
                revs->diffopt.prefix = prefix;
@@@ -950,7 -946,15 +950,7 @@@ void read_revisions_from_stdin(struct r
  
  static void add_grep(struct rev_info *revs, const char *ptn, enum grep_pat_token what)
  {
 -      if (!revs->grep_filter) {
 -              struct grep_opt *opt = xcalloc(1, sizeof(*opt));
 -              opt->status_only = 1;
 -              opt->pattern_tail = &(opt->pattern_list);
 -              opt->regflags = REG_NEWLINE;
 -              revs->grep_filter = opt;
 -      }
 -      append_grep_pattern(revs->grep_filter, ptn,
 -                          "command line", 0, what);
 +      append_grep_pattern(&revs->grep_filter, ptn, "command line", 0, what);
  }
  
  static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
@@@ -1041,6 -1045,11 +1041,11 @@@ static int handle_revision_opt(struct r
        } else if (!strcmp(arg, "--topo-order")) {
                revs->lifo = 1;
                revs->topo_order = 1;
+       } else if (!strcmp(arg, "--simplify-merges")) {
+               revs->simplify_merges = 1;
+               revs->rewrite_parents = 1;
+               revs->simplify_history = 0;
+               revs->limited = 1;
        } else if (!strcmp(arg, "--date-order")) {
                revs->lifo = 0;
                revs->topo_order = 1;
        } else if (!prefixcmp(arg, "--grep=")) {
                add_message_grep(revs, arg+7);
        } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
 -              if (revs->grep_filter)
 -                      revs->grep_filter->regflags |= REG_EXTENDED;
 +              revs->grep_filter.regflags |= REG_EXTENDED;
        } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
 -              if (revs->grep_filter)
 -                      revs->grep_filter->regflags |= REG_ICASE;
 +              revs->grep_filter.regflags |= REG_ICASE;
        } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
 -              if (revs->grep_filter)
 -                      revs->grep_filter->fixed = 1;
 +              revs->grep_filter.fixed = 1;
        } else if (!strcmp(arg, "--all-match")) {
 -              if (revs->grep_filter)
 -                      revs->grep_filter->all_match = 1;
 +              revs->grep_filter.all_match = 1;
        } else if (!prefixcmp(arg, "--encoding=")) {
                arg += 11;
                if (strcmp(arg, "none"))
@@@ -1341,7 -1354,9 +1346,7 @@@ int setup_revisions(int argc, const cha
        if (diff_setup_done(&revs->diffopt) < 0)
                die("diff_setup_done failed");
  
 -      if (revs->grep_filter) {
 -              compile_grep_patterns(revs->grep_filter);
 -      }
 +      compile_grep_patterns(&revs->grep_filter);
  
        if (revs->reverse && revs->reflog_info)
                die("cannot combine --reverse with --walk-reflogs");
@@@ -1368,6 -1383,179 +1373,179 @@@ static void add_child(struct rev_info *
        l->next = add_decoration(&revs->children, &parent->object, l);
  }
  
+ static int remove_duplicate_parents(struct commit *commit)
+ {
+       struct commit_list **pp, *p;
+       int surviving_parents;
+       /*
+        * Unlink repeated entries from commit->parents, keeping the
+        * first occurrence of each parent, and return how many entries
+        * remain in the list.
+        *
+        * Examine existing parents while marking ones we have seen...
+        * A parent that already carries TMP_MARK appeared earlier in
+        * the list, so its entry is spliced out through 'pp'.
+        *
+        * NOTE(review): the spliced-out list node is not freed here --
+        * confirm whether that is intentional.
+        */
+       pp = &commit->parents;
+       while ((p = *pp) != NULL) {
+               struct commit *parent = p->item;
+               if (parent->object.flags & TMP_MARK) {
+                       *pp = p->next;
+                       continue;
+               }
+               parent->object.flags |= TMP_MARK;
+               pp = &p->next;
+       }
+       /* count them while clearing the temporary mark */
+       surviving_parents = 0;
+       for (p = commit->parents; p; p = p->next) {
+               p->item->object.flags &= ~TMP_MARK;
+               surviving_parents++;
+       }
+       return surviving_parents;
+ }
+ struct merge_simplify_state {
+       struct commit *simplified; /* what the commit simplifies to; NULL while not yet decided */
+ };
+ static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit)
+ {
+       struct merge_simplify_state *st; /* state attached to 'commit' in revs->merge_simplification */
+       st = lookup_decoration(&revs->merge_simplification, &commit->object);
+       if (!st) { /* first lookup for this commit: create and attach a fresh state */
+               st = xcalloc(1, sizeof(*st)); /* presumably zero-filled like calloc, so ->simplified starts NULL */
+               add_decoration(&revs->merge_simplification, &commit->object, st);
+       }
+       return st; /* never NULL on this path: either found or just created */
+ }
+ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
+ {
+       struct commit_list *p;
+       struct merge_simplify_state *st, *pst;
+       int cnt;
+       st = locate_simplify_state(revs, commit);
+       /*
+        * Decide what 'commit' simplifies to and record the answer in
+        * its merge_simplify_state.  When the answer cannot be computed
+        * yet, the commits that still need work are queued at 'tail'.
+        * The (possibly advanced) tail is returned in every case.
+        *
+        * Have we handled this one?
+        */
+       if (st->simplified)
+               return tail;
+       /*
+        * An UNINTERESTING commit simplifies to itself, so does a
+        * root commit.  We do not rewrite parents of such commit
+        * anyway.
+        */
+       if ((commit->object.flags & UNINTERESTING) || !commit->parents) {
+               st->simplified = commit;
+               return tail;
+       }
+       /*
+        * Do we know what commit all of our parents should be rewritten to?
+        * Otherwise we are not ready to rewrite this one yet.
+        *
+        * Every undecided parent is queued first, and then this commit
+        * itself, so the parents precede it in the queue.
+        */
+       for (cnt = 0, p = commit->parents; p; p = p->next) {
+               pst = locate_simplify_state(revs, p->item);
+               if (!pst->simplified) {
+                       tail = &commit_list_insert(p->item, tail)->next;
+                       cnt++;
+               }
+       }
+       if (cnt) {
+               tail = &commit_list_insert(commit, tail)->next;
+               return tail;
+       }
+       /*
+        * Rewrite our list of parents.
+        *
+        * Each entry is replaced in place by what that parent
+        * simplified to; the duplicates this may create are removed
+        * right afterwards.
+        */
+       for (p = commit->parents; p; p = p->next) {
+               pst = locate_simplify_state(revs, p->item);
+               p->item = pst->simplified;
+       }
+       cnt = remove_duplicate_parents(commit);
+       /*
+        * It is possible that we are a merge and one side branch
+        * does not have any commit that touches the given paths;
+        * in such a case, the immediate parents will be rewritten
+        * to different commits.
+        *
+        *      o----X          X: the commit we are looking at;
+        *     /    /           o: a commit that touches the paths;
+        * ---o----'
+        *
+        * Further reduce the parents by removing redundant parents.
+        * The list returned by reduce_heads() replaces the old parent
+        * list, which is freed.
+        */
+       if (1 < cnt) {
+               struct commit_list *h = reduce_heads(commit->parents);
+               cnt = commit_list_count(h);
+               free_commit_list(commit->parents);
+               commit->parents = h;
+       }
+       /*
+        * A commit simplifies to itself if it is a root, if it is
+        * UNINTERESTING, if it touches the given paths (i.e. it is not
+        * TREESAME), or if it is a merge and its parents simplify to
+        * more than one commit (the first two cases are already
+        * handled at the beginning of this function).
+        *
+        * Otherwise, it simplifies to what its sole parent simplifies to.
+        */
+       if (!cnt ||
+           (commit->object.flags & UNINTERESTING) ||
+           !(commit->object.flags & TREESAME) ||
+           (1 < cnt))
+               st->simplified = commit;
+       else {
+               pst = locate_simplify_state(revs, commit->parents->item);
+               st->simplified = pst->simplified;
+       }
+       return tail;
+ }
+ static void simplify_merges(struct rev_info *revs)
+ {
+       struct commit_list *list;
+       struct commit_list *yet_to_do, **tail;
+       if (!revs->topo_order)
+               sort_in_topological_order(&revs->commits, revs->lifo);
+       if (!revs->prune)
+               return;
+       /*
+        * Run simplify_one() over every commit in revs->commits until
+        * all of them are decided, then keep only the commits that
+        * simplified to themselves.  The list has been topologically
+        * sorted above unless revs->topo_order is set, and nothing
+        * beyond that sort is done when revs->prune is unset.
+        *
+        * feed the list reversed
+        */
+       yet_to_do = NULL;
+       for (list = revs->commits; list; list = list->next)
+               commit_list_insert(list->item, &yet_to_do);
+       while (yet_to_do) { /* one pass per iteration, until nothing is re-queued */
+               list = yet_to_do;
+               yet_to_do = NULL;
+               tail = &yet_to_do; /* simplify_one() appends not-yet-ready commits here */
+               while (list) {
+                       struct commit *commit = list->item;
+                       struct commit_list *next = list->next;
+                       free(list); /* the queue node is consumed; only the commit is kept */
+                       list = next;
+                       tail = simplify_one(revs, commit, tail);
+               }
+       }
+       /*
+        * clean up the result, removing the simplified ones
+        *
+        * revs->commits is rebuilt in its existing order from the
+        * commits whose recorded simplification is the commit itself;
+        * the old list nodes are freed along the way.
+        */
+       list = revs->commits;
+       revs->commits = NULL;
+       tail = &revs->commits;
+       while (list) {
+               struct commit *commit = list->item;
+               struct commit_list *next = list->next;
+               struct merge_simplify_state *st;
+               free(list);
+               list = next;
+               st = locate_simplify_state(revs, commit);
+               if (st->simplified == commit)
+                       tail = &commit_list_insert(commit, tail)->next;
+       }
+ }
  static void set_children(struct rev_info *revs)
  {
        struct commit_list *l;
@@@ -1408,6 -1596,8 +1586,8 @@@ int prepare_revision_walk(struct rev_in
                        return -1;
        if (revs->topo_order)
                sort_in_topological_order(&revs->commits, revs->lifo);
+       if (revs->simplify_merges)
+               simplify_merges(revs);
        if (revs->children.name)
                set_children(revs);
        return 0;
@@@ -1440,26 -1630,6 +1620,6 @@@ static enum rewrite_result rewrite_one(
        }
  }
  
- static void remove_duplicate_parents(struct commit *commit)
- {
-       struct commit_list **pp, *p;
-       /* Examine existing parents while marking ones we have seen... */
-       pp = &commit->parents;
-       while ((p = *pp) != NULL) {
-               struct commit *parent = p->item;
-               if (parent->object.flags & TMP_MARK) {
-                       *pp = p->next;
-                       continue;
-               }
-               parent->object.flags |= TMP_MARK;
-               pp = &p->next;
-       }
-       /* ... and clear the temporary mark */
-       for (p = commit->parents; p; p = p->next)
-               p->item->object.flags &= ~TMP_MARK;
- }
  static int rewrite_parents(struct rev_info *revs, struct commit *commit)
  {
        struct commit_list **pp = &commit->parents;
  
  static int commit_match(struct commit *commit, struct rev_info *opt)
  {
 -      if (!opt->grep_filter)
 +      if (!opt->grep_filter.pattern_list)
                return 1;
 -      return grep_buffer(opt->grep_filter,
 +      return grep_buffer(&opt->grep_filter,
                           NULL, /* we say nothing, not even filename */
                           commit->buffer, strlen(commit->buffer));
  }
diff --combined revision.h
index 91f194478bb91d381ab2b2440215144d8bb8d18d,765ef6c5e2f3bf0fbf84479cf83e0c0e4c82dda4..fc23522b3850edb3752181596fd83da8599e3d27
@@@ -2,7 -2,6 +2,7 @@@
  #define REVISION_H
  
  #include "parse-options.h"
 +#include "grep.h"
  
  #define SEEN          (1u<<0)
  #define UNINTERESTING   (1u<<1)
@@@ -42,6 -41,7 +42,7 @@@ struct rev_info 
                        simplify_history:1,
                        lifo:1,
                        topo_order:1,
+                       simplify_merges:1,
                        tag_objects:1,
                        tree_objects:1,
                        blob_objects:1,
@@@ -93,7 -93,7 +94,7 @@@
        int             show_log_size;
  
        /* Filter by commit log message */
 -      struct grep_opt *grep_filter;
 +      struct grep_opt grep_filter;
  
        /* Display history graph */
        struct git_graph *graph;
  
        struct reflog_walk_info *reflog_info;
        struct decoration children;
+       struct decoration merge_simplification;
  };
  
  #define REV_TREE_SAME         0
  void read_revisions_from_stdin(struct rev_info *revs);
  
  typedef void (*show_early_output_fn_t)(struct rev_info *, struct commit_list *);
 -volatile show_early_output_fn_t show_early_output;
 +extern volatile show_early_output_fn_t show_early_output;
  
  extern void init_revisions(struct rev_info *revs, const char *prefix);
  extern int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def);
diff --combined t/t7003-filter-branch.sh
index f92d414e63550cf4e0dcf3b3bc9776f1e23c8a6c,233254f2b541b7c67d99aa738eb55f0a0a6d3131..95f13a8b2ba41b95f04d667ad18b4a07c8ee9d15
@@@ -96,13 -96,17 +96,17 @@@ test_expect_success 'filter subdirector
        test_tick &&
        git commit -m "again not subdir" &&
        git branch sub &&
-       git-filter-branch -f --subdirectory-filter subdir refs/heads/sub
+       git branch sub-earlier HEAD~2 &&
+       git-filter-branch -f --subdirectory-filter subdir \
+               refs/heads/sub refs/heads/sub-earlier
  '
  
  test_expect_success 'subdirectory filter result looks okay' '
        test 2 = $(git rev-list sub | wc -l) &&
        git show sub:new &&
-       test_must_fail git show sub:subdir
+       test_must_fail git show sub:subdir &&
+       git show sub-earlier:new &&
+       test_must_fail git show sub-earlier:subdir
  '
  
  test_expect_success 'more setup' '
@@@ -250,12 -254,4 +254,12 @@@ test_expect_success 'Tag name filterin
        test_cmp expect actual
  '
  
 +test_expect_success 'Tag name filtering allows slashes in tag names' '
 +      git tag -m tag-with-slash X/1 &&
 +      git cat-file tag X/1 | sed -e s,X/1,X/2, > expect &&
 +      git filter-branch -f --tag-name-filter "echo X/2" &&
 +      git cat-file tag X/2 > actual &&
 +      test_cmp expect actual
 +'
 +
  test_done