t6012: update test for tweaked full-history traversal
[gitweb.git] / revision.c
index a50df66c50ed4c9b8118ba511f0d67fc7ed7e5ab..64b86ae91ece9ccdee0eee0009395a3ce85f752a 100644 (file)
@@ -13,6 +13,7 @@
 #include "decorate.h"
 #include "log-tree.h"
 #include "string-list.h"
+#include "mailmap.h"
 
 volatile show_early_output_fn_t show_early_output;
 
@@ -428,10 +429,100 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
        return retval >= 0 && (tree_difference == REV_TREE_SAME);
 }
 
+struct treesame_state {
+       unsigned int nparents;
+       unsigned char treesame[FLEX_ARRAY];
+};
+
+static struct treesame_state *initialise_treesame(struct rev_info *revs, struct commit *commit)
+{
+       unsigned n = commit_list_count(commit->parents);
+       struct treesame_state *st = xcalloc(1, sizeof(*st) + n);
+       st->nparents = n;
+       add_decoration(&revs->treesame, &commit->object, st);
+       return st;
+}
+
+/*
+ * Must be called immediately after removing the nth_parent from a commit's
+ * parent list, if we are maintaining the per-parent treesame[] decoration.
+ * This does not recalculate the master TREESAME flag - update_treesame()
+ * should be called to update it after a sequence of treesame[] modifications
+ * that may have affected it.
+ */
+static int compact_treesame(struct rev_info *revs, struct commit *commit, unsigned nth_parent)
+{
+       struct treesame_state *st;
+       int old_same;
+
+       if (!commit->parents) {
+               /*
+                * Have just removed the only parent from a non-merge.
+                * Different handling, as we lack decoration.
+                */
+               if (nth_parent != 0)
+                       die("compact_treesame %u", nth_parent);
+               old_same = !!(commit->object.flags & TREESAME);
+               if (rev_same_tree_as_empty(revs, commit))
+                       commit->object.flags |= TREESAME;
+               else
+                       commit->object.flags &= ~TREESAME;
+               return old_same;
+       }
+
+       st = lookup_decoration(&revs->treesame, &commit->object);
+       if (!st || nth_parent >= st->nparents)
+               die("compact_treesame %u", nth_parent);
+
+       old_same = st->treesame[nth_parent];
+       memmove(st->treesame + nth_parent,
+               st->treesame + nth_parent + 1,
+               st->nparents - nth_parent - 1);
+
+       /*
+        * If we've just become a non-merge commit, update TREESAME
+        * immediately, and remove the no-longer-needed decoration.
+        * If still a merge, defer update until update_treesame().
+        */
+       if (--st->nparents == 1) {
+               if (commit->parents->next)
+                       die("compact_treesame parents mismatch");
+               if (st->treesame[0] && revs->dense)
+                       commit->object.flags |= TREESAME;
+               else
+                       commit->object.flags &= ~TREESAME;
+               free(add_decoration(&revs->treesame, &commit->object, NULL));
+       }
+
+       return old_same;
+}
+
+static unsigned update_treesame(struct rev_info *revs, struct commit *commit)
+{
+       if (commit->parents && commit->parents->next) {
+               unsigned n;
+               struct treesame_state *st;
+
+               st = lookup_decoration(&revs->treesame, &commit->object);
+               if (!st)
+                       die("update_treesame %s", sha1_to_hex(commit->object.sha1));
+               commit->object.flags |= TREESAME;
+               for (n = 0; n < st->nparents; n++) {
+                       if (!st->treesame[n]) {
+                               commit->object.flags &= ~TREESAME;
+                               break;
+                       }
+               }
+       }
+
+       return commit->object.flags & TREESAME;
+}
+
 static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 {
        struct commit_list **pp, *parent;
-       int tree_changed = 0, tree_same = 0, nth_parent = 0;
+       struct treesame_state *ts = NULL;
+       int tree_changed = 0, nth_parent;
 
        /*
         * If we don't do pruning, everything is interesting
@@ -455,25 +546,43 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
        if (!revs->dense && !commit->parents->next)
                return;
 
-       pp = &commit->parents;
-       while ((parent = *pp) != NULL) {
+       for (pp = &commit->parents, nth_parent = 0;
+            (parent = *pp) != NULL;
+            pp = &parent->next, nth_parent++) {
                struct commit *p = parent->item;
 
-               /*
-                * Do not compare with later parents when we care only about
-                * the first parent chain, in order to avoid derailing the
-                * traversal to follow a side branch that brought everything
-                * in the path we are limited to by the pathspec.
-                */
-               if (revs->first_parent_only && nth_parent++)
-                       break;
+               if (nth_parent == 1) {
+                       /*
+                        * This our second loop iteration - so we now know
+                        * we're dealing with a merge.
+                        *
+                        * Do not compare with later parents when we care only about
+                        * the first parent chain, in order to avoid derailing the
+                        * traversal to follow a side branch that brought everything
+                        * in the path we are limited to by the pathspec.
+                        */
+                       if (revs->first_parent_only)
+                               break;
+                       /*
+                        * If this will remain a potentially-simplifiable
+                        * merge, remember per-parent treesame if needed.
+                        * Initialise the array with the comparison from our
+                        * first iteration.
+                        */
+                       if (revs->treesame.name &&
+                           !revs->simplify_history &&
+                           !(commit->object.flags & UNINTERESTING)) {
+                               ts = initialise_treesame(revs, commit);
+                               if (!tree_changed)
+                                       ts->treesame[0] = 1;
+                       }
+               }
                if (parse_commit(p) < 0)
                        die("cannot simplify commit %s (because of %s)",
                            sha1_to_hex(commit->object.sha1),
                            sha1_to_hex(p->object.sha1));
                switch (rev_compare_tree(revs, p, commit)) {
                case REV_TREE_SAME:
-                       tree_same = 1;
                        if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) {
                                /* Even if a merge with an uninteresting
                                 * side branch brought the entire change
@@ -481,7 +590,8 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
                                 * to lose the other branches of this
                                 * merge, so we just keep going.
                                 */
-                               pp = &parent->next;
+                               if (ts)
+                                       ts->treesame[nth_parent] = 1;
                                continue;
                        }
                        parent->next = NULL;
@@ -510,12 +620,11 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
                case REV_TREE_OLD:
                case REV_TREE_DIFFERENT:
                        tree_changed = 1;
-                       pp = &parent->next;
                        continue;
                }
                die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
        }
-       if (tree_changed && !tree_same)
+       if (tree_changed)
                return;
        commit->object.flags |= TREESAME;
 }
@@ -708,7 +817,7 @@ static int still_interesting(struct commit_list *src, unsigned long date, int sl
         * Does the destination list contain entries with a date
         * before the source list? Definitely _not_ done.
         */
-       if (date < src->item->date)
+       if (date <= src->item->date)
                return SLOP;
 
        /*
@@ -914,6 +1023,19 @@ static void add_rev_cmdline(struct rev_info *revs,
        info->nr++;
 }
 
+static void add_rev_cmdline_list(struct rev_info *revs,
+                                struct commit_list *commit_list,
+                                int whence,
+                                unsigned flags)
+{
+       while (commit_list) {
+               struct object *object = &commit_list->item->object;
+               add_rev_cmdline(revs, object, sha1_to_hex(object->sha1),
+                               whence, flags);
+               commit_list = commit_list->next;
+       }
+}
+
 struct all_refs_cb {
        int all_flags;
        int warned_bad_reflog;
@@ -1048,9 +1170,9 @@ void init_revisions(struct rev_info *revs, const char *prefix)
 
        revs->commit_format = CMIT_FMT_DEFAULT;
 
+       init_grep_defaults();
+       grep_init(&revs->grep_filter, prefix);
        revs->grep_filter.status_only = 1;
-       revs->grep_filter.pattern_tail = &(revs->grep_filter.pattern_list);
-       revs->grep_filter.header_tail = &(revs->grep_filter.header_list);
        revs->grep_filter.regflags = REG_NEWLINE;
 
        diff_setup(&revs->diffopt);
@@ -1091,6 +1213,7 @@ static void prepare_show_merge(struct rev_info *revs)
        add_pending_object(revs, &head->object, "HEAD");
        add_pending_object(revs, &other->object, "MERGE_HEAD");
        bases = get_merge_bases(head, other, 1);
+       add_rev_cmdline_list(revs, bases, REV_CMD_MERGE_BASE, UNINTERESTING);
        add_pending_commit_list(revs, bases, UNINTERESTING);
        free_commit_list(bases);
        head->object.flags |= SYMMETRIC_LEFT;
@@ -1178,6 +1301,9 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi
 
                        if (symmetric) {
                                exclude = get_merge_bases(a, b, 1);
+                               add_rev_cmdline_list(revs, exclude,
+                                                    REV_CMD_MERGE_BASE,
+                                                    flags_exclude);
                                add_pending_commit_list(revs, exclude,
                                                        flags_exclude);
                                free_commit_list(exclude);
@@ -1596,18 +1722,25 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
        } else if ((argcount = parse_long_opt("committer", argv, &optarg))) {
                add_header_grep(revs, GREP_HEADER_COMMITTER, optarg);
                return argcount;
+       } else if ((argcount = parse_long_opt("grep-reflog", argv, &optarg))) {
+               add_header_grep(revs, GREP_HEADER_REFLOG, optarg);
+               return argcount;
        } else if ((argcount = parse_long_opt("grep", argv, &optarg))) {
                add_message_grep(revs, optarg);
                return argcount;
        } else if (!strcmp(arg, "--grep-debug")) {
                revs->grep_filter.debug = 1;
+       } else if (!strcmp(arg, "--basic-regexp")) {
+               grep_set_pattern_type_option(GREP_PATTERN_TYPE_BRE, &revs->grep_filter);
        } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
-               revs->grep_filter.regflags |= REG_EXTENDED;
+               grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, &revs->grep_filter);
        } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
                revs->grep_filter.regflags |= REG_ICASE;
                DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
        } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
-               revs->grep_filter.fixed = 1;
+               grep_set_pattern_type_option(GREP_PATTERN_TYPE_FIXED, &revs->grep_filter);
+       } else if (!strcmp(arg, "--perl-regexp")) {
+               grep_set_pattern_type_option(GREP_PATTERN_TYPE_PCRE, &revs->grep_filter);
        } else if (!strcmp(arg, "--all-match")) {
                revs->grep_filter.all_match = 1;
        } else if ((argcount = parse_long_opt("encoding", argv, &optarg))) {
@@ -1891,6 +2024,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
        revs->diffopt.abbrev = revs->abbrev;
        diff_setup_done(&revs->diffopt);
 
+       grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
+                                &revs->grep_filter);
        compile_grep_patterns(&revs->grep_filter);
 
        if (revs->reverse && revs->reflog_info)
@@ -1906,6 +2041,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 
        if (revs->reflog_info && revs->graph)
                die("cannot combine --walk-reflogs with --graph");
+       if (!revs->reflog_info && revs->grep_filter.use_reflog_filter)
+               die("cannot use --grep-reflog without --walk-reflogs");
 
        return left;
 }
@@ -1918,28 +2055,32 @@ static void add_child(struct rev_info *revs, struct commit *parent, struct commi
        l->next = add_decoration(&revs->children, &parent->object, l);
 }
 
-static int remove_duplicate_parents(struct commit *commit)
+static int remove_duplicate_parents(struct rev_info *revs, struct commit *commit)
 {
+       struct treesame_state *ts = lookup_decoration(&revs->treesame, &commit->object);
        struct commit_list **pp, *p;
        int surviving_parents;
 
        /* Examine existing parents while marking ones we have seen... */
        pp = &commit->parents;
+       surviving_parents = 0;
        while ((p = *pp) != NULL) {
                struct commit *parent = p->item;
                if (parent->object.flags & TMP_MARK) {
                        *pp = p->next;
+                       if (ts)
+                               compact_treesame(revs, commit, surviving_parents);
                        continue;
                }
                parent->object.flags |= TMP_MARK;
+               surviving_parents++;
                pp = &p->next;
        }
-       /* count them while clearing the temporary mark */
-       surviving_parents = 0;
+       /* clear the temporary mark */
        for (p = commit->parents; p; p = p->next) {
                p->item->object.flags &= ~TMP_MARK;
-               surviving_parents++;
        }
+       /* no update_treesame() - removing duplicates can't affect TREESAME */
        return surviving_parents;
 }
 
@@ -1959,6 +2100,70 @@ static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs,
        return st;
 }
 
+static int mark_redundant_parents(struct rev_info *revs, struct commit *commit)
+{
+       struct commit_list *h = reduce_heads(commit->parents);
+       int i = 0, marked = 0;
+       struct commit_list *po, *pn;
+
+       /* Want these for sanity-checking only */
+       int orig_cnt = commit_list_count(commit->parents);
+       int cnt = commit_list_count(h);
+
+       /*
+        * Not ready to remove items yet, just mark them for now, based
+        * on the output of reduce_heads(). reduce_heads outputs the reduced
+        * set in its original order, so this isn't too hard.
+        */
+       po = commit->parents;
+       pn = h;
+       while (po) {
+               if (pn && po->item == pn->item) {
+                       pn = pn->next;
+                       i++;
+               } else {
+                       po->item->object.flags |= TMP_MARK;
+                       marked++;
+               }
+               po=po->next;
+       }
+
+       if (i != cnt || cnt+marked != orig_cnt)
+               die("mark_redundant_parents %d %d %d %d", orig_cnt, cnt, i, marked);
+
+       free_commit_list(h);
+
+       return marked;
+}
+
+static int remove_marked_parents(struct rev_info *revs, struct commit *commit)
+{
+       struct commit_list **pp, *p;
+       int nth_parent, removed = 0;
+
+       pp = &commit->parents;
+       nth_parent = 0;
+       while ((p = *pp) != NULL) {
+               struct commit *parent = p->item;
+               if (parent->object.flags & TMP_MARK) {
+                       parent->object.flags &= ~TMP_MARK;
+                       *pp = p->next;
+                       free(p);
+                       removed++;
+                       compact_treesame(revs, commit, nth_parent);
+                       continue;
+               }
+               pp = &p->next;
+               nth_parent++;
+       }
+
+       /* Removing parents can only increase TREESAMEness */
+       if (removed && !(commit->object.flags & TREESAME))
+               update_treesame(revs, commit);
+
+       return nth_parent;
+}
+
 static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
 {
        struct commit_list *p;
@@ -2003,7 +2208,9 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c
        }
 
        /*
-        * Rewrite our list of parents.
+        * Rewrite our list of parents. Note that this cannot
+        * affect our TREESAME flags in any way - a commit is
+        * always TREESAME to its simplification.
         */
        for (p = commit->parents; p; p = p->next) {
                pst = locate_simplify_state(revs, p->item);
@@ -2011,34 +2218,34 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c
                if (revs->first_parent_only)
                        break;
        }
-       if (!revs->first_parent_only)
-               cnt = remove_duplicate_parents(commit);
-       else
+
+       if (revs->first_parent_only)
                cnt = 1;
+       else
+               cnt = remove_duplicate_parents(revs, commit);
 
        /*
         * It is possible that we are a merge and one side branch
         * does not have any commit that touches the given paths;
-        * in such a case, the immediate parents will be rewritten
-        * to different commits.
+        * in such a case, the immediate parent from that branch
+        * will be rewritten to be the merge base.
         *
         *      o----X          X: the commit we are looking at;
         *     /    /           o: a commit that touches the paths;
         * ---o----'
         *
-        * Further reduce the parents by removing redundant parents.
+        * Detect and simplify this case.
         */
        if (1 < cnt) {
-               struct commit_list *h = reduce_heads(commit->parents);
-               cnt = commit_list_count(h);
-               free_commit_list(commit->parents);
-               commit->parents = h;
+               int marked = mark_redundant_parents(revs, commit);
+               if (marked)
+                       cnt = remove_marked_parents(revs, commit);
        }
 
        /*
         * A commit simplifies to itself if it is a root, if it is
         * UNINTERESTING, if it touches the given paths, or if it is a
-        * merge and its parents simplifies to more than one commits
+        * merge and its parents simplify to more than one commit
         * (the first two cases are already handled at the beginning of
         * this function).
         *
@@ -2146,6 +2353,10 @@ int prepare_revision_walk(struct rev_info *revs)
        if (!revs->leak_pending)
                free(list);
 
+       /* Signal whether we need per-parent treesame decoration */
+       if (revs->simplify_merges)
+               revs->treesame.name = "treesame";
+
        if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
                commit_list_sort_by_date(&revs->commits);
        if (revs->no_walk)
@@ -2205,16 +2416,110 @@ static int rewrite_parents(struct rev_info *revs, struct commit *commit)
                }
                pp = &parent->next;
        }
-       remove_duplicate_parents(commit);
+       remove_duplicate_parents(revs, commit);
+       return 0;
+}
+
+static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap)
+{
+       char *person, *endp;
+       size_t len, namelen, maillen;
+       const char *name;
+       const char *mail;
+       struct ident_split ident;
+
+       person = strstr(buf->buf, what);
+       if (!person)
+               return 0;
+
+       person += strlen(what);
+       endp = strchr(person, '\n');
+       if (!endp)
+               return 0;
+
+       len = endp - person;
+
+       if (split_ident_line(&ident, person, len))
+               return 0;
+
+       mail = ident.mail_begin;
+       maillen = ident.mail_end - ident.mail_begin;
+       name = ident.name_begin;
+       namelen = ident.name_end - ident.name_begin;
+
+       if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
+               struct strbuf namemail = STRBUF_INIT;
+
+               strbuf_addf(&namemail, "%.*s <%.*s>",
+                           (int)namelen, name, (int)maillen, mail);
+
+               strbuf_splice(buf, ident.name_begin - buf->buf,
+                             ident.mail_end - ident.name_begin + 1,
+                             namemail.buf, namemail.len);
+
+               strbuf_release(&namemail);
+
+               return 1;
+       }
+
        return 0;
 }
 
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
+       int retval;
+       const char *encoding;
+       char *message;
+       struct strbuf buf = STRBUF_INIT;
+
        if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list)
                return 1;
-       return grep_buffer(&opt->grep_filter,
-                          commit->buffer, strlen(commit->buffer));
+
+       /* Prepend "fake" headers as needed */
+       if (opt->grep_filter.use_reflog_filter) {
+               strbuf_addstr(&buf, "reflog ");
+               get_reflog_message(&buf, opt->reflog_info);
+               strbuf_addch(&buf, '\n');
+       }
+
+       /*
+        * We grep in the user's output encoding, under the assumption that it
+        * is the encoding they are most likely to write their grep pattern
+        * for. In addition, it means we will match the "notes" encoding below,
+        * so we will not end up with a buffer that has two different encodings
+        * in it.
+        */
+       encoding = get_log_output_encoding();
+       message = logmsg_reencode(commit, NULL, encoding);
+
+       /* Copy the commit to temporary if we are using "fake" headers */
+       if (buf.len)
+               strbuf_addstr(&buf, message);
+
+       if (opt->grep_filter.header_list && opt->mailmap) {
+               if (!buf.len)
+                       strbuf_addstr(&buf, message);
+
+               commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
+               commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
+       }
+
+       /* Append "fake" message parts as needed */
+       if (opt->show_notes) {
+               if (!buf.len)
+                       strbuf_addstr(&buf, message);
+               format_display_notes(commit->object.sha1, &buf, encoding, 1);
+       }
+
+       /* Find either in the original commit message, or in the temporary */
+       if (buf.len)
+               retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len);
+       else
+               retval = grep_buffer(&opt->grep_filter,
+                                    message, strlen(message));
+       strbuf_release(&buf);
+       logmsg_free(message, commit);
+       return retval;
 }
 
 static inline int want_ancestry(struct rev_info *revs)