combine-diff: speed it up, by using multiparent diff tree-walker directly
[gitweb.git] / combine-diff.c
index 3b92c44880228a94f71a428b0f11bb1caf693c67..12764fb733f3ca57f12c7fc567eec02d234c889e 100644 (file)
 static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
-       struct combine_diff_path *p;
-       int i;
+       struct combine_diff_path *p, **tail = &curr;
+       int i, cmp;
 
        if (!n) {
-               struct combine_diff_path *list = NULL, **tail = &list;
                for (i = 0; i < q->nr; i++) {
                        int len;
                        const char *path;
@@ -31,7 +30,6 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
                        p->path = (char *) &(p->parent[num_parent]);
                        memcpy(p->path, path, len);
                        p->path[len] = 0;
-                       p->len = len;
                        p->next = NULL;
                        memset(p->parent, 0,
                               sizeof(p->parent[0]) * num_parent);
@@ -44,31 +42,37 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
                        *tail = p;
                        tail = &p->next;
                }
-               return list;
+               return curr;
        }
 
-       for (p = curr; p; p = p->next) {
-               int found = 0;
-               if (!p->len)
+       /*
+        * paths in curr (linked list) and q->queue[] (array) are
+        * both sorted in the tree order.
+        */
+       i = 0;
+       while ((p = *tail) != NULL) {
+               cmp = ((i >= q->nr)
+                      ? -1 : strcmp(p->path, q->queue[i]->two->path));
+
+               if (cmp < 0) {
+                       /* p->path not in q->queue[]; drop it */
+                       *tail = p->next;
+                       free(p);
                        continue;
-               for (i = 0; i < q->nr; i++) {
-                       const char *path;
-                       int len;
+               }
 
-                       if (diff_unmodified_pair(q->queue[i]))
-                               continue;
-                       path = q->queue[i]->two->path;
-                       len = strlen(path);
-                       if (len == p->len && !memcmp(path, p->path, len)) {
-                               found = 1;
-                               hashcpy(p->parent[n].sha1, q->queue[i]->one->sha1);
-                               p->parent[n].mode = q->queue[i]->one->mode;
-                               p->parent[n].status = q->queue[i]->status;
-                               break;
-                       }
+               if (cmp > 0) {
+                       /* q->queue[i] not in p->path; skip it */
+                       i++;
+                       continue;
                }
-               if (!found)
-                       p->len = 0;
+
+               hashcpy(p->parent[n].sha1, q->queue[i]->one->sha1);
+               p->parent[n].mode = q->queue[i]->one->mode;
+               p->parent[n].status = q->queue[i]->status;
+
+               tail = &p->next;
+               i++;
        }
        return curr;
 }
@@ -1219,8 +1223,6 @@ void show_combined_diff(struct combine_diff_path *p,
 {
        struct diff_options *opt = &rev->diffopt;
 
-       if (!p->len)
-               return;
        if (opt->output_format & (DIFF_FORMAT_RAW |
                                  DIFF_FORMAT_NAME |
                                  DIFF_FORMAT_NAME_STATUS))
@@ -1284,17 +1286,96 @@ static void handle_combined_callback(struct diff_options *opt,
        q.queue = xcalloc(num_paths, sizeof(struct diff_filepair *));
        q.alloc = num_paths;
        q.nr = num_paths;
-       for (i = 0, p = paths; p; p = p->next) {
-               if (!p->len)
-                       continue;
+       for (i = 0, p = paths; p; p = p->next)
                q.queue[i++] = combined_pair(p, num_parent);
-       }
        opt->format_callback(&q, opt, opt->format_callback_data);
        for (i = 0; i < num_paths; i++)
                free_combined_pair(q.queue[i]);
        free(q.queue);
 }
 
+static const char *path_path(void *obj)
+{
+       struct combine_diff_path *path = (struct combine_diff_path *)obj;
+
+       return path->path;
+}
+
+
+/* find set of paths that every parent touches */
+static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
+       const struct sha1_array *parents, struct diff_options *opt)
+{
+       struct combine_diff_path *paths = NULL;
+       int i, num_parent = parents->nr;
+
+       int output_format = opt->output_format;
+       const char *orderfile = opt->orderfile;
+
+       opt->output_format = DIFF_FORMAT_NO_OUTPUT;
+       /* tell diff_tree to emit paths in sorted (=tree) order */
+       opt->orderfile = NULL;
+
+       /* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn)  (wrt paths) */
+       for (i = 0; i < num_parent; i++) {
+               /*
+                * show stat against the first parent even when doing
+                * combined diff.
+                */
+               int stat_opt = (output_format &
+                               (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
+               if (i == 0 && stat_opt)
+                       opt->output_format = stat_opt;
+               else
+                       opt->output_format = DIFF_FORMAT_NO_OUTPUT;
+               diff_tree_sha1(parents->sha1[i], sha1, "", opt);
+               diffcore_std(opt);
+               paths = intersect_paths(paths, i, num_parent);
+
+               /* if showing diff, show it in requested order */
+               if (opt->output_format != DIFF_FORMAT_NO_OUTPUT &&
+                   orderfile) {
+                       diffcore_order(orderfile);
+               }
+
+               diff_flush(opt);
+       }
+
+       opt->output_format = output_format;
+       opt->orderfile = orderfile;
+       return paths;
+}
+
+
+/*
+ * find set of paths that everybody touches, assuming diff is run without
+ * rename/copy detection, etc, comparing all trees simultaneously (= faster).
+ */
+static struct combine_diff_path *find_paths_multitree(
+       const unsigned char *sha1, const struct sha1_array *parents,
+       struct diff_options *opt)
+{
+       int i, nparent = parents->nr;
+       const unsigned char **parents_sha1;
+       struct combine_diff_path paths_head;
+       struct strbuf base;
+
+       parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0]));
+       for (i = 0; i < nparent; i++)
+               parents_sha1[i] = parents->sha1[i];
+
+       /* fake list head, so worker can assume it is non-NULL */
+       paths_head.next = NULL;
+
+       strbuf_init(&base, PATH_MAX);
+       diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt);
+
+       strbuf_release(&base);
+       free(parents_sha1);
+       return paths_head.next;
+}
+
+
 void diff_tree_combined(const unsigned char *sha1,
                        const struct sha1_array *parents,
                        int dense,
@@ -1302,55 +1383,114 @@ void diff_tree_combined(const unsigned char *sha1,
 {
        struct diff_options *opt = &rev->diffopt;
        struct diff_options diffopts;
-       struct combine_diff_path *p, *paths = NULL;
+       struct combine_diff_path *p, *paths;
        int i, num_paths, needsep, show_log_first, num_parent = parents->nr;
+       int need_generic_pathscan;
+
+       /* nothing to do, if no parents */
+       if (!num_parent)
+               return;
+
+       show_log_first = !!rev->loginfo && !rev->no_commit_id;
+       needsep = 0;
+       if (show_log_first) {
+               show_log(rev);
+
+               if (rev->verbose_header && opt->output_format)
+                       printf("%s%c", diff_line_prefix(opt),
+                              opt->line_termination);
+       }
 
        diffopts = *opt;
        copy_pathspec(&diffopts.pathspec, &opt->pathspec);
-       diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
        DIFF_OPT_SET(&diffopts, RECURSIVE);
        DIFF_OPT_CLR(&diffopts, ALLOW_EXTERNAL);
 
-       show_log_first = !!rev->loginfo && !rev->no_commit_id;
-       needsep = 0;
-       /* find set of paths that everybody touches */
-       for (i = 0; i < num_parent; i++) {
-               /* show stat against the first parent even
+       /* find set of paths that everybody touches
+        *
+        * NOTE
+        *
+        * Diffcore transformations are bound to diff_filespec and logic
+        * comparing two entries - i.e. they do not apply directly to combine
+        * diff.
+        *
+        * If some of such transformations is requested - we launch generic
+        * path scanning, which works significantly slower compared to
+        * simultaneous all-trees-in-one-go scan in find_paths_multitree().
+        *
+        * TODO some of the filters could be ported to work on
+        * combine_diff_paths - i.e. all functionality that skips paths, so in
+        * theory, we could end up having only multitree path scanning.
+        *
+        * NOTE please keep this semantically in sync with diffcore_std()
+        */
+       need_generic_pathscan = opt->skip_stat_unmatch  ||
+                       DIFF_OPT_TST(opt, FOLLOW_RENAMES)       ||
+                       opt->break_opt != -1    ||
+                       opt->detect_rename      ||
+                       opt->pickaxe            ||
+                       opt->filter;
+
+
+       if (need_generic_pathscan) {
+               /*
+                * NOTE generic case also handles --stat, as it computes
+                * diff(sha1,parent_i) for all i to do the job, specifically
+                * for parent0.
+                */
+               paths = find_paths_generic(sha1, parents, &diffopts);
+       }
+       else {
+               int stat_opt;
+               paths = find_paths_multitree(sha1, parents, &diffopts);
+
+               /*
+                * show stat against the first parent even
                 * when doing combined diff.
                 */
-               int stat_opt = (opt->output_format &
+               stat_opt = (opt->output_format &
                                (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
-               if (i == 0 && stat_opt)
+               if (stat_opt) {
                        diffopts.output_format = stat_opt;
-               else
-                       diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
-               diff_tree_sha1(parents->sha1[i], sha1, "", &diffopts);
-               diffcore_std(&diffopts);
-               paths = intersect_paths(paths, i, num_parent);
-
-               if (show_log_first && i == 0) {
-                       show_log(rev);
 
-                       if (rev->verbose_header && opt->output_format)
-                               printf("%s%c", diff_line_prefix(opt),
-                                      opt->line_termination);
+                       diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts);
+                       diffcore_std(&diffopts);
+                       if (opt->orderfile)
+                               diffcore_order(opt->orderfile);
+                       diff_flush(&diffopts);
                }
-               diff_flush(&diffopts);
        }
 
-       /* find out surviving paths */
-       for (num_paths = 0, p = paths; p; p = p->next) {
-               if (p->len)
-                       num_paths++;
+       /* find out number of surviving paths */
+       for (num_paths = 0, p = paths; p; p = p->next)
+               num_paths++;
+
+       /* order paths according to diffcore_order */
+       if (opt->orderfile && num_paths) {
+               struct obj_order *o;
+
+               o = xmalloc(sizeof(*o) * num_paths);
+               for (i = 0, p = paths; p; p = p->next, i++)
+                       o[i].obj = p;
+               order_objects(opt->orderfile, path_path, o, num_paths);
+               for (i = 0; i < num_paths - 1; i++) {
+                       p = o[i].obj;
+                       p->next = o[i+1].obj;
+               }
+
+               p = o[num_paths-1].obj;
+               p->next = NULL;
+               paths = o[0].obj;
+               free(o);
        }
+
+
        if (num_paths) {
                if (opt->output_format & (DIFF_FORMAT_RAW |
                                          DIFF_FORMAT_NAME |
                                          DIFF_FORMAT_NAME_STATUS)) {
-                       for (p = paths; p; p = p->next) {
-                               if (p->len)
-                                       show_raw_diff(p, num_parent, rev);
-                       }
+                       for (p = paths; p; p = p->next)
+                               show_raw_diff(p, num_parent, rev);
                        needsep = 1;
                }
                else if (opt->output_format &
@@ -1363,11 +1503,9 @@ void diff_tree_combined(const unsigned char *sha1,
                        if (needsep)
                                printf("%s%c", diff_line_prefix(opt),
                                       opt->line_termination);
-                       for (p = paths; p; p = p->next) {
-                               if (p->len)
-                                       show_patch_diff(p, num_parent, dense,
-                                                       0, rev);
-                       }
+                       for (p = paths; p; p = p->next)
+                               show_patch_diff(p, num_parent, dense,
+                                               0, rev);
                }
        }