simple heuristic for further free packing improvements
[gitweb.git] / diff.c
diff --git a/diff.c b/diff.c
index ce98a90805f881fa9ef161448af22e859db0d93d..7a7b839e56ac1ba2ed0b770a047aaf07bce23722 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -8,7 +8,8 @@
 #include "quote.h"
 #include "diff.h"
 #include "diffcore.h"
-#include "xdiff/xdiff.h"
+#include "delta.h"
+#include "xdiff-interface.h"
 
 static int use_size_cache;
 
@@ -142,11 +143,12 @@ static void copy_file(int prefix, const char *data, int size)
 
 static void emit_rewrite_diff(const char *name_a,
                              const char *name_b,
-                             struct diff_filespec *one, 
+                             struct diff_filespec *one,
                              struct diff_filespec *two)
 {
-       /* Use temp[i].name as input, name_a and name_b as labels */
        int lc_a, lc_b;
+       diff_populate_filespec(one, 0);
+       diff_populate_filespec(two, 0);
        lc_a = count_lines(one->data, one->size);
        lc_b = count_lines(two->data, two->size);
        printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
@@ -194,6 +196,286 @@ static int fn_out(void *priv, mmbuffer_t *mb, int nbuf)
        return 0;
 }
 
+/*
+ * Build the label shown for a renamed path, factoring out the leading
+ * and trailing directory components that both names share:
+ *
+ *     pfx{mid-a => mid-b}sfx
+ *     {pfx-a => pfx-b}sfx
+ *     pfx{sfx-a => sfx-b}
+ *     name-a => name-b
+ *
+ * The common prefix always ends with a '/' and the common suffix always
+ * starts with one.  Returns a NUL-terminated buffer obtained from
+ * xmalloc(); the caller is responsible for freeing it.
+ */
+static char *pprint_rename(const char *a, const char *b)
+{
+       const char *old = a;
+       const char *new = b;
+       char *name = NULL;
+       int pfx_length, sfx_length;
+       int pfx_adjust_for_slash;
+       int len_a = strlen(a);
+       int len_b = strlen(b);
+       int a_midlen, b_midlen;
+       int max_sfx, n;
+
+       /* Find common prefix */
+       pfx_length = 0;
+       while (*old && *new && *old == *new) {
+               if (*old == '/')
+                       pfx_length = old - a + 1;
+               old++;
+               new++;
+       }
+
+       /*
+        * Find common suffix, looking at the last n bytes of each name.
+        *
+        * The scan must not wander back into the common prefix: if it
+        * did, pfx_length + sfx_length could exceed the length of a
+        * name, the "middle" lengths below would go negative and the
+        * result would overflow the buffer.  When there is a common
+        * prefix it ends with a slash, and the scan is allowed to reach
+        * exactly that one slash so that it can also start the suffix
+        * (e.g. "a/b" => "a/c/b" is shown as "a/{ => c}/b").  Without a
+        * prefix the scan simply stops at the start of the shorter name.
+        */
+       pfx_adjust_for_slash = (pfx_length ? 1 : 0);
+       max_sfx = (len_a < len_b ? len_a : len_b)
+               - pfx_length + pfx_adjust_for_slash;
+       sfx_length = 0;
+       for (n = 1; n <= max_sfx; n++) {
+               if (a[len_a - n] != b[len_b - n])
+                       break;
+               if (a[len_a - n] == '/')
+                       sfx_length = n;
+       }
+
+       /*
+        * Prefix and suffix can share one slash, which makes a middle
+        * part one byte "short"; treat that as an empty middle.
+        */
+       a_midlen = len_a - pfx_length - sfx_length;
+       b_midlen = len_b - pfx_length - sfx_length;
+       if (a_midlen < 0)
+               a_midlen = 0;
+       if (b_midlen < 0)
+               b_midlen = 0;
+
+       if (pfx_length + sfx_length) {
+               /* 7 = strlen("{") + strlen(" => ") + strlen("}") + NUL */
+               name = xmalloc(pfx_length + a_midlen + b_midlen +
+                              sfx_length + 7);
+               sprintf(name, "%.*s{%.*s => %.*s}%s",
+                       pfx_length, a,
+                       a_midlen, a + pfx_length,
+                       b_midlen, b + pfx_length,
+                       a + len_a - sfx_length);
+       }
+       else {
+               /* 5 = strlen(" => ") + NUL */
+               name = xmalloc(len_a + len_b + 5);
+               sprintf(name, "%s => %s", a, b);
+       }
+       return name;
+}
+
+/*
+ * Accumulator for diffstat output: a growable array of per-file entries
+ * filled in by diffstat_add() and updated by diffstat_consume().
+ */
+struct diffstat_t {
+       /*
+        * xdiff emit state; diff_flush() sets xm.consume to
+        * diffstat_consume.
+        * NOTE(review): presumably kept as the first member so that a
+        * diffstat_t can be handed to xdiff_outf() as its priv pointer --
+        * confirm against xdiff-interface.h.
+        */
+       struct xdiff_emit_state xm;
+
+       int nr;         /* number of entries used in files[] */
+       int alloc;      /* number of entries files[] has room for */
+       struct diffstat_file {
+               char *name;             /* heap-allocated display name */
+               unsigned is_unmerged:1; /* set when a side of the pair is missing */
+               unsigned is_binary:1;   /* set when either side looks binary */
+               unsigned is_renamed:1;  /* set when a second name was supplied */
+               unsigned int added, deleted;    /* '+' and '-' line counts */
+       } **files;
+};
+
+/*
+ * Append a zero-initialised entry for one file to diffstat and return it.
+ *
+ * name_a is the path to show.  When name_b is non-NULL the entry is
+ * marked as a rename and its display name is built by pprint_rename();
+ * otherwise the name is a private copy of name_a.  The entry and its
+ * name are heap-allocated and owned by diffstat.  Dies if memory for the
+ * name cannot be obtained.
+ */
+static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
+                                         const char *name_a,
+                                         const char *name_b)
+{
+       struct diffstat_file *x;
+       x = xcalloc(1, sizeof(*x));
+       if (diffstat->nr == diffstat->alloc) {
+               diffstat->alloc = alloc_nr(diffstat->alloc);
+               diffstat->files = xrealloc(diffstat->files,
+                               diffstat->alloc * sizeof(*diffstat->files));
+       }
+       diffstat->files[diffstat->nr++] = x;
+       if (name_b) {
+               x->name = pprint_rename(name_a, name_b);
+               x->is_renamed = 1;
+       }
+       else {
+               /*
+                * Unlike the x*() allocators, strdup() can return NULL;
+                * a NULL name would be dereferenced later when the
+                * stats are printed.
+                */
+               x->name = strdup(name_a);
+               if (!x->name)
+                       die("Out of memory, strdup failed");
+       }
+       return x;
+}
+
+/*
+ * Line callback (diff_flush() installs it as xm.consume): classify one
+ * line of diff output by its first character and bump the matching
+ * counter of the most recently added file entry.  Lines that start with
+ * anything other than '+' or '-' are ignored; len is not used.
+ */
+static void diffstat_consume(void *priv, char *line, unsigned long len)
+{
+       struct diffstat_t *stat = priv;
+       struct diffstat_file *current = stat->files[stat->nr - 1];
+
+       switch (line[0]) {
+       case '+':
+               current->added++;
+               break;
+       case '-':
+               current->deleted++;
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Source strings for the diffstat bar graph; show_stats() prints a
+ * leading slice of each with "%.*s".
+ * NOTE(review): presumably 70 characters each, matching the 70-column
+ * cap applied in show_stats() -- confirm if either value is changed.
+ */
+static const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
+static const char minuses[]= "----------------------------------------------------------------------";
+
+/*
+ * Print the collected diffstat: one line per file followed by a summary
+ * line.  Every per-file entry and the files array are released on the
+ * way; the diffstat_t itself is not freed here.  Prints nothing when no
+ * file was recorded.
+ */
+static void show_stats(struct diffstat_t* data)
+{
+       int i, len, add, del, total, adds = 0, dels = 0;
+       int max, max_change = 0, max_len = 0;
+       int total_files = data->nr;
+
+       if (data->nr == 0)
+               return;
+
+       /*
+        * Pass 1: find the longest name and the largest number of
+        * changed lines; binary and unmerged entries do not take part
+        * in the latter.
+        */
+       for (i = 0; i < data->nr; i++) {
+               struct diffstat_file *file = data->files[i];
+
+               len = strlen(file->name);
+               if (max_len < len)
+                       max_len = len;
+
+               if (file->is_binary || file->is_unmerged)
+                       continue;
+               if (max_change < file->added + file->deleted)
+                       max_change = file->added + file->deleted;
+       }
+
+       /* Pass 2: print each entry, then free it. */
+       for (i = 0; i < data->nr; i++) {
+               char *prefix = "";
+               char *name = data->files[i]->name;
+               int added = data->files[i]->added;
+               int deleted = data->files[i]->deleted;
+
+               /*
+                * Swap in the C-style quoted form of the name when
+                * quote_c_style() reports a positive length.
+                * NOTE(review): presumably that is the length of the
+                * quoted form when quoting is needed -- confirm against
+                * quote.c.
+                */
+               if (0 < (len = quote_c_style(name, NULL, NULL, 0))) {
+                       char *qname = xmalloc(len + 1);
+                       quote_c_style(name, qname, NULL, 0);
+                       free(name);
+                       data->files[i]->name = name = qname;
+               }
+
+               /*
+                * "scale" the filename
+                *
+                * The name column is as wide as the longest name but at
+                * most 50; a longer name loses its head, gets a "..."
+                * prefix, and is cut at the next '/' if there is one.
+                * Only the local "name" pointer moves; the allocation
+                * is still reachable through data->files[i]->name.
+                */
+               len = strlen(name);
+               max = max_len;
+               if (max > 50)
+                       max = 50;
+               if (len > max) {
+                       char *slash;
+                       prefix = "...";
+                       max -= 3;
+                       name += len - max;
+                       slash = strchr(name, '/');
+                       if (slash)
+                               name = slash;
+               }
+               /* from here on len is the padded width of the name column */
+               len = max;
+
+               /*
+                * scale the add/delete
+                *
+                * The graph gets whatever is left of 70 columns after
+                * the name column.
+                */
+               max = max_change;
+               if (max + len > 70)
+                       max = 70 - len;
+
+               if (data->files[i]->is_binary) {
+                       printf(" %s%-*s |  Bin\n", prefix, len, name);
+                       goto free_diffstat_file;
+               }
+               else if (data->files[i]->is_unmerged) {
+                       printf(" %s%-*s |  Unmerged\n", prefix, len, name);
+                       goto free_diffstat_file;
+               }
+               else if (!data->files[i]->is_renamed &&
+                        (added + deleted == 0)) {
+                       /* nothing to show: leave it out of the file count */
+                       total_files--;
+                       goto free_diffstat_file;
+               }
+
+               add = added;
+               del = deleted;
+               total = add + del;
+               adds += add;
+               dels += del;
+
+               /*
+                * Scale to "max" columns, rounding to nearest; del is
+                * derived from the rounded total so the two bars add up
+                * to it.
+                */
+               if (max_change > 0) {
+                       total = (total * max + max_change / 2) / max_change;
+                       add = (add * max + max_change / 2) / max_change;
+                       del = total - add;
+               }
+               printf(" %s%-*s |%5d %.*s%.*s\n", prefix,
+                               len, name, added + deleted,
+                               add, pluses, del, minuses);
+       /* reached for every entry, whether or not a line was printed */
+       free_diffstat_file:
+               free(data->files[i]->name);
+               free(data->files[i]);
+       }
+       free(data->files);
+       printf(" %d files changed, %d insertions(+), %d deletions(-)\n",
+                       total_files, adds, dels);
+}
+
+/*
+ * Compress size bytes at data with zlib at Z_BEST_COMPRESSION.
+ *
+ * Returns a buffer obtained from xmalloc() that the caller must free,
+ * and stores the number of compressed bytes in *result_size.  Dies if
+ * zlib cannot be initialised or does not finish the stream.
+ */
+static unsigned char *deflate_it(char *data,
+                                unsigned long size,
+                                unsigned long *result_size)
+{
+       unsigned long bound;
+       unsigned char *deflated;
+       z_stream stream;
+       int status;
+
+       memset(&stream, 0, sizeof(stream));
+       if (deflateInit(&stream, Z_BEST_COMPRESSION) != Z_OK)
+               die("unable to initialize deflate");
+       /* deflateBound() gives a worst-case size, so one buffer suffices */
+       bound = deflateBound(&stream, size);
+       deflated = xmalloc(bound);
+       stream.next_out = deflated;
+       stream.avail_out = bound;
+
+       stream.next_in = (unsigned char *)data;
+       stream.avail_in = size;
+       do {
+               status = deflate(&stream, Z_FINISH);
+       } while (status == Z_OK);
+       deflateEnd(&stream);
+       /* anything but Z_STREAM_END means the output is incomplete */
+       if (status != Z_STREAM_END)
+               die("deflate failed");
+       *result_size = stream.total_out;
+       return deflated;
+}
+
+/*
+ * Print a "GIT binary patch" that turns the contents of one into the
+ * contents of two.
+ *
+ * The payload is either the deflated contents of two ("literal") or a
+ * deflated delta from one to two ("delta"), whichever is smaller; the
+ * number printed after the keyword is the size before deflation.  The
+ * payload follows base85-encoded, at most 52 bytes per line, each line
+ * starting with a length character ('A'..'Z' for 1..26 bytes, 'a'..'z'
+ * for 27..52 bytes), and is terminated by an empty line.
+ */
+static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
+{
+       void *cp;
+       void *delta;
+       void *deflated;
+       void *data;
+       unsigned long orig_size = 0;
+       unsigned long delta_size;
+       unsigned long deflate_size;
+       unsigned long data_size;
+
+       printf("GIT binary patch\n");
+       /* We could do deflated delta, or we could do just deflated two,
+        * whichever is smaller.
+        */
+       delta = NULL;
+       deflated = deflate_it(two->ptr, two->size, &deflate_size);
+       if (one->size && two->size) {
+               /*
+                * NOTE(review): deflate_size is passed as the last
+                * argument, presumably as an upper bound on the delta
+                * size worth producing -- confirm against delta.h.
+                */
+               delta = diff_delta(one->ptr, one->size,
+                                  two->ptr, two->size,
+                                  &delta_size, deflate_size);
+               if (delta) {
+                       void *to_free = delta;
+                       /* remember the undeflated size for the header */
+                       orig_size = delta_size;
+                       delta = deflate_it(delta, delta_size, &delta_size);
+                       free(to_free);
+               }
+       }
+
+       if (delta && delta_size < deflate_size) {
+               printf("delta %lu\n", orig_size);
+               free(deflated);
+               data = delta;
+               data_size = delta_size;
+       }
+       else {
+               printf("literal %lu\n", two->size);
+               free(delta);
+               data = deflated;
+               data_size = deflate_size;
+       }
+
+       /* emit data encoded in base85 */
+       cp = data;
+       while (data_size) {
+               int bytes = (52 < data_size) ? 52 : data_size;
+               char line[70];
+               data_size -= bytes;
+               if (bytes <= 26)
+                       line[0] = bytes + 'A' - 1;
+               else
+                       line[0] = bytes - 26 + 'a' - 1;
+               /*
+                * NOTE(review): puts() below relies on encode_85()
+                * NUL-terminating what it writes -- confirm.
+                */
+               encode_85(line + 1, cp, bytes);
+               /* arithmetic on void * is a GNU extension; step as char * */
+               cp = (char *) cp + bytes;
+               puts(line);
+       }
+       printf("\n");
+       free(data);
+}
+
 #define FIRST_FEW_BYTES 8000
 static int mmfile_is_binary(mmfile_t *mf)
 {
@@ -210,6 +492,7 @@ static void builtin_diff(const char *name_a,
                         struct diff_filespec *one,
                         struct diff_filespec *two,
                         const char *xfrm_msg,
+                        struct diff_options *o,
                         int complete_rewrite)
 {
        mmfile_t mf1, mf2;
@@ -254,8 +537,17 @@ static void builtin_diff(const char *name_a,
        if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
                die("unable to read files to diff");
 
-       if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2))
-               printf("Binary files %s and %s differ\n", lbl[0], lbl[1]);
+       if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
+               /* Quite common confusing case */
+               if (mf1.size == mf2.size &&
+                   !memcmp(mf1.ptr, mf2.ptr, mf1.size))
+                       goto free_ab_and_return;
+               if (o->binary)
+                       emit_binary_diff(&mf1, &mf2);
+               else
+                       printf("Binary files %s and %s differ\n",
+                              lbl[0], lbl[1]);
+       }
        else {
                /* Crazy xdl interfaces.. */
                const char *diffopts = getenv("GIT_DIFF_OPTS");
@@ -285,6 +577,48 @@ static void builtin_diff(const char *name_a,
        return;
 }
 
+/*
+ * Record one file pair in diffstat.
+ *
+ * An entry is always added first.  It is then marked unmerged when
+ * either side is missing, filled with whole-file line counts for a
+ * complete rewrite, marked binary when either side looks binary, or
+ * otherwise has its counters driven by running xdiff over both sides
+ * with no context lines.
+ */
+static void builtin_diffstat(const char *name_a, const char *name_b,
+                            struct diff_filespec *one,
+                            struct diff_filespec *two,
+                            struct diffstat_t *diffstat,
+                            int complete_rewrite)
+{
+       struct diffstat_file *entry = diffstat_add(diffstat, name_a, name_b);
+       mmfile_t old_mf, new_mf;
+       xpparam_t xpp;
+       xdemitconf_t xecfg;
+       xdemitcb_t ecb;
+
+       if (!one || !two) {
+               entry->is_unmerged = 1;
+               return;
+       }
+
+       if (complete_rewrite) {
+               /* everything old is gone, everything new was added */
+               diff_populate_filespec(one, 0);
+               diff_populate_filespec(two, 0);
+               entry->deleted = count_lines(one->data, one->size);
+               entry->added = count_lines(two->data, two->size);
+               return;
+       }
+
+       if (fill_mmfile(&old_mf, one) < 0 || fill_mmfile(&new_mf, two) < 0)
+               die("unable to read files to diff");
+
+       if (mmfile_is_binary(&old_mf) || mmfile_is_binary(&new_mf)) {
+               entry->is_binary = 1;
+               return;
+       }
+
+       /* Textual change: let the xdiff callback do the counting. */
+       xpp.flags = XDF_NEED_MINIMAL;
+       xecfg.ctxlen = 0;
+       xecfg.flags = 0;
+       ecb.outf = xdiff_outf;
+       ecb.priv = diffstat;
+       xdl_diff(&old_mf, &new_mf, &xpp, &xecfg, &ecb);
+}
+
 struct diff_filespec *alloc_filespec(const char *path)
 {
        int namelen = strlen(path);
@@ -689,6 +1023,7 @@ static void run_diff_cmd(const char *pgm,
                         struct diff_filespec *one,
                         struct diff_filespec *two,
                         const char *xfrm_msg,
+                        struct diff_options *o,
                         int complete_rewrite)
 {
        if (pgm) {
@@ -698,7 +1033,7 @@ static void run_diff_cmd(const char *pgm,
        }
        if (one && two)
                builtin_diff(name, other ? other : name,
-                            one, two, xfrm_msg, complete_rewrite);
+                            one, two, xfrm_msg, o, complete_rewrite);
        else
                printf("* Unmerged path %s\n", name);
 }
@@ -732,7 +1067,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
 
        if (DIFF_PAIR_UNMERGED(p)) {
                /* unmerged */
-               run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0);
+               run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, o, 0);
                return;
        }
 
@@ -779,14 +1114,12 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
        }
 
        if (memcmp(one->sha1, two->sha1, 20)) {
-               char one_sha1[41];
                int abbrev = o->full_index ? 40 : DEFAULT_ABBREV;
-               memcpy(one_sha1, sha1_to_hex(one->sha1), 41);
 
                len += snprintf(msg + len, sizeof(msg) - len,
                                "index %.*s..%.*s",
-                               abbrev, one_sha1, abbrev,
-                               sha1_to_hex(two->sha1));
+                               abbrev, sha1_to_hex(one->sha1),
+                               abbrev, sha1_to_hex(two->sha1));
                if (one->mode == two->mode)
                        len += snprintf(msg + len, sizeof(msg) - len,
                                        " %06o", one->mode);
@@ -804,20 +1137,44 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
                 * needs to be split into deletion and creation.
                 */
                struct diff_filespec *null = alloc_filespec(two->path);
-               run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0);
+               run_diff_cmd(NULL, name, other, one, null, xfrm_msg, o, 0);
                free(null);
                null = alloc_filespec(one->path);
-               run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0);
+               run_diff_cmd(NULL, name, other, null, two, xfrm_msg, o, 0);
                free(null);
        }
        else
-               run_diff_cmd(pgm, name, other, one, two, xfrm_msg,
+               run_diff_cmd(pgm, name, other, one, two, xfrm_msg, o,
                             complete_rewrite);
 
        free(name_munged);
        free(other_munged);
 }
 
+/*
+ * Feed one queued file pair to builtin_diffstat().
+ *
+ * An unmerged pair is recorded by its path only.  Otherwise the second
+ * name is passed only when it differs from the first, and the pair is
+ * treated as a complete rewrite when its status is "modified" and it
+ * carries a non-zero score.  The options argument is not consulted.
+ */
+static void run_diffstat(struct diff_filepair *p, struct diff_options *o,
+                        struct diffstat_t *diffstat)
+{
+       const char *old_path;
+       const char *new_path = NULL;
+       int rewritten;
+
+       if (DIFF_PAIR_UNMERGED(p)) {
+               /* unmerged */
+               builtin_diffstat(p->one->path, NULL, NULL, NULL, diffstat, 0);
+               return;
+       }
+
+       old_path = p->one->path;
+       if (strcmp(old_path, p->two->path) != 0)
+               new_path = p->two->path;
+
+       diff_fill_sha1_info(p->one);
+       diff_fill_sha1_info(p->two);
+
+       rewritten = (p->status == DIFF_STATUS_MODIFIED && p->score) ? 1 : 0;
+       builtin_diffstat(old_path, new_path, p->one, p->two, diffstat,
+                        rewritten);
+}
+
 void diff_setup(struct diff_options *options)
 {
        memset(options, 0, sizeof(*options));
@@ -836,6 +1193,15 @@ int diff_setup_done(struct diff_options *options)
             options->detect_rename != DIFF_DETECT_COPY) ||
            (0 <= options->rename_limit && !options->detect_rename))
                return -1;
+
+       /*
+        * These cases always need recursive; we do not drop caller-supplied
+        * recursive bits for other formats here.
+        */
+       if ((options->output_format == DIFF_FORMAT_PATCH) ||
+           (options->output_format == DIFF_FORMAT_DIFFSTAT))
+               options->recursive = 1;
+
        if (options->detect_rename && options->rename_limit < 0)
                options->rename_limit = diff_rename_limit_default;
        if (options->setup & DIFF_SETUP_USE_CACHE) {
@@ -861,12 +1227,26 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
        const char *arg = av[0];
        if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
                options->output_format = DIFF_FORMAT_PATCH;
+       else if (!strcmp(arg, "--patch-with-raw")) {
+               options->output_format = DIFF_FORMAT_PATCH;
+               options->with_raw = 1;
+       }
+       else if (!strcmp(arg, "--stat"))
+               options->output_format = DIFF_FORMAT_DIFFSTAT;
+       else if (!strcmp(arg, "--patch-with-stat")) {
+               options->output_format = DIFF_FORMAT_PATCH;
+               options->with_stat = 1;
+       }
        else if (!strcmp(arg, "-z"))
                options->line_termination = 0;
        else if (!strncmp(arg, "-l", 2))
                options->rename_limit = strtoul(arg+2, NULL, 10);
        else if (!strcmp(arg, "--full-index"))
                options->full_index = 1;
+       else if (!strcmp(arg, "--binary")) {
+               options->output_format = DIFF_FORMAT_PATCH;
+               options->full_index = options->binary = 1;
+       }
        else if (!strcmp(arg, "--name-only"))
                options->output_format = DIFF_FORMAT_NAME;
        else if (!strcmp(arg, "--name-status"))
@@ -1047,13 +1427,13 @@ const char *diff_unique_abbrev(const unsigned char *sha1, int len)
 static void diff_flush_raw(struct diff_filepair *p,
                           int line_termination,
                           int inter_name_termination,
-                          struct diff_options *options)
+                          struct diff_options *options,
+                          int output_format)
 {
        int two_paths;
        char status[10];
        int abbrev = options->abbrev;
        const char *path_one, *path_two;
-       int output_format = options->output_format;
 
        path_one = p->one->path;
        path_two = p->two->path;
@@ -1155,11 +1535,24 @@ static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
 
        if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
            (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
-               return; /* no tree diffs in patch format */ 
+               return; /* no tree diffs in patch format */
 
        run_diff(p, o);
 }
 
+/*
+ * Add one queued pair to the diffstat, unless the pair is unmodified or
+ * either of its valid sides is a directory.
+ */
+static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
+                           struct diffstat_t *diffstat)
+{
+       if (diff_unmodified_pair(p))
+               return;
+
+       /* no tree diffs in diffstat output */
+       if (DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode))
+               return;
+       if (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))
+               return;
+
+       run_diffstat(p, o, diffstat);
+}
+
 int diff_queue_is_empty(void)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
@@ -1269,48 +1662,88 @@ static void diff_resolve_rename_copy(void)
        diff_debug_queue("resolve-rename-copy done", q);
 }
 
-void diff_flush(struct diff_options *options)
+static void flush_one_pair(struct diff_filepair *p,
+                          int diff_output_format,
+                          struct diff_options *options,
+                          struct diffstat_t *diffstat)
 {
-       struct diff_queue_struct *q = &diff_queued_diff;
-       int i;
        int inter_name_termination = '\t';
-       int diff_output_format = options->output_format;
        int line_termination = options->line_termination;
-
        if (!line_termination)
                inter_name_termination = 0;
 
-       for (i = 0; i < q->nr; i++) {
-               struct diff_filepair *p = q->queue[i];
-
-               switch (p->status) {
-               case DIFF_STATUS_UNKNOWN:
+       switch (p->status) {
+       case DIFF_STATUS_UNKNOWN:
+               break;
+       case 0:
+               die("internal error in diff-resolve-rename-copy");
+               break;
+       default:
+               switch (diff_output_format) {
+               case DIFF_FORMAT_DIFFSTAT:
+                       diff_flush_stat(p, options, diffstat);
                        break;
-               case 0:
-                       die("internal error in diff-resolve-rename-copy");
+               case DIFF_FORMAT_PATCH:
+                       diff_flush_patch(p, options);
+                       break;
+               case DIFF_FORMAT_RAW:
+               case DIFF_FORMAT_NAME_STATUS:
+                       diff_flush_raw(p, line_termination,
+                                      inter_name_termination,
+                                      options, diff_output_format);
+                       break;
+               case DIFF_FORMAT_NAME:
+                       diff_flush_name(p,
+                                       inter_name_termination,
+                                       line_termination);
+                       break;
+               case DIFF_FORMAT_NO_OUTPUT:
                        break;
-               default:
-                       switch (diff_output_format) {
-                       case DIFF_FORMAT_PATCH:
-                               diff_flush_patch(p, options);
-                               break;
-                       case DIFF_FORMAT_RAW:
-                       case DIFF_FORMAT_NAME_STATUS:
-                               diff_flush_raw(p, line_termination,
-                                              inter_name_termination,
-                                              options);
-                               break;
-                       case DIFF_FORMAT_NAME:
-                               diff_flush_name(p,
-                                               inter_name_termination,
-                                               line_termination);
-                               break;
-                       case DIFF_FORMAT_NO_OUTPUT:
-                               break;
-                       }
                }
+       }
+}
+
+void diff_flush(struct diff_options *options)
+{
+       struct diff_queue_struct *q = &diff_queued_diff;
+       int i;
+       int diff_output_format = options->output_format;
+       struct diffstat_t *diffstat = NULL;
+
+       if (diff_output_format == DIFF_FORMAT_DIFFSTAT || options->with_stat) {
+               diffstat = xcalloc(sizeof (struct diffstat_t), 1);
+               diffstat->xm.consume = diffstat_consume;
+       }
+
+       if (options->with_raw) {
+               for (i = 0; i < q->nr; i++) {
+                       struct diff_filepair *p = q->queue[i];
+                       flush_one_pair(p, DIFF_FORMAT_RAW, options, NULL);
+               }
+               putchar(options->line_termination);
+       }
+       if (options->with_stat) {
+               for (i = 0; i < q->nr; i++) {
+                       struct diff_filepair *p = q->queue[i];
+                       flush_one_pair(p, DIFF_FORMAT_DIFFSTAT, options,
+                                      diffstat);
+               }
+               show_stats(diffstat);
+               free(diffstat);
+               diffstat = NULL;
+               putchar(options->line_termination);
+       }
+       for (i = 0; i < q->nr; i++) {
+               struct diff_filepair *p = q->queue[i];
+               flush_one_pair(p, diff_output_format, options, diffstat);
                diff_free_filepair(p);
        }
+
+       if (diffstat) {
+               show_stats(diffstat);
+               free(diffstat);
+       }
+
        free(q->queue);
        q->queue = NULL;
        q->nr = q->alloc = 0;
@@ -1374,8 +1807,6 @@ static void diffcore_apply_filter(const char *filter)
 
 void diffcore_std(struct diff_options *options)
 {
-       if (options->paths && options->paths[0])
-               diffcore_pathspec(options->paths);
        if (options->break_opt != -1)
                diffcore_break(options->break_opt);
        if (options->detect_rename)