index-pack: smarter memory usage during delta resolution
[gitweb.git] / diff.c
diff --git a/diff.c b/diff.c
index e7afbe28c1379fbd003de905e1d7cb87ff37971e..1c6be897b2c95fc481c02834e4fe022b6bd405ae 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -24,6 +24,7 @@ static int diff_suppress_blank_empty;
 int diff_use_color_default = -1;
 static const char *external_diff_cmd_cfg;
 int diff_auto_refresh_index = 1;
+static int diff_mnemonic_prefix;
 
 static char diff_colors[][COLOR_MAXLEN] = {
        "\033[m",       /* reset */
@@ -95,32 +96,37 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
  * to define a customized regexp to find the beginning of a function to
  * be used for hunk header lines of "diff -p" style output.
  */
-static struct funcname_pattern {
+struct funcname_pattern_entry {
        char *name;
        char *pattern;
-       struct funcname_pattern *next;
+       int cflags;
+};
+static struct funcname_pattern_list {
+       struct funcname_pattern_list *next;
+       struct funcname_pattern_entry e;
 } *funcname_pattern_list;
 
-static int parse_funcname_pattern(const char *var, const char *ep, const char *value)
+static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags)
 {
        const char *name;
        int namelen;
-       struct funcname_pattern *pp;
+       struct funcname_pattern_list *pp;
 
        name = var + 5; /* "diff." */
        namelen = ep - name;
 
        for (pp = funcname_pattern_list; pp; pp = pp->next)
-               if (!strncmp(pp->name, name, namelen) && !pp->name[namelen])
+               if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen])
                        break;
        if (!pp) {
                pp = xcalloc(1, sizeof(*pp));
-               pp->name = xmemdupz(name, namelen);
+               pp->e.name = xmemdupz(name, namelen);
                pp->next = funcname_pattern_list;
                funcname_pattern_list = pp;
        }
-       free(pp->pattern);
-       pp->pattern = xstrdup(value);
+       free(pp->e.pattern);
+       pp->e.pattern = xstrdup(value);
+       pp->e.cflags = cflags;
        return 0;
 }
 
@@ -150,6 +156,10 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
                diff_auto_refresh_index = git_config_bool(var, value);
                return 0;
        }
+       if (!strcmp(var, "diff.mnemonicprefix")) {
+               diff_mnemonic_prefix = git_config_bool(var, value);
+               return 0;
+       }
        if (!strcmp(var, "diff.external"))
                return git_config_string(&external_diff_cmd_cfg, var, value);
        if (!prefixcmp(var, "diff.")) {
@@ -189,7 +199,13 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
                        if (!strcmp(ep, ".funcname")) {
                                if (!value)
                                        return config_error_nonbool(var);
-                               return parse_funcname_pattern(var, ep, value);
+                               return parse_funcname_pattern(var, ep, value,
+                                       0);
+                       } else if (!strcmp(ep, ".xfuncname")) {
+                               if (!value)
+                                       return config_error_nonbool(var);
+                               return parse_funcname_pattern(var, ep, value,
+                                       REG_EXTENDED);
                        }
                }
        }
@@ -201,9 +217,8 @@ static char *quote_two(const char *one, const char *two)
 {
        int need_one = quote_c_style(one, NULL, NULL, 1);
        int need_two = quote_c_style(two, NULL, NULL, 1);
-       struct strbuf res;
+       struct strbuf res = STRBUF_INIT;
 
-       strbuf_init(&res, 0);
        if (need_one + need_two) {
                strbuf_addch(&res, '"');
                quote_c_style(one, &res, NULL, 1);
@@ -312,6 +327,15 @@ static void emit_rewrite_diff(const char *name_a,
        const char *new = diff_get_color(color_diff, DIFF_FILE_NEW);
        const char *reset = diff_get_color(color_diff, DIFF_RESET);
        static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
+       const char *a_prefix, *b_prefix;
+
+       if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
+               a_prefix = o->b_prefix;
+               b_prefix = o->a_prefix;
+       } else {
+               a_prefix = o->a_prefix;
+               b_prefix = o->b_prefix;
+       }
 
        name_a += (*name_a == '/');
        name_b += (*name_b == '/');
@@ -320,8 +344,8 @@ static void emit_rewrite_diff(const char *name_a,
 
        strbuf_reset(&a_name);
        strbuf_reset(&b_name);
-       quote_two_c_style(&a_name, o->a_prefix, name_a, 0);
-       quote_two_c_style(&b_name, o->b_prefix, name_b, 0);
+       quote_two_c_style(&a_name, a_prefix, name_a, 0);
+       quote_two_c_style(&b_name, b_prefix, name_b, 0);
 
        diff_populate_filespec(one, 0);
        diff_populate_filespec(two, 0);
@@ -513,13 +537,20 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix)
 
 static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len)
 {
-       int has_trailing_newline = (len > 0 && line[len-1] == '\n');
+       int has_trailing_newline, has_trailing_carriage_return;
+
+       has_trailing_newline = (len > 0 && line[len-1] == '\n');
        if (has_trailing_newline)
                len--;
+       has_trailing_carriage_return = (len > 0 && line[len-1] == '\r');
+       if (has_trailing_carriage_return)
+               len--;
 
        fputs(set, file);
        fwrite(line, len, 1, file);
        fputs(reset, file);
+       if (has_trailing_carriage_return)
+               fputc('\r', file);
        if (has_trailing_newline)
                fputc('\n', file);
 }
@@ -651,7 +682,7 @@ static char *pprint_rename(const char *a, const char *b)
 {
        const char *old = a;
        const char *new = b;
-       struct strbuf name;
+       struct strbuf name = STRBUF_INIT;
        int pfx_length, sfx_length;
        int len_a = strlen(a);
        int len_b = strlen(b);
@@ -659,7 +690,6 @@ static char *pprint_rename(const char *a, const char *b)
        int qlen_a = quote_c_style(a, NULL, NULL, 0);
        int qlen_b = quote_c_style(b, NULL, NULL, 0);
 
-       strbuf_init(&name, 0);
        if (qlen_a || qlen_b) {
                quote_c_style(a, &name, NULL, 0);
                strbuf_addstr(&name, " => ");
@@ -802,8 +832,7 @@ static void fill_print_name(struct diffstat_file *file)
                return;
 
        if (!file->is_renamed) {
-               struct strbuf buf;
-               strbuf_init(&buf, 0);
+               struct strbuf buf = STRBUF_INIT;
                if (quote_c_style(file->name, &buf, NULL, 0)) {
                        pname = strbuf_detach(&buf, NULL);
                } else {
@@ -1110,9 +1139,13 @@ static void show_dirstat(struct diff_options *options)
                /*
                 * Original minus copied is the removed material,
                 * added is the new material.  They are both damages
-                * made to the preimage.
+                * made to the preimage. In --dirstat-by-file mode, count
+                * damaged files, not damaged lines. This is done by
+                * counting only a single damaged line per file.
                 */
                damage = (p->one->size - copied) + added;
+               if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0)
+                       damage = 1;
 
                ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
                dir.files[dir.nr].name = name;
@@ -1375,42 +1408,53 @@ int diff_filespec_is_binary(struct diff_filespec *one)
        return one->is_binary;
 }
 
-static const char *funcname_pattern(const char *ident)
+static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 {
-       struct funcname_pattern *pp;
+       struct funcname_pattern_list *pp;
 
        for (pp = funcname_pattern_list; pp; pp = pp->next)
-               if (!strcmp(ident, pp->name))
-                       return pp->pattern;
+               if (!strcmp(ident, pp->e.name))
+                       return &pp->e;
        return NULL;
 }
 
-static struct builtin_funcname_pattern {
-       const char *name;
-       const char *pattern;
-} builtin_funcname_pattern[] = {
-       { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$" },
-       { "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$" },
-       { "java", "!^[  ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
-                       "new\\|return\\|switch\\|throw\\|while\\)\n"
-                       "^[     ]*\\(\\([       ]*"
-                       "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
-                       "[      ]*([^;]*\\)$" },
-       { "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
-                       "destructor\\|interface\\|implementation\\|"
-                       "initialization\\|finalization\\)[ \t]*.*\\)$"
-                       "\\|"
-                       "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$"
-                       },
-       { "php", "^[\t ]*\\(\\(function\\|class\\).*\\)" },
-       { "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$" },
-       { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" },
-       { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$" },
+static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
+       { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+         REG_EXTENDED },
+       { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
+       { "java",
+         "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
+         "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+         REG_EXTENDED },
+       { "objc",
+         /* Negate C statements that can look like functions */
+         "!^[ \t]*(do|for|if|else|return|switch|while)\n"
+         /* Objective-C methods */
+         "^[ \t]*([-+][ \t]*\\([ \t]*[A-Za-z_][A-Za-z_0-9* \t]*\\)[ \t]*[A-Za-z_].*)$\n"
+         /* C functions */
+         "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
+         /* Objective-C class/protocol definitions */
+         "^(@(implementation|interface|protocol)[ \t].*)$",
+         REG_EXTENDED },
+       { "pascal",
+         "^((procedure|function|constructor|destructor|interface|"
+               "implementation|initialization|finalization)[ \t]*.*)$"
+         "\n"
+         "^(.*=[ \t]*(class|record).*)$",
+         REG_EXTENDED },
+       { "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
+       { "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
+       { "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+         REG_EXTENDED },
+       { "tex",
+         "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+         REG_EXTENDED },
 };
 
-static const char *diff_funcname_pattern(struct diff_filespec *one)
+static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
 {
-       const char *ident, *pattern;
+       const char *ident;
+       const struct funcname_pattern_entry *pe;
        int i;
 
        diff_filespec_check_attr(one);
@@ -1425,9 +1469,9 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
                return funcname_pattern("default");
 
        /* Look up custom "funcname.$ident" regexp from config. */
-       pattern = funcname_pattern(ident);
-       if (pattern)
-               return pattern;
+       pe = funcname_pattern(ident);
+       if (pe)
+               return pe;
 
        /*
         * And define built-in fallback patterns here.  Note that
@@ -1435,11 +1479,19 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
         */
        for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++)
                if (!strcmp(ident, builtin_funcname_pattern[i].name))
-                       return builtin_funcname_pattern[i].pattern;
+                       return &builtin_funcname_pattern[i];
 
        return NULL;
 }
 
+void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
+{
+       if (!options->a_prefix)
+               options->a_prefix = a;
+       if (!options->b_prefix)
+               options->b_prefix = b;
+}
+
 static void builtin_diff(const char *name_a,
                         const char *name_b,
                         struct diff_filespec *one,
@@ -1453,9 +1505,23 @@ static void builtin_diff(const char *name_a,
        char *a_one, *b_two;
        const char *set = diff_get_color_opt(o, DIFF_METAINFO);
        const char *reset = diff_get_color_opt(o, DIFF_RESET);
+       const char *a_prefix, *b_prefix;
+
+       diff_set_mnemonic_prefix(o, "a/", "b/");
+       if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
+               a_prefix = o->b_prefix;
+               b_prefix = o->a_prefix;
+       } else {
+               a_prefix = o->a_prefix;
+               b_prefix = o->b_prefix;
+       }
+
+       /* Never use a non-valid filename anywhere if at all possible */
+       name_a = DIFF_FILE_VALID(one) ? name_a : name_b;
+       name_b = DIFF_FILE_VALID(two) ? name_b : name_a;
 
-       a_one = quote_two(o->a_prefix, name_a + (*name_a == '/'));
-       b_two = quote_two(o->b_prefix, name_b + (*name_b == '/'));
+       a_one = quote_two(a_prefix, name_a + (*name_a == '/'));
+       b_two = quote_two(b_prefix, name_b + (*name_b == '/'));
        lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
        lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
        fprintf(o->file, "%sdiff --git %s %s%s\n", set, a_one, b_two, reset);
@@ -1513,11 +1579,11 @@ static void builtin_diff(const char *name_a,
                xdemitconf_t xecfg;
                xdemitcb_t ecb;
                struct emit_callback ecbdata;
-               const char *funcname_pattern;
+               const struct funcname_pattern_entry *pe;
 
-               funcname_pattern = diff_funcname_pattern(one);
-               if (!funcname_pattern)
-                       funcname_pattern = diff_funcname_pattern(two);
+               pe = diff_funcname_pattern(one);
+               if (!pe)
+                       pe = diff_funcname_pattern(two);
 
                memset(&xecfg, 0, sizeof(xecfg));
                memset(&ecbdata, 0, sizeof(ecbdata));
@@ -1529,8 +1595,8 @@ static void builtin_diff(const char *name_a,
                xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
                xecfg.ctxlen = o->context;
                xecfg.flags = XDL_EMIT_FUNCNAMES;
-               if (funcname_pattern)
-                       xdiff_set_find_func(&xecfg, funcname_pattern);
+               if (pe)
+                       xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
                if (!diffopts)
                        ;
                else if (!prefixcmp(diffopts, "--unified="))
@@ -1751,10 +1817,9 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int
 
 static int populate_from_stdin(struct diff_filespec *s)
 {
-       struct strbuf buf;
+       struct strbuf buf = STRBUF_INIT;
        size_t size = 0;
 
-       strbuf_init(&buf, 0);
        if (strbuf_read(&buf, 0, 0) < 0)
                return error("error while reading from stdin %s",
                                     strerror(errno));
@@ -1806,7 +1871,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 
        if (!s->sha1_valid ||
            reuse_worktree_file(s->path, s->sha1, 0)) {
-               struct strbuf buf;
+               struct strbuf buf = STRBUF_INIT;
                struct stat st;
                int fd;
 
@@ -1849,7 +1914,6 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
                /*
                 * Convert from working tree format to canonical git format
                 */
-               strbuf_init(&buf, 0);
                if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
                        size_t size = 0;
                        munmap(s->data, s->size);
@@ -2312,8 +2376,10 @@ void diff_setup(struct diff_options *options)
                DIFF_OPT_CLR(options, COLOR_DIFF);
        options->detect_rename = diff_detect_rename_default;
 
-       options->a_prefix = "a/";
-       options->b_prefix = "b/";
+       if (!diff_mnemonic_prefix) {
+               options->a_prefix = "a/";
+               options->b_prefix = "b/";
+       }
 }
 
 int diff_setup_done(struct diff_options *options)
@@ -2396,13 +2462,6 @@ int diff_setup_done(struct diff_options *options)
                DIFF_OPT_SET(options, EXIT_WITH_STATUS);
        }
 
-       /*
-        * If we postprocess in diffcore, we cannot simply return
-        * upon the first hit.  We need to run diff as usual.
-        */
-       if (options->pickaxe || options->filter)
-               DIFF_OPT_CLR(options, QUIET);
-
        return 0;
 }
 
@@ -2477,6 +2536,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
        else if (!strcmp(arg, "--cumulative")) {
                options->output_format |= DIFF_FORMAT_DIRSTAT;
                DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE);
+       } else if (opt_arg(arg, 0, "dirstat-by-file",
+                          &options->dirstat_percent)) {
+               options->output_format |= DIFF_FORMAT_DIRSTAT;
+               DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
        }
        else if (!strcmp(arg, "--check"))
                options->output_format |= DIFF_FORMAT_CHECKDIFF;
@@ -3033,7 +3096,7 @@ static void diff_summary(FILE *file, struct diff_filepair *p)
 }
 
 struct patch_id_t {
-       SHA_CTX *ctx;
+       git_SHA_CTX *ctx;
        int patchlen;
 };
 
@@ -3061,7 +3124,7 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
 
        new_len = remove_space(line, len);
 
-       SHA1_Update(data->ctx, line, new_len);
+       git_SHA1_Update(data->ctx, line, new_len);
        data->patchlen += new_len;
 }
 
@@ -3070,11 +3133,11 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
        int i;
-       SHA_CTX ctx;
+       git_SHA_CTX ctx;
        struct patch_id_t data;
        char buffer[PATH_MAX * 4 + 20];
 
-       SHA1_Init(&ctx);
+       git_SHA1_Init(&ctx);
        memset(&data, 0, sizeof(struct patch_id_t));
        data.ctx = &ctx;
 
@@ -3136,7 +3199,7 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
                                        len2, p->two->path,
                                        len1, p->one->path,
                                        len2, p->two->path);
-               SHA1_Update(&ctx, buffer, len1);
+               git_SHA1_Update(&ctx, buffer, len1);
 
                xpp.flags = XDF_NEED_MINIMAL;
                xecfg.ctxlen = 3;
@@ -3145,7 +3208,7 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
                              &xpp, &xecfg, &ecb);
        }
 
-       SHA1_Final(sha1, &ctx);
+       git_SHA1_Final(sha1, &ctx);
        return 0;
 }
 
@@ -3390,10 +3453,7 @@ static void diffcore_skip_stat_unmatch(struct diff_options *diffopt)
 
 void diffcore_std(struct diff_options *options)
 {
-       if (DIFF_OPT_TST(options, QUIET))
-               return;
-
-       if (options->skip_stat_unmatch && !DIFF_OPT_TST(options, FIND_COPIES_HARDER))
+       if (options->skip_stat_unmatch)
                diffcore_skip_stat_unmatch(options);
        if (options->break_opt != -1)
                diffcore_break(options->break_opt);