From: Junio C Hamano Date: Fri, 13 May 2011 18:01:32 +0000 (-0700) Subject: Merge branch 'jh/dirstat-lines' X-Git-Tag: v1.7.6-rc0~77 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/df54e2bfd6afd0e7dcf5c658c92f469fdd1b06ff?ds=inline;hp=-c Merge branch 'jh/dirstat-lines' * jh/dirstat-lines: Mark dirstat error messages for translation Improve error handling when parsing dirstat parameters New --dirstat=lines mode, doing dirstat analysis based on diffstat Allow specifying --dirstat cut-off percentage as a floating point number Add config variable for specifying default --dirstat behavior Refactor --dirstat parsing; deprecate --cumulative and --dirstat-by-file Make --dirstat=0 output directories that contribute < 0.1% of changes Add several testcases for --dirstat and friends --- df54e2bfd6afd0e7dcf5c658c92f469fdd1b06ff diff --combined Documentation/diff-options.txt index 9ca565d708,bddceb06b7..c7ed946357 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@@ -66,19 -66,49 +66,49 @@@ endif::git-format-patch[ number of modified files, as well as number of added and deleted lines. - --dirstat[=]:: - Output the distribution of relative amount of changes (number of lines added or - removed) for each sub-directory. Directories with changes below - a cut-off percent (3% by default) are not shown. The cut-off percent - can be set with `--dirstat=`. Changes in a child directory are not - counted for the parent directory, unless `--cumulative` is used. + --dirstat[=]:: + Output the distribution of relative amount of changes for each + sub-directory. The behavior of `--dirstat` can be customized by + passing it a comma separated list of parameters. + The defaults are controlled by the `diff.dirstat` configuration + variable (see linkgit:git-config[1]). + The following parameters are available: + - Note that the `--dirstat` option computes the changes while ignoring - the amount of pure code movements within a file. In other words, - rearranging lines in a file is not counted as much as other changes. - - --dirstat-by-file[=]:: - Same as `--dirstat`, but counts changed files instead of lines. + -- + `changes`;; + Compute the dirstat numbers by counting the lines that have been + removed from the source, or added to the destination. This ignores + the amount of pure code movements within a file. In other words, + rearranging lines in a file is not counted as much as other changes. + This is the default behavior when no parameter is given. + `lines`;; + Compute the dirstat numbers by doing the regular line-based diff + analysis, and summing the removed/added line counts. (For binary + files, count 64-byte chunks instead, since binary files have no + natural concept of lines). This is a more expensive `--dirstat` + behavior than the `changes` behavior, but it does count rearranged + lines within a file as much as other changes. The resulting output + is consistent with what you get from the other `--*stat` options. + `files`;; + Compute the dirstat numbers by counting the number of files changed. + Each changed file counts equally in the dirstat analysis. This is + the computationally cheapest `--dirstat` behavior, since it does + not have to look at the file contents at all. + `cumulative`;; + Count changes in a child directory for the parent directory as well. + Note that when using `cumulative`, the sum of the percentages + reported may exceed 100%. The default (non-cumulative) behavior can + be specified with the `noncumulative` parameter. + ;; + An integer parameter specifies a cut-off percent (3% by default). + Directories contributing less than this percentage of the changes + are not shown in the output. + -- + + + Example: The following will count changed files, while ignoring + directories with less than 10% of the total amount of changed files, + and accumulating child directory counts in the parent directories: + `--dirstat=files,10,cumulative`. --summary:: Output a condensed summary of extended header information @@@ -124,19 -154,12 +154,19 @@@ any of those replacements occurred --color[=]:: Show colored diff. - The value must be always (the default), never, or auto. + The value must be `always` (the default for ``), `never`, or `auto`. + The default value is `never`. +ifdef::git-diff[] + It can be changed by the `color.ui` and `color.diff` + configuration settings. +endif::git-diff[] --no-color:: - Turn off colored diff, even when the configuration file - gives the default to color output. - Same as `--color=never`. + Turn off colored diff. +ifdef::git-diff[] + This can be used to override configuration settings. +endif::git-diff[] + It is the same as `--color=never`. --word-diff[=]:: Show a word diff, using the to delimit changed words. @@@ -250,7 -273,7 +280,7 @@@ ifdef::git-log[ For following files across renames while traversing history, see `--follow`. endif::git-log[] - If `n` is specified, it is a is a threshold on the similarity + If `n` is specified, it is a threshold on the similarity index (i.e. amount of addition/deletions compared to the file's size). For example, `-M90%` means git should consider a delete/add pair to be a rename if more than 90% of the file diff --combined diff.c index feced34343,097eb4c47a..ba5f7aa217 --- a/diff.c +++ b/diff.c @@@ -31,6 -31,7 +31,7 @@@ static const char *external_diff_cmd_cf int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; static int diff_no_prefix; + static int diff_dirstat_permille_default = 30; static struct diff_options default_diff_options; static char diff_colors[][COLOR_MAXLEN] = { @@@ -66,6 -67,58 +67,58 @@@ static int parse_diff_color_slot(const return -1; } + static int parse_dirstat_params(struct diff_options *options, const char *params, + struct strbuf *errmsg) + { + const char *p = params; + int p_len, ret = 0; + + while (*p) { + p_len = strchrnul(p, ',') - p; + if (!memcmp(p, "changes", p_len)) { + DIFF_OPT_CLR(options, DIRSTAT_BY_LINE); + DIFF_OPT_CLR(options, DIRSTAT_BY_FILE); + } else if (!memcmp(p, "lines", p_len)) { + DIFF_OPT_SET(options, DIRSTAT_BY_LINE); + DIFF_OPT_CLR(options, DIRSTAT_BY_FILE); + } else if (!memcmp(p, "files", p_len)) { + DIFF_OPT_CLR(options, DIRSTAT_BY_LINE); + DIFF_OPT_SET(options, DIRSTAT_BY_FILE); + } else if (!memcmp(p, "noncumulative", p_len)) { + DIFF_OPT_CLR(options, DIRSTAT_CUMULATIVE); + } else if (!memcmp(p, "cumulative", p_len)) { + DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE); + } else if (isdigit(*p)) { + char *end; + int permille = strtoul(p, &end, 10) * 10; + if (*end == '.' && isdigit(*++end)) { + /* only use first digit */ + permille += *end - '0'; + /* .. and ignore any further digits */ + while (isdigit(*++end)) + ; /* nothing */ + } + if (end - p == p_len) + options->dirstat_permille = permille; + else { + strbuf_addf(errmsg, _(" Failed to parse dirstat cut-off percentage '%.*s'\n"), + p_len, p); + ret++; + } + } else { + strbuf_addf(errmsg, _(" Unknown dirstat parameter '%.*s'\n"), + p_len, p); + ret++; + } + + p += p_len; + + if (*p) + p++; /* more parameters, swallow separator */ + } + return ret; + } + static int git_config_rename(const char *var, const char *value) { if (!value) @@@ -145,6 -198,17 +198,17 @@@ int git_diff_basic_config(const char *v return 0; } + if (!strcmp(var, "diff.dirstat")) { + struct strbuf errmsg = STRBUF_INIT; + default_diff_options.dirstat_permille = diff_dirstat_permille_default; + if (parse_dirstat_params(&default_diff_options, value, &errmsg)) + warning(_("Found errors in 'diff.dirstat' config variable:\n%s"), + errmsg.buf); + strbuf_release(&errmsg); + diff_dirstat_permille_default = default_diff_options.dirstat_permille; + return 0; + } + if (!prefixcmp(var, "submodule.")) return parse_submodule_config_option(var, value); @@@ -1455,7 -1519,7 +1519,7 @@@ struct dirstat_file struct dirstat_dir { struct dirstat_file *files; - int alloc, nr, percent, cumulative; + int alloc, nr, permille, cumulative; }; static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir, @@@ -1502,12 -1566,11 +1566,11 @@@ * under this directory (sources == 1). */ if (baselen && sources != 1) { - int permille = this_dir * 1000 / changed; - if (permille) { - int percent = permille / 10; - if (percent >= dir->percent) { + if (this_dir) { + int permille = this_dir * 1000 / changed; + if (permille >= dir->permille) { fprintf(opt->file, "%s%4d.%01d%% %.*s\n", line_prefix, - percent, permille % 10, baselen, base); + permille / 10, permille % 10, baselen, base); if (!dir->cumulative) return 0; } @@@ -1533,7 -1596,7 +1596,7 @@@ static void show_dirstat(struct diff_op dir.files = NULL; dir.alloc = 0; dir.nr = 0; - dir.percent = options->dirstat_percent; + dir.permille = options->dirstat_permille; dir.cumulative = DIFF_OPT_TST(options, DIRSTAT_CUMULATIVE); changed = 0; @@@ -1622,6 -1685,50 +1685,50 @@@ found_damage gather_dirstat(options, &dir, changed, "", 0); } + static void show_dirstat_by_line(struct diffstat_t *data, struct diff_options *options) + { + int i; + unsigned long changed; + struct dirstat_dir dir; + + if (data->nr == 0) + return; + + dir.files = NULL; + dir.alloc = 0; + dir.nr = 0; + dir.permille = options->dirstat_permille; + dir.cumulative = DIFF_OPT_TST(options, DIRSTAT_CUMULATIVE); + + changed = 0; + for (i = 0; i < data->nr; i++) { + struct diffstat_file *file = data->files[i]; + unsigned long damage = file->added + file->deleted; + if (file->is_binary) + /* + * binary files counts bytes, not lines. Must find some + * way to normalize binary bytes vs. textual lines. + * The following heuristic assumes that there are 64 + * bytes per "line". + * This is stupid and ugly, but very cheap... + */ + damage = (damage + 63) / 64; + ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc); + dir.files[dir.nr].name = file->name; + dir.files[dir.nr].changed = damage; + changed += damage; + dir.nr++; + } + + /* This can happen even with many files, if everything was renames */ + if (!changed) + return; + + /* Show all directories with more than x% of the changes */ + qsort(dir.files, dir.nr, sizeof(dir.files[0]), dirstat_compare); + gather_dirstat(options, &dir, changed, "", 0); + } + static void free_diffstat_info(struct diffstat_t *diffstat) { int i; @@@ -2891,7 -2998,7 +2998,7 @@@ void diff_setup(struct diff_options *op options->line_termination = '\n'; options->break_opt = -1; options->rename_limit = -1; - options->dirstat_percent = 3; + options->dirstat_permille = diff_dirstat_permille_default; options->context = 3; options->change = diff_change; @@@ -3149,6 -3256,21 +3256,21 @@@ static int stat_opt(struct diff_option return argcount; } + static int parse_dirstat_opt(struct diff_options *options, const char *params) + { + struct strbuf errmsg = STRBUF_INIT; + if (parse_dirstat_params(options, params, &errmsg)) + die(_("Failed to parse --dirstat/-X option parameter:\n%s"), + errmsg.buf); + strbuf_release(&errmsg); + /* + * The caller knows a dirstat-related option is given from the command + * line; allow it to say "return this_function();" + */ + options->output_format |= DIFF_FORMAT_DIRSTAT; + return 1; + } + int diff_opt_parse(struct diff_options *options, const char **av, int ac) { const char *arg = av[0]; @@@ -3168,15 -3290,19 +3290,19 @@@ options->output_format |= DIFF_FORMAT_NUMSTAT; else if (!strcmp(arg, "--shortstat")) options->output_format |= DIFF_FORMAT_SHORTSTAT; - else if (opt_arg(arg, 'X', "dirstat", &options->dirstat_percent)) - options->output_format |= DIFF_FORMAT_DIRSTAT; - else if (!strcmp(arg, "--cumulative")) { - options->output_format |= DIFF_FORMAT_DIRSTAT; - DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE); - } else if (opt_arg(arg, 0, "dirstat-by-file", - &options->dirstat_percent)) { - options->output_format |= DIFF_FORMAT_DIRSTAT; - DIFF_OPT_SET(options, DIRSTAT_BY_FILE); + else if (!strcmp(arg, "-X") || !strcmp(arg, "--dirstat")) + return parse_dirstat_opt(options, ""); + else if (!prefixcmp(arg, "-X")) + return parse_dirstat_opt(options, arg + 2); + else if (!prefixcmp(arg, "--dirstat=")) + return parse_dirstat_opt(options, arg + 10); + else if (!strcmp(arg, "--cumulative")) + return parse_dirstat_opt(options, "cumulative"); + else if (!strcmp(arg, "--dirstat-by-file")) + return parse_dirstat_opt(options, "files"); + else if (!prefixcmp(arg, "--dirstat-by-file=")) { + parse_dirstat_opt(options, "files"); + return parse_dirstat_opt(options, arg + 18); } else if (!strcmp(arg, "--check")) options->output_format |= DIFF_FORMAT_CHECKDIFF; @@@ -4023,6 -4149,7 +4149,7 @@@ void diff_flush(struct diff_options *op struct diff_queue_struct *q = &diff_queued_diff; int i, output_format = options->output_format; int separator = 0; + int dirstat_by_line = 0; /* * Order: raw, stat, summary, patch @@@ -4043,7 -4170,11 +4170,11 @@@ separator++; } - if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT)) { + if (output_format & DIFF_FORMAT_DIRSTAT && DIFF_OPT_TST(options, DIRSTAT_BY_LINE)) + dirstat_by_line = 1; + + if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT) || + dirstat_by_line) { struct diffstat_t diffstat; memset(&diffstat, 0, sizeof(struct diffstat_t)); @@@ -4058,10 -4189,12 +4189,12 @@@ show_stats(&diffstat, options); if (output_format & DIFF_FORMAT_SHORTSTAT) show_shortstats(&diffstat, options); + if (output_format & DIFF_FORMAT_DIRSTAT) + show_dirstat_by_line(&diffstat, options); free_diffstat_info(&diffstat); separator++; } - if (output_format & DIFF_FORMAT_DIRSTAT) + if ((output_format & DIFF_FORMAT_DIRSTAT) && !dirstat_by_line) show_dirstat(options); if (output_format & DIFF_FORMAT_SUMMARY && !is_summary_empty(q)) { @@@ -4416,20 -4549,20 +4549,20 @@@ void diff_change(struct diff_options *o DIFF_OPT_SET(options, HAS_CHANGES); } -void diff_unmerge(struct diff_options *options, - const char *path, - unsigned mode, const unsigned char *sha1) +struct diff_filepair *diff_unmerge(struct diff_options *options, const char *path) { + struct diff_filepair *pair; struct diff_filespec *one, *two; if (options->prefix && strncmp(path, options->prefix, options->prefix_length)) - return; + return NULL; one = alloc_filespec(path); two = alloc_filespec(path); - fill_filespec(one, sha1, mode); - diff_queue(&diff_queued_diff, one, two)->is_unmerged = 1; + pair = diff_queue(&diff_queued_diff, one, two); + pair->is_unmerged = 1; + return pair; } static char *run_textconv(const char *pgm, struct diff_filespec *spec, diff --combined diff.h index d83e53e9d4,937a903dce..adb40ba273 --- a/diff.h +++ b/diff.h @@@ -78,6 -78,7 +78,7 @@@ typedef struct strbuf *(*diff_prefix_fn #define DIFF_OPT_IGNORE_UNTRACKED_IN_SUBMODULES (1 << 25) #define DIFF_OPT_IGNORE_DIRTY_SUBMODULES (1 << 26) #define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27) + #define DIFF_OPT_DIRSTAT_BY_LINE (1 << 28) #define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag) #define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag) @@@ -114,7 -115,7 +115,7 @@@ struct diff_options int needed_rename_limit; int degraded_cc_to_c; int show_rename_progress; - int dirstat_percent; + int dirstat_permille; int setup; int abbrev; const char *prefix; @@@ -210,7 -211,10 +211,7 @@@ extern void diff_change(struct diff_opt const char *fullpath, unsigned dirty_submodule1, unsigned dirty_submodule2); -extern void diff_unmerge(struct diff_options *, - const char *path, - unsigned mode, - const unsigned char *sha1); +extern struct diff_filepair *diff_unmerge(struct diff_options *, const char *path); #define DIFF_SETUP_REVERSE 1 #define DIFF_SETUP_USE_CACHE 2