Merge branch 'rs/diff-cleanup-records-fix'
author Junio C Hamano <gitster@pobox.com>
Fri, 14 Oct 2011 02:03:22 +0000 (19:03 -0700)
committer Junio C Hamano <gitster@pobox.com>
Fri, 14 Oct 2011 02:03:22 +0000 (19:03 -0700)
* rs/diff-cleanup-records-fix:
diff: resurrect XDF_NEED_MINIMAL with --minimal
Revert removal of multi-match discard heuristic in 27af01

1  2 
diff.c
xdiff/xprepare.c
diff --combined diff.c
index eed227a6ace278c47a68082015d942dc2fe9cba5,c261cc049d1870845a500bafb6bf53067cf00fc3..d922b77aef2da84824a8e14fc21961e36e6d2e36
--- 1/diff.c
--- 2/diff.c
+++ b/diff.c
@@@ -137,7 -137,7 +137,7 @@@ static int git_config_rename(const cha
  int git_diff_ui_config(const char *var, const char *value, void *cb)
  {
        if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) {
 -              diff_use_color_default = git_config_colorbool(var, value, -1);
 +              diff_use_color_default = git_config_colorbool(var, value);
                return 0;
        }
        if (!strcmp(var, "diff.renames")) {
        if (!strcmp(var, "diff.ignoresubmodules"))
                handle_ignore_submodules_arg(&default_diff_options, value);
  
 +      if (git_color_config(var, value, cb) < 0)
 +              return -1;
 +
        return git_diff_basic_config(var, value, cb);
  }
  
@@@ -215,7 -212,7 +215,7 @@@ int git_diff_basic_config(const char *v
        if (!prefixcmp(var, "submodule."))
                return parse_submodule_config_option(var, value);
  
 -      return git_color_default_config(var, value, cb);
 +      return git_default_config(var, value, cb);
  }
  
  static char *quote_two(const char *one, const char *two)
@@@ -586,10 -583,11 +586,10 @@@ static void emit_rewrite_diff(const cha
                              struct diff_options *o)
  {
        int lc_a, lc_b;
 -      int color_diff = DIFF_OPT_TST(o, COLOR_DIFF);
        const char *name_a_tab, *name_b_tab;
 -      const char *metainfo = diff_get_color(color_diff, DIFF_METAINFO);
 -      const char *fraginfo = diff_get_color(color_diff, DIFF_FRAGINFO);
 -      const char *reset = diff_get_color(color_diff, DIFF_RESET);
 +      const char *metainfo = diff_get_color(o->use_color, DIFF_METAINFO);
 +      const char *fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO);
 +      const char *reset = diff_get_color(o->use_color, DIFF_RESET);
        static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
        const char *a_prefix, *b_prefix;
        char *data_one, *data_two;
        size_two = fill_textconv(textconv_two, two, &data_two);
  
        memset(&ecbdata, 0, sizeof(ecbdata));
 -      ecbdata.color_diff = color_diff;
 +      ecbdata.color_diff = want_color(o->use_color);
        ecbdata.found_changesp = &o->found_changes;
        ecbdata.ws_rule = whitespace_rule(name_b ? name_b : name_a);
        ecbdata.opt = o;
@@@ -1006,7 -1004,7 +1006,7 @@@ static void free_diff_words_data(struc
  
  const char *diff_get_color(int diff_use_color, enum color_diff ix)
  {
 -      if (diff_use_color)
 +      if (want_color(diff_use_color))
                return diff_colors[ix];
        return "";
  }
@@@ -1810,10 -1808,11 +1810,10 @@@ static int is_conflict_marker(const cha
  static void checkdiff_consume(void *priv, char *line, unsigned long len)
  {
        struct checkdiff_t *data = priv;
 -      int color_diff = DIFF_OPT_TST(data->o, COLOR_DIFF);
        int marker_size = data->conflict_marker_size;
 -      const char *ws = diff_get_color(color_diff, DIFF_WHITESPACE);
 -      const char *reset = diff_get_color(color_diff, DIFF_RESET);
 -      const char *set = diff_get_color(color_diff, DIFF_FILE_NEW);
 +      const char *ws = diff_get_color(data->o->use_color, DIFF_WHITESPACE);
 +      const char *reset = diff_get_color(data->o->use_color, DIFF_RESET);
 +      const char *set = diff_get_color(data->o->use_color, DIFF_FILE_NEW);
        char *err;
        char *line_prefix = "";
        struct strbuf *msgbuf;
@@@ -2158,7 -2157,7 +2158,7 @@@ static void builtin_diff(const char *na
                memset(&xecfg, 0, sizeof(xecfg));
                memset(&ecbdata, 0, sizeof(ecbdata));
                ecbdata.label_path = lbl;
 -              ecbdata.color_diff = DIFF_OPT_TST(o, COLOR_DIFF);
 +              ecbdata.color_diff = want_color(o->use_color);
                ecbdata.found_changesp = &o->found_changes;
                ecbdata.ws_rule = whitespace_rule(name_b ? name_b : name_a);
                if (ecbdata.ws_rule & WS_BLANK_AT_EOF)
                                        break;
                                }
                        }
 -                      if (DIFF_OPT_TST(o, COLOR_DIFF)) {
 +                      if (want_color(o->use_color)) {
                                struct diff_words_style *st = ecbdata.diff_words->style;
                                st->old.color = diff_get_color_opt(o, DIFF_FILE_OLD);
                                st->new.color = diff_get_color_opt(o, DIFF_FILE_NEW);
@@@ -2274,8 -2273,6 +2274,8 @@@ static void builtin_diffstat(const cha
                memset(&xpp, 0, sizeof(xpp));
                memset(&xecfg, 0, sizeof(xecfg));
                xpp.flags = o->xdl_opts;
 +              xecfg.ctxlen = o->context;
 +              xecfg.interhunkctxlen = o->interhunkcontext;
                xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat,
                              &xpp, &xecfg);
        }
@@@ -2858,7 -2855,7 +2858,7 @@@ static void run_diff_cmd(const char *pg
                 */
                fill_metainfo(msg, name, other, one, two, o, p,
                              &must_show_header,
 -                            DIFF_OPT_TST(o, COLOR_DIFF) && !pgm);
 +                            want_color(o->use_color) && !pgm);
                xfrm_msg = msg->len ? msg->buf : NULL;
        }
  
@@@ -3024,7 -3021,8 +3024,7 @@@ void diff_setup(struct diff_options *op
  
        options->change = diff_change;
        options->add_remove = diff_addremove;
 -      if (diff_use_color_default > 0)
 -              DIFF_OPT_SET(options, COLOR_DIFF);
 +      options->use_color = diff_use_color_default;
        options->detect_rename = diff_detect_rename_default;
  
        if (diff_no_prefix) {
@@@ -3387,6 -3385,10 +3387,10 @@@ int diff_opt_parse(struct diff_options 
        }
  
        /* xdiff options */
+       else if (!strcmp(arg, "--minimal"))
+               DIFF_XDL_SET(options, NEED_MINIMAL);
+       else if (!strcmp(arg, "--no-minimal"))
+               DIFF_XDL_CLR(options, NEED_MINIMAL);
        else if (!strcmp(arg, "-w") || !strcmp(arg, "--ignore-all-space"))
                DIFF_XDL_SET(options, IGNORE_WHITESPACE);
        else if (!strcmp(arg, "-b") || !strcmp(arg, "--ignore-space-change"))
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
        else if (!strcmp(arg, "--patience"))
                DIFF_XDL_SET(options, PATIENCE_DIFF);
 +      else if (!strcmp(arg, "--histogram"))
 +              DIFF_XDL_SET(options, HISTOGRAM_DIFF);
  
        /* flags options */
        else if (!strcmp(arg, "--binary")) {
        else if (!strcmp(arg, "--follow"))
                DIFF_OPT_SET(options, FOLLOW_RENAMES);
        else if (!strcmp(arg, "--color"))
 -              DIFF_OPT_SET(options, COLOR_DIFF);
 +              options->use_color = 1;
        else if (!prefixcmp(arg, "--color=")) {
 -              int value = git_config_colorbool(NULL, arg+8, -1);
 -              if (value == 0)
 -                      DIFF_OPT_CLR(options, COLOR_DIFF);
 -              else if (value > 0)
 -                      DIFF_OPT_SET(options, COLOR_DIFF);
 -              else
 +              int value = git_config_colorbool(NULL, arg+8);
 +              if (value < 0)
                        return error("option `color' expects \"always\", \"auto\", or \"never\"");
 +              options->use_color = value;
        }
        else if (!strcmp(arg, "--no-color"))
 -              DIFF_OPT_CLR(options, COLOR_DIFF);
 +              options->use_color = 0;
        else if (!strcmp(arg, "--color-words")) {
 -              DIFF_OPT_SET(options, COLOR_DIFF);
 +              options->use_color = 1;
                options->word_diff = DIFF_WORDS_COLOR;
        }
        else if (!prefixcmp(arg, "--color-words=")) {
 -              DIFF_OPT_SET(options, COLOR_DIFF);
 +              options->use_color = 1;
                options->word_diff = DIFF_WORDS_COLOR;
                options->word_regex = arg + 14;
        }
                if (!strcmp(type, "plain"))
                        options->word_diff = DIFF_WORDS_PLAIN;
                else if (!strcmp(type, "color")) {
 -                      DIFF_OPT_SET(options, COLOR_DIFF);
 +                      options->use_color = 1;
                        options->word_diff = DIFF_WORDS_COLOR;
                }
                else if (!strcmp(type, "porcelain"))
diff --combined xdiff/xprepare.c
index 5a33d1a86964472a82a63f18ab3c9b4da9b23165,4c447ca6d2aa26f4a88690a2cf97376a83d1ffad..e419f4f726019a5b0365c589285439fb3bfb8db2
@@@ -26,8 -26,6 +26,8 @@@
  #define XDL_KPDIS_RUN 4
  #define XDL_MAX_EQLIMIT 1024
  #define XDL_SIMSCAN_WINDOW 100
 +#define XDL_GUESS_NLINES1 256
 +#define XDL_GUESS_NLINES2 20
  
  
  typedef struct s_xdlclass {
@@@ -69,6 -67,8 +69,6 @@@ static int xdl_optimize_ctxs(xdlclassif
  
  
  static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
 -      long i;
 -
        cf->flags = flags;
  
        cf->hbits = xdl_hashbits((unsigned int) size);
@@@ -83,7 -83,8 +83,7 @@@
                xdl_cha_free(&cf->ncha);
                return -1;
        }
 -      for (i = 0; i < cf->hsize; i++)
 -              cf->rchash[i] = NULL;
 +      memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *));
  
        cf->alloc = size;
        if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) {
@@@ -160,7 -161,7 +160,7 @@@ static int xdl_classify_record(unsigne
  static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
                           xdlclassifier_t *cf, xdfile_t *xdf) {
        unsigned int hbits;
 -      long i, nrec, hsize, bsize;
 +      long nrec, hsize, bsize;
        unsigned long hav;
        char const *blk, *cur, *top, *prev;
        xrecord_t *crec;
        char *rchg;
        long *rindex;
  
 -      if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) {
 -
 -              return -1;
 +      ha = NULL;
 +      rindex = NULL;
 +      rchg = NULL;
 +      rhash = NULL;
 +      recs = NULL;
 +
 +      if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
 +              goto abort;
 +      if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
 +              goto abort;
 +
 +      if (xpp->flags & XDF_HISTOGRAM_DIFF)
 +              hbits = hsize = 0;
 +      else {
 +              hbits = xdl_hashbits((unsigned int) narec);
 +              hsize = 1 << hbits;
 +              if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
 +                      goto abort;
 +              memset(rhash, 0, hsize * sizeof(xrecord_t *));
        }
 -      if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) {
 -
 -              xdl_cha_free(&xdf->rcha);
 -              return -1;
 -      }
 -
 -      hbits = xdl_hashbits((unsigned int) narec);
 -      hsize = 1 << hbits;
 -      if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) {
 -
 -              xdl_free(recs);
 -              xdl_cha_free(&xdf->rcha);
 -              return -1;
 -      }
 -      for (i = 0; i < hsize; i++)
 -              rhash[i] = NULL;
  
        nrec = 0;
        if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
 -              for (top = blk + bsize;;) {
 -                      if (cur >= top) {
 -                              if (!(cur = blk = xdl_mmfile_next(mf, &bsize)))
 -                                      break;
 -                              top = blk + bsize;
 -                      }
 +              for (top = blk + bsize; cur < top; ) {
                        prev = cur;
                        hav = xdl_hash_record(&cur, top, xpp->flags);
                        if (nrec >= narec) {
                                narec *= 2;
 -                              if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
 -
 -                                      xdl_free(rhash);
 -                                      xdl_free(recs);
 -                                      xdl_cha_free(&xdf->rcha);
 -                                      return -1;
 -                              }
 +                              if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *))))
 +                                      goto abort;
                                recs = rrecs;
                        }
 -                      if (!(crec = xdl_cha_alloc(&xdf->rcha))) {
 -
 -                              xdl_free(rhash);
 -                              xdl_free(recs);
 -                              xdl_cha_free(&xdf->rcha);
 -                              return -1;
 -                      }
 +                      if (!(crec = xdl_cha_alloc(&xdf->rcha)))
 +                              goto abort;
                        crec->ptr = prev;
                        crec->size = (long) (cur - prev);
                        crec->ha = hav;
                        recs[nrec++] = crec;
  
 -                      if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) {
 -
 -                              xdl_free(rhash);
 -                              xdl_free(recs);
 -                              xdl_cha_free(&xdf->rcha);
 -                              return -1;
 -                      }
 +                      if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
 +                              xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
 +                              goto abort;
                }
        }
  
 -      if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) {
 -
 -              xdl_free(rhash);
 -              xdl_free(recs);
 -              xdl_cha_free(&xdf->rcha);
 -              return -1;
 -      }
 +      if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char))))
 +              goto abort;
        memset(rchg, 0, (nrec + 2) * sizeof(char));
  
 -      if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) {
 -
 -              xdl_free(rchg);
 -              xdl_free(rhash);
 -              xdl_free(recs);
 -              xdl_cha_free(&xdf->rcha);
 -              return -1;
 -      }
 -      if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) {
 -
 -              xdl_free(rindex);
 -              xdl_free(rchg);
 -              xdl_free(rhash);
 -              xdl_free(recs);
 -              xdl_cha_free(&xdf->rcha);
 -              return -1;
 -      }
 +      if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long))))
 +              goto abort;
 +      if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long))))
 +              goto abort;
  
        xdf->nrec = nrec;
        xdf->recs = recs;
        xdf->dend = nrec - 1;
  
        return 0;
 +
 +abort:
 +      xdl_free(ha);
 +      xdl_free(rindex);
 +      xdl_free(rchg);
 +      xdl_free(rhash);
 +      xdl_free(recs);
 +      xdl_cha_free(&xdf->rcha);
 +      return -1;
  }
  
  
@@@ -261,25 -290,13 +261,25 @@@ static void xdl_free_ctx(xdfile_t *xdf
  
  int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
                    xdfenv_t *xe) {
 -      long enl1, enl2;
 +      long enl1, enl2, sample;
        xdlclassifier_t cf;
  
 -      enl1 = xdl_guess_lines(mf1) + 1;
 -      enl2 = xdl_guess_lines(mf2) + 1;
 +      memset(&cf, 0, sizeof(cf));
  
 -      if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
 +      /*
 +       * For histogram diff, we can afford a smaller sample size and
 +       * thus a poorer estimate of the number of lines, as the hash
 +       * table (rhash) won't be filled up/grown. The number of lines
 +       * (nrecs) will be updated correctly anyway by
 +       * xdl_prepare_ctx().
 +       */
 +      sample = xpp->flags & XDF_HISTOGRAM_DIFF ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1;
 +
 +      enl1 = xdl_guess_lines(mf1, sample) + 1;
 +      enl2 = xdl_guess_lines(mf2, sample) + 1;
 +
 +      if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
 +              xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
  
                return -1;
        }
        }
  
        if (!(xpp->flags & XDF_PATIENCE_DIFF) &&
 +                      !(xpp->flags & XDF_HISTOGRAM_DIFF) &&
                        xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
  
                xdl_free_ctx(&xe->xdf2);
                return -1;
        }
  
 -      xdl_free_classifier(&cf);
 +      if (!(xpp->flags & XDF_HISTOGRAM_DIFF))
 +              xdl_free_classifier(&cf);
  
        return 0;
  }
@@@ -383,7 -398,7 +383,7 @@@ static int xdl_clean_mmatch(char const 
   * might be potentially discarded if they happear in a run of discardable.
   */
  static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
-       long i, nm, nreff;
+       long i, nm, nreff, mlim;
        xrecord_t **recs;
        xdlclass_t *rcrec;
        char *dis, *dis1, *dis2;
        dis1 = dis;
        dis2 = dis1 + xdf1->nrec + 1;
  
+       if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
+               mlim = XDL_MAX_EQLIMIT;
        for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
                rcrec = cf->rcrecs[(*recs)->ha];
                nm = rcrec ? rcrec->len2 : 0;
-               dis1[i] = (nm == 0) ? 0: 1;
+               dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
        }
  
+       if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
+               mlim = XDL_MAX_EQLIMIT;
        for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
                rcrec = cf->rcrecs[(*recs)->ha];
                nm = rcrec ? rcrec->len1 : 0;
-               dis2[i] = (nm == 0) ? 0: 1;
+               dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
        }
  
        for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];