# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
-# Define NO_CURL if you do not have libcurl installed. git-http-pull and
+# Define NO_CURL if you do not have libcurl installed. git-http-fetch and
# git-http-push are not built, and you cannot use http:// and https://
-# transports.
+# transports (neither smart nor dumb).
#
# Define CURLDIR=/foo/bar if your curl header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NO_EXPAT if you do not have expat installed. git-http-push is
-# not built, and you cannot push using http:// and https:// transports.
+# not built, and you cannot push using http:// and https:// transports (dumb).
#
# Define EXPATDIR=/foo/bar if your expat header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NEEDS_SSL_WITH_CRYPTO if you need -lssl when using -lcrypto (Darwin).
#
+# Define NEEDS_SSL_WITH_CURL if you need -lssl with -lcurl (Minix).
+#
+# Define NEEDS_IDN_WITH_CURL if you need -lidn when using -lcurl (Minix).
+#
# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
#
# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
# that tells runtime paths to dynamic libraries;
# "-Wl,-rpath=/path/lib" is used instead.
#
+# Define NO_NORETURN if using buggy versions of gcc 4.6+ and profile feedback,
+# as the compiler can crash (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49299)
+#
# Define USE_NSEC below if you want git to care about sub-second file mtimes
# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
LIB_H += sideband.h
LIB_H += sigchain.h
LIB_H += strbuf.h
+LIB_H += streaming.h
LIB_H += string-list.h
LIB_H += submodule.h
LIB_H += tag.h
LIB_OBJS += sideband.o
LIB_OBJS += sigchain.o
LIB_OBJS += strbuf.o
+LIB_OBJS += streaming.o
LIB_OBJS += string-list.o
LIB_OBJS += submodule.o
LIB_OBJS += symlinks.o
X = .exe
endif
ifeq ($(uname_S),Interix)
- NO_SYS_POLL_H = YesPlease
- NO_INTTYPES_H = YesPlease
NO_INITGROUPS = YesPlease
NO_IPV6 = YesPlease
NO_MEMMEM = YesPlease
ifeq ($(uname_R),3.5)
NO_INET_NTOP = YesPlease
NO_INET_PTON = YesPlease
+ NO_SOCKADDR_STORAGE = YesPlease
+ NO_FNMATCH_CASEFOLD = YesPlease
endif
ifeq ($(uname_R),5.2)
NO_INET_NTOP = YesPlease
NO_INET_PTON = YesPlease
+ NO_SOCKADDR_STORAGE = YesPlease
+ NO_FNMATCH_CASEFOLD = YesPlease
endif
endif
+# Minix platform defaults, applied when uname_S is "Minix". The knobs
+# assigned an empty value (NEEDS_LIBGEN, NEEDS_RESOLV, NO_CURL, NO_EXPAT)
+# stay switched off: "ifdef" only fires for a variable with a non-empty value.
+ifeq ($(uname_S),Minix)
+ NO_IPV6 = YesPlease
+ NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
+ NO_NSEC = YesPlease
+ NEEDS_LIBGEN =
+ NEEDS_CRYPTO_WITH_SSL = YesPlease
+ NEEDS_IDN_WITH_CURL = YesPlease
+ NEEDS_SSL_WITH_CURL = YesPlease
+ NEEDS_RESOLV =
+ NO_HSTRERROR = YesPlease
+ NO_MMAP = YesPlease
+ NO_CURL =
+ NO_EXPAT =
+endif
ifneq (,$(findstring MINGW,$(uname_S)))
pathsep = ;
NO_PREAD = YesPlease
else
CURL_LIBCURL = -lcurl
endif
+ ifdef NEEDS_SSL_WITH_CURL
+ CURL_LIBCURL += -lssl
+ ifdef NEEDS_CRYPTO_WITH_SSL
+ CURL_LIBCURL += -lcrypto
+ endif
+ endif
+ ifdef NEEDS_IDN_WITH_CURL
+ CURL_LIBCURL += -lidn
+ endif
+
REMOTE_CURL_PRIMARY = git-remote-http$X
REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X
REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES)
OPENSSL_LINK =
endif
ifdef NEEDS_CRYPTO_WITH_SSL
- OPENSSL_LINK += -lcrypto
+ OPENSSL_LIBSSL += -lcrypto
endif
else
BASIC_CFLAGS += -DNO_OPENSSL
ifdef USE_ST_TIMESPEC
BASIC_CFLAGS += -DUSE_ST_TIMESPEC
endif
+ifdef NO_NORETURN
+ BASIC_CFLAGS += -DNO_NORETURN
+endif
ifdef NO_NSEC
BASIC_CFLAGS += -DNO_NSEC
endif
'-DGIT_MAN_PATH="$(mandir_SQ)"' \
'-DGIT_INFO_PATH="$(infodir_SQ)"'
-git$X: git.o $(BUILTIN_OBJS) $(GITLIBS)
+git$X: git.o GIT-LDFLAGS $(BUILTIN_OBJS) $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ git.o \
$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
GIT_OBJS += http.o http-walker.o remote-curl.o
endif
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
- xdiff/xmerge.o xdiff/xpatience.o
+ xdiff/xmerge.o xdiff/xpatience.o xdiff/xhistogram.o
VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \
-DNDEBUG -DOVERRIDE_STRDUP -DREPLACE_SYSTEM_ALLOCATOR
endif
-git-%$X: %.o $(GITLIBS)
+git-%$X: %.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
-git-imap-send$X: imap-send.o $(GITLIBS)
+git-imap-send$X: imap-send.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(LIBS) $(OPENSSL_LINK) $(OPENSSL_LIBSSL) $(LIB_4_CRYPTO)
-git-http-fetch$X: revision.o http.o http-walker.o http-fetch.o $(GITLIBS)
+git-http-fetch$X: revision.o http.o http-walker.o http-fetch.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(LIBS) $(CURL_LIBCURL)
-git-http-push$X: revision.o http.o http-push.o $(GITLIBS)
+git-http-push$X: revision.o http.o http-push.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
ln -s $< $@ 2>/dev/null || \
cp $< $@
-$(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o $(GITLIBS)
+$(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
echo "$$FLAGS" >GIT-CFLAGS; \
fi
+# Escape single quotes in the link flags so the value can be embedded in
+# the single-quoted shell string used by the GIT-LDFLAGS recipe.
+TRACK_LDFLAGS = $(subst ','\'',$(ALL_LDFLAGS))
+
+# Rewrite GIT-LDFLAGS only when the current link flags differ from what the
+# file already records, so its timestamp moves only on a real change; link
+# rules that list GIT-LDFLAGS as a prerequisite are then relinked.
+# FORCE is defined outside this excerpt; presumably it makes the check run
+# on every invocation -- confirm against its definition.
+GIT-LDFLAGS: FORCE
+ @FLAGS='$(TRACK_LDFLAGS)'; \
+ if test x"$$FLAGS" != x"`cat GIT-LDFLAGS 2>/dev/null`" ; then \
+ echo 1>&2 " * new link flags"; \
+ echo "$$FLAGS" >GIT-LDFLAGS; \
+ fi
+
# We need to apply sq twice, once to protect from the shell
# that runs GIT-BUILD-OPTIONS, and then again to protect it
# and the first level quoting from the shell that runs "echo".
.PRECIOUS: $(TEST_OBJS)
-test-%$X: test-%.o $(GITLIBS)
+test-%$X: test-%.o GIT-LDFLAGS $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
check-sha1:: test-sha1$X
$(MAKE) -C gitk-git clean
$(MAKE) -C git-gui clean
endif
- $(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS
+ $(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-LDFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS
.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
cover_db_html: cover_db
cover -report html -outputdir cover_db_html cover_db
+
+### profile feedback build
+#
+# "make profile-all" first runs profile-clean, then builds "all" with
+# -fprofile-generate, runs "test" with -j1 using the same flags, and
+# finally runs the "all" build again with -fprofile-use -fprofile-correction.
+# "make profile-clean" removes the *.gcda and *.gcno files matched under
+# each entry of $(object_dirs).
+# Both flag sets also pass -DNO_NORETURN=1.
+#
+.PHONY: profile-all profile-clean
+
+PROFILE_GEN_CFLAGS := $(CFLAGS) -fprofile-generate -DNO_NORETURN=1
+PROFILE_USE_CFLAGS := $(CFLAGS) -fprofile-use -fprofile-correction -DNO_NORETURN=1
+
+profile-clean:
+ $(RM) $(addsuffix *.gcda,$(object_dirs))
+ $(RM) $(addsuffix *.gcno,$(object_dirs))
+
+profile-all: profile-clean
+ $(MAKE) CFLAGS="$(PROFILE_GEN_CFLAGS)" all
+ $(MAKE) CFLAGS="$(PROFILE_GEN_CFLAGS)" -j1 test
+ $(MAKE) CFLAGS="$(PROFILE_USE_CFLAGS)" all
int i, len, add, del, adds = 0, dels = 0;
uintmax_t max_change = 0, max_len = 0;
int total_files = data->nr;
- int width, name_width;
+ int width, name_width, count;
const char *reset, *add_c, *del_c;
const char *line_prefix = "";
+ int extra_shown = 0;
struct strbuf *msg = NULL;
if (data->nr == 0)
width = options->stat_width ? options->stat_width : 80;
name_width = options->stat_name_width ? options->stat_name_width : 50;
+ count = options->stat_count ? options->stat_count : data->nr;
/* Sanity: give at least 5 columns to the graph,
* but leave at least 10 columns for the name.
add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
- for (i = 0; i < data->nr; i++) {
+ for (i = 0; (i < count) && (i < data->nr); i++) {
struct diffstat_file *file = data->files[i];
uintmax_t change = file->added + file->deleted;
+ if (!data->files[i]->is_renamed &&
+ (change == 0)) {
+ count++; /* not shown == room for one more */
+ continue;
+ }
fill_print_name(file);
len = strlen(file->print_name);
if (max_len < len)
if (max_change < change)
max_change = change;
}
+ count = i; /* min(count, data->nr) */
/* Compute the width of the graph part;
* 10 is for one blank at the beginning of the line plus
else
width = max_change;
- for (i = 0; i < data->nr; i++) {
+ for (i = 0; i < count; i++) {
const char *prefix = "";
char *name = data->files[i]->print_name;
uintmax_t added = data->files[i]->added;
uintmax_t deleted = data->files[i]->deleted;
int name_len;
+ if (!data->files[i]->is_renamed &&
+ (added + deleted == 0)) {
+ total_files--;
+ continue;
+ }
/*
* "scale" the filename
*/
fprintf(options->file, " Unmerged\n");
continue;
}
- else if (!data->files[i]->is_renamed &&
- (added + deleted == 0)) {
- total_files--;
- continue;
- }
/*
* scale the add/delete
show_graph(options->file, '-', del, del_c, reset);
fprintf(options->file, "\n");
}
+ for (i = count; i < data->nr; i++) {
+ uintmax_t added = data->files[i]->added;
+ uintmax_t deleted = data->files[i]->deleted;
+ if (!data->files[i]->is_renamed &&
+ (added + deleted == 0)) {
+ total_files--;
+ continue;
+ }
+ adds += added;
+ dels += deleted;
+ if (!extra_shown)
+ fprintf(options->file, "%s ...\n", line_prefix);
+ extra_shown = 1;
+ }
fprintf(options->file, "%s", line_prefix);
fprintf(options->file,
" %d files changed, %d insertions(+), %d deletions(-)\n",
{
int bound;
unsigned char *deflated;
- z_stream stream;
+ git_zstream stream;
memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, zlib_compression_level);
- bound = deflateBound(&stream, size);
+ git_deflate_init(&stream, zlib_compression_level);
+ bound = git_deflate_bound(&stream, size);
deflated = xmalloc(bound);
stream.next_out = deflated;
stream.avail_out = bound;
stream.next_in = (unsigned char *)data;
stream.avail_in = size;
- while (deflate(&stream, Z_FINISH) == Z_OK)
+ while (git_deflate(&stream, Z_FINISH) == Z_OK)
; /* nothing */
- deflateEnd(&stream);
+ git_deflate_end(&stream);
*result_size = stream.total_out;
return deflated;
}
return NULL;
diff_filespec_load_driver(one);
- if (!one->driver->textconv)
- return NULL;
-
- if (one->driver->textconv_want_cache && !one->driver->textconv_cache) {
- struct notes_cache *c = xmalloc(sizeof(*c));
- struct strbuf name = STRBUF_INIT;
-
- strbuf_addf(&name, "textconv/%s", one->driver->name);
- notes_cache_init(c, name.buf, one->driver->textconv);
- one->driver->textconv_cache = c;
- }
-
- return one->driver;
+ return userdiff_get_textconv(one->driver);
}
static void builtin_diff(const char *name_a,
char *end;
int width = options->stat_width;
int name_width = options->stat_name_width;
+ int count = options->stat_count;
int argcount = 1;
arg += strlen("--stat");
name_width = strtoul(av[1], &end, 10);
argcount = 2;
}
+ } else if (!prefixcmp(arg, "-count")) {
+ arg += strlen("-count");
+ if (*arg == '=')
+ count = strtoul(arg + 1, &end, 10);
+ else if (!*arg && !av[1])
+ die("Option '--stat-count' requires a value");
+ else if (!*arg) {
+ count = strtoul(av[1], &end, 10);
+ argcount = 2;
+ }
}
break;
case '=':
width = strtoul(arg+1, &end, 10);
if (*end == ',')
name_width = strtoul(end+1, &end, 10);
+ if (*end == ',')
+ count = strtoul(end+1, &end, 10);
}
/* Important! This checks all the error cases! */
options->output_format |= DIFF_FORMAT_DIFFSTAT;
options->stat_name_width = name_width;
options->stat_width = width;
+ options->stat_count = count;
return argcount;
}
else if (!strcmp(arg, "-s"))
options->output_format |= DIFF_FORMAT_NO_OUTPUT;
else if (!prefixcmp(arg, "--stat"))
- /* --stat, --stat-width, or --stat-name-width */
+ /* --stat, --stat-width, --stat-name-width, or --stat-count */
return stat_opt(options, av);
/* renames options */
DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
else if (!strcmp(arg, "--patience"))
DIFF_XDL_SET(options, PATIENCE_DIFF);
+ else if (!strcmp(arg, "--histogram"))
+ DIFF_XDL_SET(options, HISTOGRAM_DIFF);
/* flags options */
else if (!strcmp(arg, "--binary")) {
#define XDL_KPDIS_RUN 4
#define XDL_MAX_EQLIMIT 1024
#define XDL_SIMSCAN_WINDOW 100
+ #define XDL_GUESS_NLINES1 256
+ #define XDL_GUESS_NLINES2 20
typedef struct s_xdlclass {
char const *line;
long size;
long idx;
+ long len1, len2;
} xdlclass_t;
typedef struct s_xdlclassifier {
long hsize;
xdlclass_t **rchash;
chastore_t ncha;
+ xdlclass_t **rcrecs;
+ long alloc;
long count;
long flags;
} xdlclassifier_t;
static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags);
static void xdl_free_classifier(xdlclassifier_t *cf);
-static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
- xrecord_t *rec);
-static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
+ unsigned int hbits, xrecord_t *rec);
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
xdlclassifier_t *cf, xdfile_t *xdf);
static void xdl_free_ctx(xdfile_t *xdf);
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
-static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
-static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
- long i;
-
cf->flags = flags;
cf->hbits = xdl_hashbits((unsigned int) size);
xdl_cha_free(&cf->ncha);
return -1;
}
- for (i = 0; i < cf->hsize; i++)
- cf->rchash[i] = NULL;
+ memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *));
+ cf->alloc = size;
+ if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) {
+
+ xdl_free(cf->rchash);
+ xdl_cha_free(&cf->ncha);
+ return -1;
+ }
+
cf->count = 0;
return 0;
+/*
+ * Release everything owned by the classifier: the rcrecs array, the
+ * rchash table and the ncha storage.
+ */
static void xdl_free_classifier(xdlclassifier_t *cf) {
+ xdl_free(cf->rcrecs);
xdl_free(cf->rchash);
xdl_cha_free(&cf->ncha);
}
-static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
- xrecord_t *rec) {
+static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
+ unsigned int hbits, xrecord_t *rec) {
long hi;
char const *line;
xdlclass_t *rcrec;
+ xdlclass_t **rcrecs;
line = rec->ptr;
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
return -1;
}
rcrec->idx = cf->count++;
+ if (cf->count > cf->alloc) {
+ cf->alloc *= 2;
+ if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) {
+
+ return -1;
+ }
+ cf->rcrecs = rcrecs;
+ }
+ cf->rcrecs[rcrec->idx] = rcrec;
rcrec->line = line;
rcrec->size = rec->size;
rcrec->ha = rec->ha;
+ rcrec->len1 = rcrec->len2 = 0;
rcrec->next = cf->rchash[hi];
cf->rchash[hi] = rcrec;
}
+ (pass == 1) ? rcrec->len1++ : rcrec->len2++;
+
rec->ha = (unsigned long) rcrec->idx;
hi = (long) XDL_HASHLONG(rec->ha, hbits);
}
-static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
- long i, nrec, hsize, bsize;
+ long nrec, hsize, bsize;
unsigned long hav;
char const *blk, *cur, *top, *prev;
xrecord_t *crec;
char *rchg;
long *rindex;
- if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) {
-
- return -1;
- }
- if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) {
-
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
-
- hbits = xdl_hashbits((unsigned int) narec);
- hsize = 1 << hbits;
- if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) {
-
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
+ ha = NULL;
+ rindex = NULL;
+ rchg = NULL;
+ rhash = NULL;
+ recs = NULL;
+
+ if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
+ goto abort;
+ if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
+ goto abort;
+
+ if (xpp->flags & XDF_HISTOGRAM_DIFF)
+ hbits = hsize = 0;
+ else {
+ hbits = xdl_hashbits((unsigned int) narec);
+ hsize = 1 << hbits;
+ if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+ goto abort;
+ memset(rhash, 0, hsize * sizeof(xrecord_t *));
}
- for (i = 0; i < hsize; i++)
- rhash[i] = NULL;
nrec = 0;
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
- for (top = blk + bsize;;) {
- if (cur >= top) {
- if (!(cur = blk = xdl_mmfile_next(mf, &bsize)))
- break;
- top = blk + bsize;
- }
+ for (top = blk + bsize; cur < top; ) {
prev = cur;
hav = xdl_hash_record(&cur, top, xpp->flags);
if (nrec >= narec) {
narec *= 2;
- if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
-
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
+ if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *))))
+ goto abort;
recs = rrecs;
}
- if (!(crec = xdl_cha_alloc(&xdf->rcha))) {
-
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
+ if (!(crec = xdl_cha_alloc(&xdf->rcha)))
+ goto abort;
crec->ptr = prev;
crec->size = (long) (cur - prev);
crec->ha = hav;
recs[nrec++] = crec;
- if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) {
-
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
+ if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
- xdl_classify_record(cf, rhash, hbits, crec) < 0)
++ xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
+ goto abort;
}
}
- if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) {
-
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
+ if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char))))
+ goto abort;
memset(rchg, 0, (nrec + 2) * sizeof(char));
- if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) {
-
- xdl_free(rchg);
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
- if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) {
-
- xdl_free(rindex);
- xdl_free(rchg);
- xdl_free(rhash);
- xdl_free(recs);
- xdl_cha_free(&xdf->rcha);
- return -1;
- }
+ if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long))))
+ goto abort;
+ if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long))))
+ goto abort;
xdf->nrec = nrec;
xdf->recs = recs;
xdf->dend = nrec - 1;
return 0;
+
+ abort:
+ xdl_free(ha);
+ xdl_free(rindex);
+ xdl_free(rchg);
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
}
+/*
+ * Prepare the diff environment "xe" for the two input files: build
+ * xe->xdf1 from mf1 and xe->xdf2 from mf2 and, unless the patience or
+ * histogram algorithm is selected in xpp->flags, run xdl_optimize_ctxs()
+ * on the pair. Returns 0 on success and -1 on failure; on failure any
+ * context that was already prepared, and the classifier, are released.
+ */
int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
- long enl1, enl2;
+ long enl1, enl2, sample;
xdlclassifier_t cf;
+ /*
+ * The classifier is only initialised for non-histogram diffs, but
+ * the xdl_prepare_ctx() error paths below release it unconditionally.
+ * Zero it first so that those paths never free indeterminate pointers.
+ */
+ memset(&cf, 0, sizeof(cf));
+
- enl1 = xdl_guess_lines(mf1) + 1;
- enl2 = xdl_guess_lines(mf2) + 1;
+ /*
+ * For histogram diff, we can afford a smaller sample size and
+ * thus a poorer estimate of the number of lines, as the hash
+ * table (rhash) won't be filled up/grown. The number of lines
+ * (nrecs) will be updated correctly anyway by
+ * xdl_prepare_ctx().
+ */
+ sample = xpp->flags & XDF_HISTOGRAM_DIFF ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1;
+
+ enl1 = xdl_guess_lines(mf1, sample) + 1;
+ enl2 = xdl_guess_lines(mf2, sample) + 1;
- if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
+ if (!(xpp->flags & XDF_HISTOGRAM_DIFF) &&
+ xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
return -1;
}
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+ if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
xdl_free_classifier(&cf);
return -1;
}
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+ if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
return -1;
}
- if (!(xpp->flags & XDF_HISTOGRAM_DIFF))
- xdl_free_classifier(&cf);
-
if (!(xpp->flags & XDF_PATIENCE_DIFF) &&
- xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
+ !(xpp->flags & XDF_HISTOGRAM_DIFF) &&
+ xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf2);
xdl_free_ctx(&xe->xdf1);
+ /* This branch only runs for non-histogram diffs, so the classifier is live: release it too. */
+ xdl_free_classifier(&cf);
return -1;
}
- xdl_free_classifier(&cf);
++ if (!(xpp->flags & XDF_HISTOGRAM_DIFF))
++ xdl_free_classifier(&cf);
+
return 0;
}
* matches on the other file. Also, lines that have multiple matches
* might be potentially discarded if they appear in a run of discardable.
*/
-static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
- long i, nm, rhi, nreff, mlim;
- unsigned long hav;
+static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
+ long i, nm, nreff;
xrecord_t **recs;
- xrecord_t *rec;
+ xdlclass_t *rcrec;
char *dis, *dis1, *dis2;
if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) {
dis1 = dis;
dis2 = dis1 + xdf1->nrec + 1;
- if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
- mlim = XDL_MAX_EQLIMIT;
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
- hav = (*recs)->ha;
- rhi = (long) XDL_HASHLONG(hav, xdf2->hbits);
- for (nm = 0, rec = xdf2->rhash[rhi]; rec; rec = rec->next)
- if (rec->ha == hav && ++nm == mlim)
- break;
- dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
+ rcrec = cf->rcrecs[(*recs)->ha];
+ nm = rcrec ? rcrec->len2 : 0;
+ dis1[i] = (nm == 0) ? 0: 1;
}
- if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
- mlim = XDL_MAX_EQLIMIT;
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
- hav = (*recs)->ha;
- rhi = (long) XDL_HASHLONG(hav, xdf1->hbits);
- for (nm = 0, rec = xdf1->rhash[rhi]; rec; rec = rec->next)
- if (rec->ha == hav && ++nm == mlim)
- break;
- dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
+ rcrec = cf->rcrecs[(*recs)->ha];
+ nm = rcrec ? rcrec->len1 : 0;
+ dis2[i] = (nm == 0) ? 0: 1;
}
for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
}
-static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) {
+static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
if (xdl_trim_ends(xdf1, xdf2) < 0 ||
- xdl_cleanup_records(xdf1, xdf2) < 0) {
+ xdl_cleanup_records(cf, xdf1, xdf2) < 0) {
return -1;
}