From: Junio C Hamano Date: Tue, 28 Mar 2006 00:03:36 +0000 (-0800) Subject: Merge branch ak/svn X-Git-Tag: v1.3.0-rc1~1 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/65b5e41e24dd76e9cc272399f458857d5b13d63e?hp=a7cfb4a43fce841cd673057cf4137f85e6f804eb Merge branch ak/svn --- diff --git a/.gitignore b/.gitignore index b4355b9faf..75891c393b 100644 --- a/.gitignore +++ b/.gitignore @@ -128,8 +128,7 @@ common-cmds.h *.deb git-core.spec *.exe -libgit.a -*.o +*.[ao] *.py[co] config.mak git-blame diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index e813f84202..796d049be6 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -14,9 +14,9 @@ SYNOPSIS (-[c|d|o|i|s|u|k|m])\* [-x |--exclude=] [-X |--exclude-from=] - [--exclude-per-directory=] + [--exclude-per-directory=] [--error-unmatch] - [--full-name] [--] []\* + [--full-name] [--abbrev] [--] []\* DESCRIPTION ----------- @@ -52,6 +52,9 @@ OPTIONS If a whole directory is classified as "other", show just its name (with a trailing slash) and not its whole contents. +--no-empty-directory:: + Do not list empty directories. Has no effect without --directory. + -u|--unmerged:: Show unmerged files in the output (forces --stage) @@ -98,6 +101,11 @@ OPTIONS option forces paths to be output relative to the project top directory. +--abbrev[=]:: + Instead of showing the full 40-byte hexadecimal object + lines, show only handful hexdigits prefix. + Non default number of digits can be specified with --abbrev=. + --:: Do not interpret any more arguments as options. diff --git a/Documentation/git-ls-tree.txt b/Documentation/git-ls-tree.txt index 5bf6d8b613..018c401953 100644 --- a/Documentation/git-ls-tree.txt +++ b/Documentation/git-ls-tree.txt @@ -8,7 +8,9 @@ git-ls-tree - Lists the contents of a tree object SYNOPSIS -------- -'git-ls-tree' [-d] [-r] [-t] [-z] [--name-only] [--name-status] [paths...] 
+'git-ls-tree' [-d] [-r] [-t] [-z] + [--name-only] [--name-status] [--full-name] [--abbrev=[]] + [paths...] DESCRIPTION ----------- @@ -40,6 +42,11 @@ OPTIONS --name-status:: List only filenames (instead of the "long" output), one per line. +--abbrev[=]:: + Instead of showing the full 40-byte hexadecimal object + lines, show only handful hexdigits prefix. + Non default number of digits can be specified with --abbrev=. + paths:: When paths are given, show them (note that this isn't really raw pathnames, but rather a list of patterns to match). Otherwise diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index b36276c7ed..4a7e67a4d2 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -48,6 +48,18 @@ would be: / D---E---F---G master +In case of conflict, git-rebase will stop at the first problematic commit +and leave conflict markers in the tree. After resolving the conflict manually +and updating the index with the desired resolution, you can continue the +rebasing process with + + git am --resolved --3way + +Alternatively, you can undo the git-rebase with + + git reset --hard ORIG_HEAD + rm -r .dotest + OPTIONS ------- :: diff --git a/Makefile b/Makefile index 8d45378b68..4edb383321 100644 --- a/Makefile +++ b/Makefile @@ -188,9 +188,10 @@ PYMODULES = \ gitMergeCommon.py LIB_FILE=libgit.a +XDIFF_LIB=xdiff/lib.a LIB_H = \ - blob.h cache.h commit.h count-delta.h csum-file.h delta.h \ + blob.h cache.h commit.h csum-file.h delta.h \ diff.h object.h pack.h pkt-line.h quote.h refs.h \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h @@ -200,7 +201,7 @@ DIFF_OBJS = \ diffcore-delta.o LIB_OBJS = \ - blob.o commit.o connect.o count-delta.o csum-file.o \ + blob.o commit.o connect.o csum-file.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ @@ -209,8 +210,8 @@ LIB_OBJS = \ fetch-clone.o revision.o 
pager.o \ $(DIFF_OBJS) -LIBS = $(LIB_FILE) -LIBS += -lz +GITLIBS = $(LIB_FILE) $(XDIFF_LIB) +LIBS = $(GITLIBS) -lz # # Platform specific tweaks @@ -544,12 +545,18 @@ init-db.o: init-db.c -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c $(LIB_OBJS): $(LIB_H) -$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) +$(patsubst git-%$X,%.o,$(PROGRAMS)): $(GITLIBS) $(DIFF_OBJS): diffcore.h $(LIB_FILE): $(LIB_OBJS) $(AR) rcs $@ $(LIB_OBJS) +XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o + +$(XDIFF_LIB): $(XDIFF_OBJS) + $(AR) rcs $@ $(XDIFF_OBJS) + + doc: $(MAKE) -C Documentation all @@ -622,7 +629,8 @@ rpm: dist ### Cleaning rules clean: - rm -f *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o $(LIB_FILE) + rm -f *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \ + $(LIB_FILE) $(XDIFF_LIB) rm -f $(ALL_PROGRAMS) git$X rm -f *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags rm -rf $(GIT_TARNAME) diff --git a/blame.c b/blame.c index 7e88833a37..396defccc7 100644 --- a/blame.c +++ b/blame.c @@ -752,6 +752,7 @@ int main(int argc, const char **argv) int found_rename; const char* prefix = setup_git_directory(); + git_config(git_default_config); for(i = 1; i < argc; i++) { if(options) { diff --git a/cache.h b/cache.h index 1f962809b0..255e6b5cc7 100644 --- a/cache.h +++ b/cache.h @@ -165,6 +165,7 @@ extern void rollback_index_file(struct cache_file *); extern int trust_executable_bit; extern int assume_unchanged; extern int only_use_symrefs; +extern int warn_ambiguous_refs; extern int diff_rename_limit_default; extern int shared_repository; extern const char *apply_default_whitespace; diff --git a/cat-file.c b/cat-file.c index 1a613f3ee5..761111eb0f 100644 --- a/cat-file.c +++ b/cat-file.c @@ -100,6 +100,7 @@ int main(int argc, char **argv) int opt; setup_git_directory(); + git_config(git_default_config); if (argc != 3 || get_sha1(argv[2], sha1)) usage("git-cat-file [-t|-s|-e|-p|] "); diff --git a/config.c b/config.c index 
7dbdce1966..95ec34923d 100644 --- a/config.c +++ b/config.c @@ -232,6 +232,11 @@ int git_default_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.warnambiguousrefs")) { + warn_ambiguous_refs = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "user.name")) { strncpy(git_default_name, value, sizeof(git_default_name)); return 0; diff --git a/count-delta.c b/count-delta.c deleted file mode 100644 index 058a2aadb1..0000000000 --- a/count-delta.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - * The delta-parsing part is almost straight copy of patch-delta.c - * which is (C) 2005 Nicolas Pitre . - */ -#include -#include -#include -#include "delta.h" -#include "count-delta.h" - -/* - * NOTE. We do not _interpret_ delta fully. As an approximation, we - * just count the number of bytes that are copied from the source, and - * the number of literal data bytes that are inserted. - * - * Number of bytes that are _not_ copied from the source is deletion, - * and number of inserted literal bytes are addition, so sum of them - * is the extent of damage. 
- */ -int count_delta(void *delta_buf, unsigned long delta_size, - unsigned long *src_copied, unsigned long *literal_added) -{ - unsigned long copied_from_source, added_literal; - const unsigned char *data, *top; - unsigned char cmd; - unsigned long src_size, dst_size, out; - - if (delta_size < DELTA_SIZE_MIN) - return -1; - - data = delta_buf; - top = delta_buf + delta_size; - - src_size = get_delta_hdr_size(&data); - dst_size = get_delta_hdr_size(&data); - - added_literal = copied_from_source = out = 0; - while (data < top) { - cmd = *data++; - if (cmd & 0x80) { - unsigned long cp_off = 0, cp_size = 0; - if (cmd & 0x01) cp_off = *data++; - if (cmd & 0x02) cp_off |= (*data++ << 8); - if (cmd & 0x04) cp_off |= (*data++ << 16); - if (cmd & 0x08) cp_off |= (*data++ << 24); - if (cmd & 0x10) cp_size = *data++; - if (cmd & 0x20) cp_size |= (*data++ << 8); - if (cmd & 0x40) cp_size |= (*data++ << 16); - if (cp_size == 0) cp_size = 0x10000; - - copied_from_source += cp_size; - out += cp_size; - } else { - /* write literal into dst */ - added_literal += cmd; - out += cmd; - data += cmd; - } - } - - /* sanity check */ - if (data != top || out != dst_size) - return -1; - - /* delete size is what was _not_ copied from source. - * edit size is that and literal additions. 
- */ - *src_copied = copied_from_source; - *literal_added = added_literal; - return 0; -} diff --git a/count-delta.h b/count-delta.h deleted file mode 100644 index 7359629827..0000000000 --- a/count-delta.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - */ -#ifndef COUNT_DELTA_H -#define COUNT_DELTA_H - -int count_delta(void *, unsigned long, - unsigned long *src_copied, unsigned long *literal_added); - -#endif diff --git a/diff.c b/diff.c index c0548eed98..5eae0947f3 100644 --- a/diff.c +++ b/diff.c @@ -8,8 +8,7 @@ #include "quote.h" #include "diff.h" #include "diffcore.h" - -static const char *diff_opts = "-pu"; +#include "xdiff/xdiff.h" static int use_size_cache; @@ -69,25 +68,10 @@ static const char *external_diff(void) { static const char *external_diff_cmd = NULL; static int done_preparing = 0; - const char *env_diff_opts; if (done_preparing) return external_diff_cmd; - - /* - * Default values above are meant to match the - * Linux kernel development style. Examples of - * alternative styles you can specify via environment - * variables are: - * - * GIT_DIFF_OPTS="-c"; - */ external_diff_cmd = getenv("GIT_EXTERNAL_DIFF"); - - /* In case external diff fails... 
*/ - env_diff_opts = getenv("GIT_DIFF_OPTS"); - if (env_diff_opts) diff_opts = env_diff_opts; - done_preparing = 1; return external_diff_cmd; } @@ -101,13 +85,12 @@ static struct diff_tempfile { char tmp_path[TEMPFILE_PATH_LEN]; } diff_temp[2]; -static int count_lines(const char *filename) +static int count_lines(const char *data, int size) { - FILE *in; int count, ch, completely_empty = 1, nl_just_seen = 0; - in = fopen(filename, "r"); count = 0; - while ((ch = fgetc(in)) != EOF) + while (0 < size--) { + ch = *data++; if (ch == '\n') { count++; nl_just_seen = 1; @@ -117,7 +100,7 @@ static int count_lines(const char *filename) nl_just_seen = 0; completely_empty = 0; } - fclose(in); + } if (completely_empty) return 0; if (!nl_just_seen) @@ -140,12 +123,11 @@ static void print_line_count(int count) } } -static void copy_file(int prefix, const char *filename) +static void copy_file(int prefix, const char *data, int size) { - FILE *in; int ch, nl_just_seen = 1; - in = fopen(filename, "r"); - while ((ch = fgetc(in)) != EOF) { + while (0 < size--) { + ch = *data++; if (nl_just_seen) putchar(prefix); putchar(ch); @@ -154,92 +136,106 @@ static void copy_file(int prefix, const char *filename) else nl_just_seen = 0; } - fclose(in); if (!nl_just_seen) printf("\n\\ No newline at end of file\n"); } static void emit_rewrite_diff(const char *name_a, const char *name_b, - struct diff_tempfile *temp) + struct diff_filespec *one, + struct diff_filespec *two) { /* Use temp[i].name as input, name_a and name_b as labels */ int lc_a, lc_b; - lc_a = count_lines(temp[0].name); - lc_b = count_lines(temp[1].name); + lc_a = count_lines(one->data, one->size); + lc_b = count_lines(two->data, two->size); printf("--- %s\n+++ %s\n@@ -", name_a, name_b); print_line_count(lc_a); printf(" +"); print_line_count(lc_b); printf(" @@\n"); if (lc_a) - copy_file('-', temp[0].name); + copy_file('-', one->data, one->size); if (lc_b) - copy_file('+', temp[1].name); + copy_file('+', two->data, two->size); } 
-static const char *builtin_diff(const char *name_a, - const char *name_b, - struct diff_tempfile *temp, - const char *xfrm_msg, - int complete_rewrite, - const char **args) +static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) { - int i, next_at, cmd_size; - const char *const diff_cmd = "diff -L%s -L%s"; - const char *const diff_arg = "-- %s %s||:"; /* "||:" is to return 0 */ - const char *input_name_sq[2]; - const char *label_path[2]; - char *cmd; - - /* diff_cmd and diff_arg have 4 %s in total which makes - * the sum of these strings 8 bytes larger than required. - * we use 2 spaces around diff-opts, and we need to count - * terminating NUL; we used to subtract 5 here, but we do not - * care about small leaks in this subprocess that is about - * to exec "diff" anymore. - */ - cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg) - + 128); - - for (i = 0; i < 2; i++) { - input_name_sq[i] = sq_quote(temp[i].name); - if (!strcmp(temp[i].name, "/dev/null")) - label_path[i] = "/dev/null"; - else if (!i) - label_path[i] = sq_quote(quote_two("a/", name_a)); - else - label_path[i] = sq_quote(quote_two("b/", name_b)); - cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i])); + if (!DIFF_FILE_VALID(one)) { + mf->ptr = ""; /* does not matter */ + mf->size = 0; + return 0; + } + else if (diff_populate_filespec(one, 0)) + return -1; + mf->ptr = one->data; + mf->size = one->size; + return 0; +} + +struct emit_callback { + const char **label_path; +}; + +static int fn_out(void *priv, mmbuffer_t *mb, int nbuf) +{ + int i; + struct emit_callback *ecbdata = priv; + + if (ecbdata->label_path[0]) { + printf("--- %s\n", ecbdata->label_path[0]); + printf("+++ %s\n", ecbdata->label_path[1]); + ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } + for (i = 0; i < nbuf; i++) + if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout)) + return -1; + return 0; +} + +#define FIRST_FEW_BYTES 8000 +static int mmfile_is_binary(mmfile_t *mf) +{ + long sz = mf->size; 
+ if (FIRST_FEW_BYTES < sz) + sz = FIRST_FEW_BYTES; + if (memchr(mf->ptr, 0, sz)) + return 1; + return 0; +} - cmd = xmalloc(cmd_size); - - next_at = 0; - next_at += snprintf(cmd+next_at, cmd_size-next_at, - diff_cmd, label_path[0], label_path[1]); - next_at += snprintf(cmd+next_at, cmd_size-next_at, - " %s ", diff_opts); - next_at += snprintf(cmd+next_at, cmd_size-next_at, - diff_arg, input_name_sq[0], input_name_sq[1]); - - printf("diff --git %s %s\n", - quote_two("a/", name_a), quote_two("b/", name_b)); - if (label_path[0][0] == '/') { - /* dev/null */ - printf("new file mode %s\n", temp[1].mode); +static void builtin_diff(const char *name_a, + const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + mmfile_t mf1, mf2; + const char *lbl[2]; + char *a_one, *b_two; + + a_one = quote_two("a/", name_a); + b_two = quote_two("b/", name_b); + lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; + lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; + printf("diff --git %s %s\n", a_one, b_two); + if (lbl[0][0] == '/') { + /* /dev/null */ + printf("new file mode %06o\n", two->mode); if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); } - else if (label_path[1][0] == '/') { - printf("deleted file mode %s\n", temp[0].mode); + else if (lbl[1][0] == '/') { + printf("deleted file mode %06o\n", one->mode); if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); } else { - if (strcmp(temp[0].mode, temp[1].mode)) { - printf("old mode %s\n", temp[0].mode); - printf("new mode %s\n", temp[1].mode); + if (one->mode != two->mode) { + printf("old mode %06o\n", one->mode); + printf("new mode %06o\n", two->mode); } if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); @@ -247,20 +243,45 @@ static const char *builtin_diff(const char *name_a, * we do not run diff between different kind * of objects. 
*/ - if (strncmp(temp[0].mode, temp[1].mode, 3)) - return NULL; + if ((one->mode ^ two->mode) & S_IFMT) + goto free_ab_and_return; if (complete_rewrite) { - emit_rewrite_diff(name_a, name_b, temp); - return NULL; + emit_rewrite_diff(name_a, name_b, one, two); + goto free_ab_and_return; } } - /* This is disgusting */ - *args++ = "sh"; - *args++ = "-c"; - *args++ = cmd; - *args = NULL; - return "/bin/sh"; + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) + printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + else { + /* Crazy xdl interfaces.. */ + const char *diffopts = getenv("GIT_DIFF_OPTS"); + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + struct emit_callback ecbdata; + + ecbdata.label_path = lbl; + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = 3; + if (!diffopts) + ; + else if (!strncmp(diffopts, "--unified=", 10)) + xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); + else if (!strncmp(diffopts, "-u", 2)) + xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); + ecb.outf = fn_out; + ecb.priv = &ecbdata; + xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + } + + free_ab_and_return: + free(a_one); + free(b_two); + return; } struct diff_filespec *alloc_filespec(const char *path) @@ -463,6 +484,8 @@ void diff_free_filespec_data(struct diff_filespec *s) munmap(s->data, s->size); s->should_free = s->should_munmap = 0; s->data = NULL; + free(s->cnt_data); + s->cnt_data = NULL; } static void prep_temp_blob(struct diff_tempfile *temp, @@ -618,6 +641,7 @@ static void run_external_diff(const char *pgm, int retval; static int atexit_asked = 0; const char *othername; + const char **arg = &spawn_arg[0]; othername = (other? 
other : name); if (one && two) { @@ -632,36 +656,25 @@ static void run_external_diff(const char *pgm, signal(SIGINT, remove_tempfile_on_signal); } - if (pgm) { - const char **arg = &spawn_arg[0]; - if (one && two) { - *arg++ = pgm; - *arg++ = name; - *arg++ = temp[0].name; - *arg++ = temp[0].hex; - *arg++ = temp[0].mode; - *arg++ = temp[1].name; - *arg++ = temp[1].hex; - *arg++ = temp[1].mode; - if (other) { - *arg++ = other; - *arg++ = xfrm_msg; - } - } else { - *arg++ = pgm; - *arg++ = name; + if (one && two) { + *arg++ = pgm; + *arg++ = name; + *arg++ = temp[0].name; + *arg++ = temp[0].hex; + *arg++ = temp[0].mode; + *arg++ = temp[1].name; + *arg++ = temp[1].hex; + *arg++ = temp[1].mode; + if (other) { + *arg++ = other; + *arg++ = xfrm_msg; } - *arg = NULL; } else { - if (one && two) { - pgm = builtin_diff(name, othername, temp, xfrm_msg, complete_rewrite, spawn_arg); - } else - printf("* Unmerged path %s\n", name); + *arg++ = pgm; + *arg++ = name; } - - retval = 0; - if (pgm) - retval = spawn_prog(pgm, spawn_arg); + *arg = NULL; + retval = spawn_prog(pgm, spawn_arg); remove_tempfile(); if (retval) { fprintf(stderr, "external diff died, stopping at %s.\n", name); @@ -669,6 +682,26 @@ static void run_external_diff(const char *pgm, } } +static void run_diff_cmd(const char *pgm, + const char *name, + const char *other, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + if (pgm) { + run_external_diff(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); + return; + } + if (one && two) + builtin_diff(name, other ? 
other : name, + one, two, xfrm_msg, complete_rewrite); + else + printf("* Unmerged path %s\n", name); +} + static void diff_fill_sha1_info(struct diff_filespec *one) { if (DIFF_FILE_VALID(one)) { @@ -698,8 +731,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ - run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL, - 0); + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); return; } @@ -771,15 +803,15 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); - run_external_diff(NULL, name, other, one, null, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); free(null); null = alloc_filespec(one->path); - run_external_diff(NULL, name, other, null, two, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); free(null); } else - run_external_diff(pgm, name, other, one, two, xfrm_msg, - complete_rewrite); + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); free(name_munged); free(other_munged); diff --git a/diffcore-break.c b/diffcore-break.c index 0fc2b860be..ed0e14c6d8 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, src_copied, literal_added; - int to_break = 0; + unsigned long delta_size, base_size, src_copied, literal_added, + src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" * is the default. 
@@ -68,37 +68,45 @@ static int should_break(struct diff_filespec *src, if (diffcore_count_changes(src->data, src->size, dst->data, dst->size, + NULL, NULL, 0, &src_copied, &literal_added)) return 0; + /* sanity */ + if (src->size < src_copied) + src_copied = src->size; + if (dst->size < literal_added + src_copied) { + if (src_copied < dst->size) + literal_added = dst->size - src_copied; + else + literal_added = 0; + } + src_removed = src->size - src_copied; + /* Compute merge-score, which is "how much is removed * from the source material". The clean-up stage will * merge the surviving pair together if the score is * less than the minimum, after rename/copy runs. */ - if (src->size <= src_copied) - ; /* all copied, nothing removed */ - else { - delta_size = src->size - src_copied; - *merge_score_p = delta_size * MAX_SCORE / src->size; - } - + *merge_score_p = src_removed * MAX_SCORE / src->size; + /* Extent of damage, which counts both inserts and * deletes. */ - if (src->size + literal_added <= src_copied) - delta_size = 0; /* avoid wrapping around */ - else - delta_size = (src->size - src_copied) + literal_added; - - /* We break if the edit exceeds the minimum. - * i.e. (break_score / MAX_SCORE < delta_size / base_size) + delta_size = src_removed + literal_added; + if (delta_size * MAX_SCORE / base_size < break_score) + return 0; + + /* If you removed a lot without adding new material, that is + * not really a rewrite. 
*/ - if (break_score * base_size < delta_size * MAX_SCORE) - to_break = 1; + if ((src->size * break_score < src_removed * MAX_SCORE) && + (literal_added * 20 < src_removed) && + (literal_added * 20 < src_copied)) + return 0; - return to_break; + return 1; } void diffcore_break(int break_score) diff --git a/diffcore-delta.c b/diffcore-delta.c index 1e6a6911ec..7338a40c59 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -1,43 +1,213 @@ #include "cache.h" #include "diff.h" #include "diffcore.h" -#include "delta.h" -#include "count-delta.h" - -static int diffcore_count_changes_1(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, - unsigned long delta_limit, - unsigned long *src_copied, - unsigned long *literal_added) + +/* + * Idea here is very simple. + * + * We have total of (sz-N+1) N-byte overlapping sequences in buf whose + * size is sz. If the same N-byte sequence appears in both source and + * destination, we say the byte that starts that sequence is shared + * between them (i.e. copied from source to destination). + * + * For each possible N-byte sequence, if the source buffer has more + * instances of it than the destination buffer, that means the + * difference are the number of bytes not copied from source to + * destination. If the counts are the same, everything was copied + * from source to destination. If the destination has more, + * everything was copied, and destination added more. + * + * We are doing an approximation so we do not really have to waste + * memory by actually storing the sequence. We just hash them into + * somewhere around 2^16 hashbuckets and count the occurrences. + * + * The length of the sequence is arbitrarily set to 8 for now. + */ + +/* Wild guess at the initial hash size */ +#define INITIAL_HASH_SIZE 9 + +/* We leave more room in smaller hash but do not let it + * grow to have unused hole too much. 
+ */ +#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2)) + +/* A prime rather carefully chosen between 2^16..2^17, so that + * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable + * size under the current 2<<17 maximum, which can hold this many + * different values before overflowing to hashtable of size 2<<18. + */ +#define HASHBASE 107927 + +struct spanhash { + unsigned int hashval; + unsigned int cnt; +}; +struct spanhash_top { + int alloc_log2; + int free; + struct spanhash data[FLEX_ARRAY]; +}; + +static struct spanhash *spanhash_find(struct spanhash_top *top, + unsigned int hashval) { - void *delta; - unsigned long delta_size; - - delta = diff_delta(src, src_size, - dst, dst_size, - &delta_size, delta_limit); - if (!delta) - /* If delta_limit is exceeded, we have too much differences */ - return -1; - - /* Estimate the edit size by interpreting delta. */ - if (count_delta(delta, delta_size, src_copied, literal_added)) { - free(delta); - return -1; + int sz = 1 << top->alloc_log2; + int bucket = hashval & (sz - 1); + while (1) { + struct spanhash *h = &(top->data[bucket++]); + if (!h->cnt) + return NULL; + if (h->hashval == hashval) + return h; + if (sz <= bucket) + bucket = 0; } - free(delta); - return 0; +} + +static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig) +{ + struct spanhash_top *new; + int i; + int osz = 1 << orig->alloc_log2; + int sz = osz << 1; + + new = xmalloc(sizeof(*orig) + sizeof(struct spanhash) * sz); + new->alloc_log2 = orig->alloc_log2 + 1; + new->free = INITIAL_FREE(new->alloc_log2); + memset(new->data, 0, sizeof(struct spanhash) * sz); + for (i = 0; i < osz; i++) { + struct spanhash *o = &(orig->data[i]); + int bucket; + if (!o->cnt) + continue; + bucket = o->hashval & (sz - 1); + while (1) { + struct spanhash *h = &(new->data[bucket++]); + if (!h->cnt) { + h->hashval = o->hashval; + h->cnt = o->cnt; + new->free--; + break; + } + if (sz <= bucket) + bucket = 0; + } + } + free(orig); + 
return new; +} + +static struct spanhash_top *add_spanhash(struct spanhash_top *top, + unsigned int hashval, int cnt) +{ + int bucket, lim; + struct spanhash *h; + + lim = (1 << top->alloc_log2); + bucket = hashval & (lim - 1); + while (1) { + h = &(top->data[bucket++]); + if (!h->cnt) { + h->hashval = hashval; + h->cnt = cnt; + top->free--; + if (top->free < 0) + return spanhash_rehash(top); + return top; + } + if (h->hashval == hashval) { + h->cnt += cnt; + return top; + } + if (lim <= bucket) + bucket = 0; + } +} + +static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz) +{ + int i, n; + unsigned int accum1, accum2, hashval; + struct spanhash_top *hash; + + i = INITIAL_HASH_SIZE; + hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<alloc_log2 = i; + hash->free = INITIAL_FREE(i); + memset(hash->data, 0, sizeof(struct spanhash) * (1<> 25); + accum2 = (accum2 << 7) ^ (old_1 >> 25); + accum1 += c; + if (++n < 64 && c != '\n') + continue; + hashval = (accum1 + accum2 * 0x61) % HASHBASE; + hash = add_spanhash(hash, hashval, n); + n = 0; + accum1 = accum2 = 0; + } + return hash; } int diffcore_count_changes(void *src, unsigned long src_size, void *dst, unsigned long dst_size, + void **src_count_p, + void **dst_count_p, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added) { - return diffcore_count_changes_1(src, src_size, - dst, dst_size, - delta_limit, - src_copied, - literal_added); + int i, ssz; + struct spanhash_top *src_count, *dst_count; + unsigned long sc, la; + + src_count = dst_count = NULL; + if (src_count_p) + src_count = *src_count_p; + if (!src_count) { + src_count = hash_chars(src, src_size); + if (src_count_p) + *src_count_p = src_count; + } + if (dst_count_p) + dst_count = *dst_count_p; + if (!dst_count) { + dst_count = hash_chars(dst, dst_size); + if (dst_count_p) + *dst_count_p = dst_count; + } + sc = la = 0; + + ssz = 1 << src_count->alloc_log2; + for (i = 0; i < ssz; i++) { + struct spanhash 
*s = &(src_count->data[i]); + struct spanhash *d; + unsigned dst_cnt, src_cnt; + if (!s->cnt) + continue; + src_cnt = s->cnt; + d = spanhash_find(dst_count, s->hashval); + dst_cnt = d ? d->cnt : 0; + if (src_cnt < dst_cnt) { + la += dst_cnt - src_cnt; + sc += src_cnt; + } + else + sc += dst_cnt; + } + + if (!src_count_p) + free(src_count); + if (!dst_count_p) + free(dst_count); + *src_copied = sc; + *literal_added = la; + return 0; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 55cf1c37f3..e992698720 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -133,7 +133,7 @@ static int estimate_similarity(struct diff_filespec *src, * match than anything else; the destination does not even * call into this function in that case. */ - unsigned long delta_size, base_size, src_copied, literal_added; + unsigned long max_size, delta_size, base_size, src_copied, literal_added; unsigned long delta_limit; int score; @@ -144,9 +144,9 @@ static int estimate_similarity(struct diff_filespec *src, if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) return 0; - delta_size = ((src->size < dst->size) ? - (dst->size - src->size) : (src->size - dst->size)); + max_size = ((src->size > dst->size) ? src->size : dst->size); base_size = ((src->size < dst->size) ? src->size : dst->size); + delta_size = max_size - base_size; /* We would not consider edits that change the file size so * drastically. delta_size must be smaller than @@ -166,23 +166,18 @@ static int estimate_similarity(struct diff_filespec *src, delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; if (diffcore_count_changes(src->data, src->size, dst->data, dst->size, + &src->cnt_data, &dst->cnt_data, delta_limit, &src_copied, &literal_added)) return 0; - /* Extent of damage */ - if (src->size + literal_added < src_copied) - delta_size = 0; - else - delta_size = (src->size - src_copied) + literal_added; - - /* - * Now we will give some score to it. 100% edit gets 0 points - * and 0% edit gets MAX_SCORE points. 
+ /* How similar are they? + * what percentage of material in dst are from source? */ - score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); - if (score < 0) return 0; - if (MAX_SCORE < score) return MAX_SCORE; + if (!dst->size) + score = 0; /* should not happen */ + else + score = src_copied * MAX_SCORE / max_size; return score; } @@ -310,6 +305,8 @@ void diffcore_rename(struct diff_options *options) m->score = estimate_similarity(one, two, minimum_score); } + /* We do not need the text anymore */ + diff_free_filespec_data(two); dst_cnt++; } /* cost matrix sorted by most to least similar pair */ diff --git a/diffcore.h b/diffcore.h index dba4f17658..73c7842cc7 100644 --- a/diffcore.h +++ b/diffcore.h @@ -17,8 +17,8 @@ */ #define MAX_SCORE 60000.0 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ -#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/ -#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/ +#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */ +#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ @@ -26,6 +26,7 @@ struct diff_filespec { unsigned char sha1[20]; char *path; void *data; + void *cnt_data; unsigned long size; int xfrm_flags; /* for use by the xfrm */ unsigned short mode; /* file mode */ @@ -103,6 +104,8 @@ void diff_debug_queue(const char *, struct diff_queue_struct *); extern int diffcore_count_changes(void *src, unsigned long src_size, void *dst, unsigned long dst_size, + void **src_count_p, + void **dst_count_p, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added); diff --git a/environment.c b/environment.c index 16c08f0697..6df647862c 100644 --- a/environment.c +++ b/environment.c @@ -14,6 +14,7 @@ char git_default_name[MAX_GITNAME]; int trust_executable_bit = 1; int assume_unchanged = 0; int only_use_symrefs 
= 0; +int warn_ambiguous_refs = 1; int repository_format_version = 0; char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8"; int shared_repository = 0; diff --git a/fetch-pack.c b/fetch-pack.c index 535de10660..a3bcad016f 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -7,8 +7,9 @@ static int keep_pack; static int quiet; static int verbose; +static int fetch_all; static const char fetch_pack_usage[] = -"git-fetch-pack [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory ..."; +"git-fetch-pack [--all] [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory ..."; static const char *exec = "git-upload-pack"; #define COMPLETE (1U << 0) @@ -266,8 +267,9 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) for (prev = NULL, current = *refs; current; current = next) { next = current->next; if ((!memcmp(current->name, "refs/", 5) && - check_ref_format(current->name + 5)) || - !path_match(current->name, nr_match, match)) { + check_ref_format(current->name + 5)) || + (!fetch_all && + !path_match(current->name, nr_match, match))) { if (prev == NULL) *refs = next; else @@ -376,7 +378,11 @@ static int fetch_pack(int fd[2], int nr_match, char **match) goto all_done; } if (find_common(fd, sha1, ref) < 0) - fprintf(stderr, "warning: no common commits\n"); + if (!keep_pack) + /* When cloning, it is not unusual to have + * no common commit. 
+ */ + fprintf(stderr, "warning: no common commits\n"); if (keep_pack) status = receive_keep_pack(fd, "git-fetch-pack", quiet); @@ -426,6 +432,10 @@ int main(int argc, char **argv) use_thin_pack = 1; continue; } + if (!strcmp("--all", arg)) { + fetch_all = 1; + continue; + } if (!strcmp("-v", arg)) { verbose = 1; continue; diff --git a/git-clone.sh b/git-clone.sh index 4ed861d576..6887321972 100755 --- a/git-clone.sh +++ b/git-clone.sh @@ -9,7 +9,7 @@ unset CDPATH usage() { - echo >&2 "Usage: $0 [--bare] [-l [-s]] [-q] [-u ] [-o ] [-n] []" + echo >&2 "Usage: $0 [--use-separate-remote] [--reference ] [--bare] [-l [-s]] [-q] [-u ] [-o ] [-n] []" exit 1 } @@ -40,13 +40,62 @@ Perhaps git-update-server-info needs to be run there?" do name=`expr "$refname" : 'refs/\(.*\)'` && case "$name" in - *^*) ;; - *) - git-http-fetch -v -a -w "$name" "$name" "$1/" || exit 1 + *^*) continue;; esac + if test -n "$use_separate_remote" && + branch_name=`expr "$name" : 'heads/\(.*\)'` + then + tname="remotes/$origin/$branch_name" + else + tname=$name + fi + git-http-fetch -v -a -w "$tname" "$name" "$1/" || exit 1 done <"$clone_tmp/refs" rm -fr "$clone_tmp" + http_fetch "$1/HEAD" "$GIT_DIR/REMOTE_HEAD" +} + +# Read git-fetch-pack -k output and store the remote branches. +copy_refs=' +use File::Path qw(mkpath); +use File::Basename qw(dirname); +my $git_dir = $ARGV[0]; +my $use_separate_remote = $ARGV[1]; +my $origin = $ARGV[2]; + +my $branch_top = ($use_separate_remote ? 
"remotes/$origin" : "heads"); +my $tag_top = "tags"; + +sub store { + my ($sha1, $name, $top) = @_; + $name = "$git_dir/refs/$top/$name"; + mkpath(dirname($name)); + open O, ">", "$name"; + print O "$sha1\n"; + close O; +} + +open FH, "<", "$git_dir/CLONE_HEAD"; +while () { + my ($sha1, $name) = /^([0-9a-f]{40})\s(.*)$/; + next if ($name =~ /\^\173/); + if ($name eq "HEAD") { + open O, ">", "$git_dir/REMOTE_HEAD"; + print O "$sha1\n"; + close O; + next; + } + if ($name =~ s/^refs\/heads\///) { + store($sha1, $name, $branch_top); + next; + } + if ($name =~ s/^refs\/tags\///) { + store($sha1, $name, $tag_top); + next; + } } +close FH; +' quiet= use_local=no @@ -54,8 +103,10 @@ local_shared=no no_checkout= upload_pack= bare= -origin=origin +reference= +origin= origin_override= +use_separate_remote= while case "$#,$1" in 0,*) break ;; @@ -68,9 +119,21 @@ while *,-s|*,--s|*,--sh|*,--sha|*,--shar|*,--share|*,--shared) local_shared=yes; use_local=yes ;; *,-q|*,--quiet) quiet=-q ;; + *,--use-separate-remote) + use_separate_remote=t ;; 1,-o) usage;; + 1,--reference) usage ;; + *,--reference) + shift; reference="$1" ;; + *,--reference=*) + reference=`expr "$1" : '--reference=\(.*\)'` ;; *,-o) - git-check-ref-format "$2" || { + case "$2" in + */*) + echo >&2 "'$2' is not suitable for an origin name" + exit 1 + esac + git-check-ref-format "heads/$2" || { echo >&2 "'$2' is not suitable for a branch name" exit 1 } @@ -100,9 +163,19 @@ then echo >&2 '--bare and -o $origin options are incompatible.' exit 1 fi + if test t = "$use_separate_remote" + then + echo >&2 '--bare and --use-separate-remote options are incompatible.' 
+ exit 1 + fi no_checkout=yes fi +if test -z "$origin" +then + origin=origin +fi + # Turn the source into an absolute path if # it is local repo="$1" @@ -130,6 +203,28 @@ yes) GIT_DIR="$D/.git" ;; esac +if test -n "$reference" +then + if test -d "$reference" + then + if test -d "$reference/.git/objects" + then + reference="$reference/.git" + fi + reference=$(cd "$reference" && pwd) + echo "$reference/objects" >"$GIT_DIR/objects/info/alternates" + (cd "$reference" && tar cf - refs) | + (cd "$GIT_DIR/refs" && + mkdir reference-tmp && + cd reference-tmp && + tar xf -) + else + echo >&2 "$reference: not a local directory." && usage + fi +fi + +rm -f "$GIT_DIR/CLONE_HEAD" + # We do local magic only when the user tells us to. case "$local,$use_local" in yes,yes) @@ -165,24 +260,14 @@ yes,yes) } >"$GIT_DIR/objects/info/alternates" ;; esac - - # Make a duplicate of refs and HEAD pointer - HEAD= - if test -f "$repo/HEAD" - then - HEAD=HEAD - fi - (cd "$repo" && tar cf - refs $HEAD) | - (cd "$GIT_DIR" && tar xf -) || exit 1 + git-ls-remote "$repo" >"$GIT_DIR/CLONE_HEAD" ;; *) case "$repo" in rsync://*) rsync $quiet -av --ignore-existing \ - --exclude info "$repo/objects/" "$GIT_DIR/objects/" && - rsync $quiet -av --ignore-existing \ - --exclude info "$repo/refs/" "$GIT_DIR/refs/" || exit - + --exclude info "$repo/objects/" "$GIT_DIR/objects/" || + exit # Look at objects/info/alternates for rsync -- http will # support it natively and git native ones will do it on the # remote end. Not having that file is not a crime. @@ -205,6 +290,7 @@ yes,yes) done rm -f "$GIT_DIR/TMP_ALT" fi + git-ls-remote "$repo" >"$GIT_DIR/CLONE_HEAD" ;; http://*) if test -z "@@NO_CURL@@" @@ -217,38 +303,89 @@ yes,yes) ;; *) cd "$D" && case "$upload_pack" in - '') git-clone-pack $quiet "$repo" ;; - *) git-clone-pack $quiet "$upload_pack" "$repo" ;; - esac || { - echo >&2 "clone-pack from '$repo' failed." 
+ '') git-fetch-pack --all -k $quiet "$repo" ;; + *) git-fetch-pack --all -k $quiet "$upload_pack" "$repo" ;; + esac >"$GIT_DIR/CLONE_HEAD" || { + echo >&2 "fetch-pack from '$repo' failed." exit 1 } ;; esac ;; esac +test -d "$GIT_DIR/refs/reference-tmp" && rm -fr "$GIT_DIR/refs/reference-tmp" + +if test -f "$GIT_DIR/CLONE_HEAD" +then + # Figure out where the remote HEAD points at. + perl -e "$copy_refs" "$GIT_DIR" "$use_separate_remote" "$origin" +fi cd "$D" || exit -if test -f "$GIT_DIR/HEAD" && test -z "$bare" +if test -z "$bare" && test -f "$GIT_DIR/REMOTE_HEAD" then - head_points_at=`git-symbolic-ref HEAD` + head_sha1=`cat "$GIT_DIR/REMOTE_HEAD"` + # Figure out which remote branch HEAD points at. + case "$use_separate_remote" in + '') remote_top=refs/heads ;; + *) remote_top="refs/remotes/$origin" ;; + esac + + # What to use to track the remote primary branch + if test -n "$use_separate_remote" + then + origin_tracking="remotes/$origin/master" + else + origin_tracking="heads/$origin" + fi + + # The name under $remote_top the remote HEAD seems to point at + head_points_at=$( + ( + echo "master" + cd "$GIT_DIR/$remote_top" && + find . -type f -print | sed -e 's/^\.\///' + ) | ( + done=f + while read name + do + test t = $done && continue + branch_tip=`cat "$GIT_DIR/$remote_top/$name"` + if test "$head_sha1" = "$branch_tip" + then + echo "$name" + done=t + fi + done + ) + ) + + # Write out remotes/$origin file. 
case "$head_points_at" in - refs/heads/*) - head_points_at=`expr "$head_points_at" : 'refs/heads/\(.*\)'` + ?*) mkdir -p "$GIT_DIR/remotes" && - echo >"$GIT_DIR/remotes/origin" \ + echo >"$GIT_DIR/remotes/$origin" \ "URL: $repo -Pull: $head_points_at:$origin" && - git-update-ref "refs/heads/$origin" $(git-rev-parse HEAD) && - (cd "$GIT_DIR" && find "refs/heads" -type f -print) | - while read ref +Pull: refs/heads/$head_points_at:refs/$origin_tracking" && + case "$use_separate_remote" in + t) git-update-ref HEAD "$head_sha1" ;; + *) git-update-ref "refs/heads/$origin" $(git-rev-parse HEAD) ;; + esac && + (cd "$GIT_DIR/$remote_top" && find . -type f -print) | + while read dotslref do - head=`expr "$ref" : 'refs/heads/\(.*\)'` && - test "$head_points_at" = "$head" || - test "$origin" = "$head" || - echo "Pull: ${head}:${head}" - done >>"$GIT_DIR/remotes/origin" + name=`expr "$dotslref" : './\(.*\)'` && + test "$head_points_at" = "$name" || + test "$origin" = "$name" || + echo "Pull: refs/heads/${name}:$remote_top/${name}" + done >>"$GIT_DIR/remotes/$origin" && + case "$use_separate_remote" in + t) + rm -f "refs/remotes/$origin/HEAD" + git-symbolic-ref "refs/remotes/$origin/HEAD" \ + "refs/remotes/$origin/$head_points_at" + esac esac case "$no_checkout" in @@ -256,6 +393,7 @@ Pull: $head_points_at:$origin" && git-read-tree -m -u -v HEAD HEAD esac fi +rm -f "$GIT_DIR/CLONE_HEAD" "$GIT_DIR/REMOTE_HEAD" trap - exit diff --git a/git-fetch.sh b/git-fetch.sh index 68356343a6..954901ddce 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -94,6 +94,9 @@ append_fetch_head () { # remote-nick is the URL given on the command line (or a shorthand) # remote-name is the $GIT_DIR relative refs/ path we computed # for this refspec. + + # the $note_ variable will be fed to git-fmt-merge-msg for further + # processing. 
case "$remote_name_" in HEAD) note_= ;; @@ -103,6 +106,9 @@ append_fetch_head () { refs/tags/*) note_="$(expr "$remote_name_" : 'refs/tags/\(.*\)')" note_="tag '$note_' of " ;; + refs/remotes/*) + note_="$(expr "$remote_name_" : 'refs/remotes/\(.*\)')" + note_="remote branch '$note_' of " ;; *) note_="$remote_name of " ;; esac @@ -147,10 +153,10 @@ fast_forward_local () { else echo >&2 "* $1: storing $3" fi - git-update-ref "$1" "$2" + git-update-ref "$1" "$2" ;; - refs/heads/*) + refs/heads/* | refs/remotes/*) # $1 is the ref being updated. # $2 is the new value for the ref. local=$(git-rev-parse --verify "$1^0" 2>/dev/null) diff --git a/git-fmt-merge-msg.perl b/git-fmt-merge-msg.perl index afe80e6321..5986e5414a 100755 --- a/git-fmt-merge-msg.perl +++ b/git-fmt-merge-msg.perl @@ -75,6 +75,7 @@ sub shortlog { $src{$src} = { BRANCH => [], TAG => [], + R_BRANCH => [], GENERIC => [], # &1 == has HEAD. # &2 == has others. @@ -91,6 +92,11 @@ sub shortlog { push @{$src{$src}{TAG}}, $1; $src{$src}{HEAD_STATUS} |= 2; } + elsif (/^remote branch (.*)$/) { + $origin = $1; + push @{$src{$src}{R_BRANCH}}, $1; + $src{$src}{HEAD_STATUS} |= 2; + } elsif (/^HEAD$/) { $origin = $src; $src{$src}{HEAD_STATUS} |= 1; @@ -123,6 +129,8 @@ sub shortlog { } push @this, andjoin("branch ", "branches ", $src{$src}{BRANCH}); + push @this, andjoin("remote branch ", "remote branches ", + $src{$src}{R_BRANCH}); push @this, andjoin("tag ", "tags ", $src{$src}{TAG}); push @this, andjoin("commit ", "commits ", diff --git a/git-merge.sh b/git-merge.sh index cc0952a97d..78ab422e4e 100755 --- a/git-merge.sh +++ b/git-merge.sh @@ -11,11 +11,15 @@ LF=' ' all_strategies='recursive octopus resolve stupid ours' -default_strategies='recursive' +default_twohead_strategies='recursive' +default_octopus_strategies='octopus' +no_trivial_merge_strategies='ours' use_strategies= + +index_merge=t if test "@@NO_PYTHON@@"; then all_strategies='resolve octopus stupid ours' - default_strategies='resolve' + 
default_twohead_strategies='resolve' fi dropsave() { @@ -90,8 +94,6 @@ do shift done -test "$#" -le 2 && usage ;# we need at least two heads. - merge_msg="$1" shift head_arg="$1" @@ -99,6 +101,8 @@ head=$(git-rev-parse --verify "$1"^0) || usage shift # All the rest are remote heads +test "$#" = 0 && usage ;# we need at least one remote head. + remoteheads= for remote do @@ -108,6 +112,27 @@ do done set x $remoteheads ; shift +case "$use_strategies" in +'') + case "$#" in + 1) + use_strategies="$default_twohead_strategies" ;; + *) + use_strategies="$default_octopus_strategies" ;; + esac + ;; +esac + +for s in $use_strategies +do + case " $s " in + *" $no_trivial_merge_strategies "*) + index_merge=f + break + ;; + esac +done + case "$#" in 1) common=$(git-merge-base --all $head "$@") @@ -118,18 +143,21 @@ case "$#" in esac echo "$head" >"$GIT_DIR/ORIG_HEAD" -case "$#,$common,$no_commit" in -*,'',*) +case "$index_merge,$#,$common,$no_commit" in +f,*) + # We've been told not to try anything clever. Skip to real merge. + ;; +?,*,'',*) # No common ancestors found. We need a real merge. ;; -1,"$1",*) +?,1,"$1",*) # If head can reach all the merge then we are up to date. - # but first the most common case of merging one remote + # but first the most common case of merging one remote. echo "Already up-to-date." dropsave exit 0 ;; -1,"$head",*) +?,1,"$head",*) # Again the most common case of merging one remote. echo "Updating from $head to $1" git-update-index --refresh 2>/dev/null @@ -139,11 +167,11 @@ case "$#,$common,$no_commit" in dropsave exit 0 ;; -1,?*"$LF"?*,*) +?,1,?*"$LF"?*,*) # We are not doing octopus and not fast forward. Need a # real merge. ;; -1,*,) +?,1,*,) # We are not doing octopus, not fast forward, and have only # one common. See if it is really trivial. git var GIT_COMMITTER_IDENT >/dev/null || exit @@ -188,17 +216,6 @@ esac # We are going to make a new commit. 
git var GIT_COMMITTER_IDENT >/dev/null || exit -case "$use_strategies" in -'') - case "$#" in - 1) - use_strategies="$default_strategies" ;; - *) - use_strategies=octopus ;; - esac - ;; -esac - # At this point, we need a real merge. No matter what strategy # we use, it would operate on the index, possibly affecting the # working tree, and when resolved cleanly, have the desired tree @@ -270,11 +287,7 @@ done # auto resolved the merge cleanly. if test '' != "$result_tree" then - parents="-p $head" - for remote - do - parents="$parents -p $remote" - done + parents=$(git-show-branch --independent "$head" "$@" | sed -e 's/^/-p /') result_commit=$(echo "$merge_msg" | git-commit-tree $result_tree $parents) || exit finish "$result_commit" "Merge $result_commit, made by $wt_strategy." dropsave diff --git a/git-parse-remote.sh b/git-parse-remote.sh index 5f158c613f..63f22818e6 100755 --- a/git-parse-remote.sh +++ b/git-parse-remote.sh @@ -86,14 +86,14 @@ canon_refs_list_for_fetch () { local=$(expr "$ref" : '[^:]*:\(.*\)') case "$remote" in '') remote=HEAD ;; - refs/heads/* | refs/tags/*) ;; - heads/* | tags/* ) remote="refs/$remote" ;; + refs/heads/* | refs/tags/* | refs/remotes/*) ;; + heads/* | tags/* | remotes/* ) remote="refs/$remote" ;; *) remote="refs/heads/$remote" ;; esac case "$local" in '') local= ;; - refs/heads/* | refs/tags/*) ;; - heads/* | tags/* ) local="refs/$local" ;; + refs/heads/* | refs/tags/* | refs/remotes/*) ;; + heads/* | tags/* | remotes/* ) local="refs/$local" ;; *) local="refs/heads/$local" ;; esac diff --git a/git-push.sh b/git-push.sh index 73dcf067cb..f10cadbf15 100755 --- a/git-push.sh +++ b/git-push.sh @@ -8,7 +8,7 @@ USAGE='[--all] [--tags] [--force] [...]' has_all= has_force= has_exec= -has_thin= +has_thin=--thin remote= do_tags= @@ -24,7 +24,9 @@ do --exec=*) has_exec="$1" ;; --thin) - has_thin="$1" ;; + ;; # noop + --no-thin) + has_thin= ;; -*) usage ;; *) diff --git a/git-send-email.perl b/git-send-email.perl index 
b220d11cc1..ecfa347b85 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -19,10 +19,16 @@ use strict; use warnings; use Term::ReadLine; -use Mail::Sendmail qw(sendmail %mailcfg); use Getopt::Long; use Data::Dumper; -use Email::Valid; +use Net::SMTP; + +# most mail servers generate the Date: header, but not all... +$ENV{LC_ALL} = 'C'; +use POSIX qw/strftime/; + +my $have_email_valid = eval { require Email::Valid; 1 }; +my $smtp; sub unique_email_list(@); sub cleanup_compose_files(); @@ -31,7 +37,7 @@ my $compose_filename = ".msg.$$"; # Variables we fill in automatically, or via prompting: -my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose); +my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose,$time); # Behavior modification variables my ($chain_reply_to, $smtp_server, $quiet, $suppress_from, $no_signed_off_cc) = (1, "localhost", 0, 0, 0); @@ -244,6 +250,16 @@ sub gitvar_ident { # Variables we set as part of the loop over files our ($message_id, $cc, %mail, $subject, $reply_to, $message); +sub extract_valid_address { + my $address = shift; + if ($have_email_valid) { + return Email::Valid->address($address); + } else { + # less robust/correct than the monster regexp in Email::Valid, + # but still does a 99% job, and one less dependency + return ($address =~ /([^\"<>\s]+@[^<>\s]+)/); + } +} # Usually don't need to change anything below here. @@ -253,13 +269,12 @@ sub gitvar_ident { # 1 second since the last time we were called. 
# We'll setup a template for the message id, using the "from" address: -my $message_id_from = Email::Valid->address($from); +my $message_id_from = extract_valid_address($from); my $message_id_template = "<%s-git-send-email-$message_id_from>"; sub make_message_id { - my $date = `date "+\%s"`; - chomp($date); + my $date = time; my $pseudo_rand = int (rand(4200)); $message_id = sprintf $message_id_template, "$date$pseudo_rand"; #print "new message id = $message_id\n"; # Was useful for debugging @@ -268,38 +283,49 @@ sub make_message_id $cc = ""; +$time = time - scalar $#files; sub send_message { - my $to = join (", ", unique_email_list(@to)); - - %mail = ( To => $to, - From => $from, - CC => $cc, - Subject => $subject, - Message => $message, - 'Reply-to' => $from, - 'In-Reply-To' => $reply_to, - 'Message-ID' => $message_id, - 'X-Mailer' => "git-send-email", - ); - - $mail{smtp} = $smtp_server; - $mailcfg{mime} = 0; - - #print Data::Dumper->Dump([\%mail],[qw(*mail)]); - - sendmail(%mail) or die $Mail::Sendmail::error; + my @recipients = unique_email_list(@to); + my $to = join (",\n\t", @recipients); + @recipients = unique_email_list(@recipients,@cc); + my $date = strftime('%a, %d %b %Y %H:%M:%S %z', localtime($time++)); + + my $header = "From: $from +To: $to +Cc: $cc +Subject: $subject +Reply-To: $from +Date: $date +Message-Id: $message_id +X-Mailer: git-send-email @@GIT_VERSION@@ +"; + $header .= "In-Reply-To: $reply_to\n" if $reply_to; + + $smtp ||= Net::SMTP->new( $smtp_server ); + $smtp->mail( $from ) or die $smtp->message; + $smtp->to( @recipients ) or die $smtp->message; + $smtp->data or die $smtp->message; + $smtp->datasend("$header\n$message") or die $smtp->message; + $smtp->dataend() or die $smtp->message; + $smtp->ok or die "Failed to send $subject\n".$smtp->message; if ($quiet) { printf "Sent %s\n", $subject; } else { - print "OK. Log says:\n", $Mail::Sendmail::log; - print "\n\n" + print "OK. 
Log says: +Date: $date +Server: $smtp_server Port: 25 +From: $from +Subject: $subject +Cc: $cc +To: $to + +Result: ", $smtp->code, ' ', ($smtp->message =~ /\n([^\n]+\n)$/s), "\n"; } } - $reply_to = $initial_reply_to; make_message_id(); $subject = $initial_subject; @@ -390,14 +416,14 @@ () } - +$smtp->quit if $smtp; sub unique_email_list(@) { my %seen; my @emails; foreach my $entry (@_) { - my $clean = Email::Valid->address($entry); + my $clean = extract_valid_address($entry); next if $seen{$clean}++; push @emails, $entry; } diff --git a/ls-files.c b/ls-files.c index e42119c5ee..4a4af1ca3b 100644 --- a/ls-files.c +++ b/ls-files.c @@ -11,6 +11,7 @@ #include "cache.h" #include "quote.h" +static int abbrev = 0; static int show_deleted = 0; static int show_cached = 0; static int show_others = 0; @@ -20,6 +21,7 @@ static int show_unmerged = 0; static int show_modified = 0; static int show_killed = 0; static int show_other_directories = 0; +static int hide_empty_directories = 0; static int show_valid_bit = 0; static int line_terminator = '\n'; @@ -258,11 +260,12 @@ static int dir_exists(const char *dirname, int len) * Also, we ignore the name ".git" (even if it is not a directory). * That likely will not change. 
*/ -static void read_directory(const char *path, const char *base, int baselen) +static int read_directory(const char *path, const char *base, int baselen) { - DIR *dir = opendir(path); + DIR *fdir = opendir(path); + int contents = 0; - if (dir) { + if (fdir) { int exclude_stk; struct dirent *de; char fullname[MAXPATHLEN + 1]; @@ -270,7 +273,7 @@ static void read_directory(const char *path, const char *base, int baselen) exclude_stk = push_exclude_per_directory(base, baselen); - while ((de = readdir(dir)) != NULL) { + while ((de = readdir(fdir)) != NULL) { int len; if ((de->d_name[0] == '.') && @@ -288,6 +291,7 @@ static void read_directory(const char *path, const char *base, int baselen) switch (DTYPE(de)) { struct stat st; + int subdir, rewind_base; default: continue; case DT_UNKNOWN: @@ -301,22 +305,32 @@ static void read_directory(const char *path, const char *base, int baselen) case DT_DIR: memcpy(fullname + baselen + len, "/", 2); len++; + rewind_base = nr_dir; + subdir = read_directory(fullname, fullname, + baselen + len); if (show_other_directories && - !dir_exists(fullname, baselen + len)) + (subdir || !hide_empty_directories) && + !dir_exists(fullname, baselen + len)) { + // Rewind the read subdirectory + while (nr_dir > rewind_base) + free(dir[--nr_dir]); break; - read_directory(fullname, fullname, - baselen + len); + } + contents += subdir; continue; case DT_REG: case DT_LNK: break; } add_name(fullname, baselen + len); + contents++; } - closedir(dir); + closedir(fdir); pop_exclude_per_directory(exclude_stk); } + + return contents; } static int cmp_name(const void *p1, const void *p2) @@ -489,7 +503,8 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) printf("%s%06o %s %d\t", tag, ntohl(ce->ce_mode), - sha1_to_hex(ce->sha1), + abbrev ? 
find_unique_abbrev(ce->sha1,abbrev) + : sha1_to_hex(ce->sha1), ce_stage(ce)); write_name_quoted("", 0, ce->name + offset, line_terminator, stdout); @@ -630,7 +645,8 @@ static void verify_pathspec(void) static const char ls_files_usage[] = "git-ls-files [-z] [-t] [-v] (--[cached|deleted|others|stage|unmerged|killed|modified])* " "[ --ignored ] [--exclude=] [--exclude-from=] " - "[ --exclude-per-directory= ] [--full-name] [--] []*"; + "[ --exclude-per-directory= ] [--full-name] [--abbrev] " + "[--] []*"; int main(int argc, const char **argv) { @@ -696,6 +712,10 @@ int main(int argc, const char **argv) show_other_directories = 1; continue; } + if (!strcmp(arg, "--no-empty-directory")) { + hide_empty_directories = 1; + continue; + } if (!strcmp(arg, "-u") || !strcmp(arg, "--unmerged")) { /* There's no point in showing unmerged unless * you also show the stage information. @@ -737,6 +757,18 @@ int main(int argc, const char **argv) error_unmatch = 1; continue; } + if (!strncmp(arg, "--abbrev=", 9)) { + abbrev = strtoul(arg+9, NULL, 10); + if (abbrev && abbrev < MINIMUM_ABBREV) + abbrev = MINIMUM_ABBREV; + else if (abbrev > 40) + abbrev = 40; + continue; + } + if (!strcmp(arg, "--abbrev")) { + abbrev = DEFAULT_ABBREV; + continue; + } if (*arg == '-') usage(ls_files_usage); break; diff --git a/ls-tree.c b/ls-tree.c index d005643ee0..26258c3cf9 100644 --- a/ls-tree.c +++ b/ls-tree.c @@ -13,13 +13,14 @@ static int line_termination = '\n'; #define LS_TREE_ONLY 2 #define LS_SHOW_TREES 4 #define LS_NAME_ONLY 8 +static int abbrev = 0; static int ls_options = 0; const char **pathspec; static int chomp_prefix = 0; static const char *prefix; static const char ls_tree_usage[] = - "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] [path...]"; + "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] [--abbrev[=]] [path...]"; static int show_recursive(const char *base, int baselen, const char *pathname) { @@ -73,7 +74,9 @@ static int 
show_tree(unsigned char *sha1, const char *base, int baselen, return 0; if (!(ls_options & LS_NAME_ONLY)) - printf("%06o %s %s\t", mode, type, sha1_to_hex(sha1)); + printf("%06o %s %s\t", mode, type, + abbrev ? find_unique_abbrev(sha1,abbrev) + : sha1_to_hex(sha1)); write_name_quoted(base + chomp_prefix, baselen - chomp_prefix, pathname, line_termination, stdout); @@ -87,6 +90,7 @@ int main(int argc, const char **argv) struct tree *tree; prefix = setup_git_directory(); + git_config(git_default_config); if (prefix && *prefix) chomp_prefix = strlen(prefix); while (1 < argc && argv[1][0] == '-') { @@ -113,6 +117,18 @@ int main(int argc, const char **argv) chomp_prefix = 0; break; } + if (!strncmp(argv[1]+2, "abbrev=",7)) { + abbrev = strtoul(argv[1]+9, NULL, 10); + if (abbrev && abbrev < MINIMUM_ABBREV) + abbrev = MINIMUM_ABBREV; + else if (abbrev > 40) + abbrev = 40; + break; + } + if (!strcmp(argv[1]+2, "abbrev")) { + abbrev = DEFAULT_ABBREV; + break; + } /* otherwise fallthru */ default: usage(ls_tree_usage); diff --git a/merge-base.c b/merge-base.c index e73fca7453..07f5ab4d1c 100644 --- a/merge-base.c +++ b/merge-base.c @@ -237,6 +237,7 @@ int main(int argc, char **argv) unsigned char rev1key[20], rev2key[20]; setup_git_directory(); + git_config(git_default_config); while (1 < argc && argv[1][0] == '-') { char *arg = argv[1]; diff --git a/name-rev.c b/name-rev.c index 0c3f547622..bad8a53777 100644 --- a/name-rev.c +++ b/name-rev.c @@ -127,6 +127,7 @@ int main(int argc, char **argv) int as_is = 0, all = 0, transform_stdin = 0; setup_git_directory(); + git_config(git_default_config); if (argc < 2) usage(name_rev_usage); diff --git a/read-tree.c b/read-tree.c index 1c3b09beff..eaff444196 100644 --- a/read-tree.c +++ b/read-tree.c @@ -717,6 +717,7 @@ int main(int argc, char **argv) merge_fn_t fn = NULL; setup_git_directory(); + git_config(git_default_config); newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (newfd < 0) diff --git a/rev-list.c 
b/rev-list.c index 812d237f47..441c437855 100644 --- a/rev-list.c +++ b/rev-list.c @@ -40,13 +40,18 @@ static int bisect_list = 0; static int verbose_header = 0; static int abbrev = DEFAULT_ABBREV; static int show_parents = 0; +static int show_timestamp = 0; static int hdr_termination = 0; static const char *commit_prefix = ""; static enum cmit_fmt commit_format = CMIT_FMT_RAW; static void show_commit(struct commit *commit) { - printf("%s%s", commit_prefix, sha1_to_hex(commit->object.sha1)); + if (show_timestamp) + printf("%lu ", commit->date); + if (commit_prefix[0]) + fputs(commit_prefix, stdout); + fputs(sha1_to_hex(commit->object.sha1), stdout); if (show_parents) { struct commit_list *parents = commit->parents; while (parents) { @@ -335,6 +340,10 @@ int main(int argc, const char **argv) show_parents = 1; continue; } + if (!strcmp(arg, "--timestamp")) { + show_timestamp = 1; + continue; + } if (!strcmp(arg, "--bisect")) { bisect_list = 1; continue; diff --git a/rev-parse.c b/rev-parse.c index f90e999e60..f176c56f7f 100644 --- a/rev-parse.c +++ b/rev-parse.c @@ -166,13 +166,17 @@ int main(int argc, char **argv) unsigned char sha1[20]; const char *prefix = setup_git_directory(); + git_config(git_default_config); + for (i = 1; i < argc; i++) { struct stat st; char *arg = argv[i]; char *dotdot; - + if (as_is) { - show_file(arg); + if (show_file(arg) && as_is < 2) + if (lstat(arg, &st) < 0) + die("'%s': %s", arg, strerror(errno)); continue; } if (!strcmp(arg,"-n")) { @@ -192,7 +196,7 @@ int main(int argc, char **argv) if (*arg == '-') { if (!strcmp(arg, "--")) { - as_is = 1; + as_is = 2; /* Pass on the "--" if we show anything but files.. 
*/ if (filter & (DO_FLAGS | DO_REVS)) show_file(arg); diff --git a/revision.c b/revision.c index 12cd0529a5..d67718c75b 100644 --- a/revision.c +++ b/revision.c @@ -649,7 +649,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch /* If we didn't have a "--", all filenames must exist */ for (j = i; j < argc; j++) { if (lstat(argv[j], &st) < 0) - die("'%s': %s", arg, strerror(errno)); + die("'%s': %s", argv[j], strerror(errno)); } revs->prune_data = get_pathspec(revs->prefix, argv + i); break; diff --git a/send-pack.c b/send-pack.c index c8ffc8d537..409f188503 100644 --- a/send-pack.c +++ b/send-pack.c @@ -362,6 +362,8 @@ int main(int argc, char **argv) pid_t pid; setup_git_directory(); + git_config(git_default_config); + argv++; for (i = 1; i < argc; i++, argv++) { char *arg = *argv; diff --git a/sha1_name.c b/sha1_name.c index d67de18ba5..4f92e12a8d 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -235,14 +235,21 @@ static int ambiguous_path(const char *path, int len) static int get_sha1_basic(const char *str, int len, unsigned char *sha1) { - static const char *prefix[] = { - "", - "refs", - "refs/tags", - "refs/heads", + static const char *fmt[] = { + "%.*s", + "refs/%.*s", + "refs/tags/%.*s", + "refs/heads/%.*s", + "refs/remotes/%.*s", + "refs/remotes/%.*s/HEAD", NULL }; const char **p; + const char *warning = "warning: refname '%.*s' is ambiguous.\n"; + char *pathname; + int already_found = 0; + unsigned char *this_result; + unsigned char sha1_from_ref[20]; if (len == 40 && !get_sha1_hex(str, sha1)) return 0; @@ -251,11 +258,21 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1) if (ambiguous_path(str, len)) return -1; - for (p = prefix; *p; p++) { - char *pathname = git_path("%s/%.*s", *p, len, str); - if (!read_ref(pathname, sha1)) - return 0; + for (p = fmt; *p; p++) { + this_result = already_found ? 
sha1_from_ref : sha1; + pathname = git_path(*p, len, str); + if (!read_ref(pathname, this_result)) { + if (warn_ambiguous_refs) { + if (already_found) + fprintf(stderr, warning, len, str); + already_found++; + } + else + return 0; + } } + if (already_found) + return 0; return -1; } diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh index 2e3c20d6b9..08c11319a7 100755 --- a/t/t4001-diff-rename.sh +++ b/t/t4001-diff-rename.sh @@ -49,7 +49,7 @@ rename from path0 rename to path1 --- a/path0 +++ b/path1 -@@ -8,7 +8,7 @@ Line 7 +@@ -8,7 +8,7 @@ Line 8 Line 9 Line 10 diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index adc5e937de..278eb66701 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -34,6 +34,9 @@ test_expect_success \ mkdir a/bin && cp /bin/sh a/bin && ln -s a a/l1 && + (p=long_path_to_a_file && cd a && + for depth in 1 2 3 4 5; do mkdir $p && cd $p; done && + echo text >file_with_long_path) && (cd a && find .) | sort >a.lst' test_expect_success \ diff --git a/tar-tree.c b/tar-tree.c index e478e13e28..8d9e31c206 100644 --- a/tar-tree.c +++ b/tar-tree.c @@ -1,37 +1,23 @@ /* - * Copyright (c) 2005 Rene Scharfe + * Copyright (c) 2005, 2006 Rene Scharfe */ #include #include "cache.h" #include "diff.h" #include "commit.h" +#include "strbuf.h" +#include "tar.h" #define RECORDSIZE (512) #define BLOCKSIZE (RECORDSIZE * 20) -#define TYPEFLAG_AUTO '\0' -#define TYPEFLAG_REG '0' -#define TYPEFLAG_LNK '2' -#define TYPEFLAG_DIR '5' -#define TYPEFLAG_GLOBAL_HEADER 'g' -#define TYPEFLAG_EXT_HEADER 'x' - -#define EXT_HEADER_PATH 1 -#define EXT_HEADER_LINKPATH 2 - static const char tar_tree_usage[] = "git-tar-tree [basedir]"; static char block[BLOCKSIZE]; static unsigned long offset; -static const char *basedir; static time_t archive_time; -struct path_prefix { - struct path_prefix *prev; - const char *name; -}; - /* tries hard to write, either succeeds or dies in the attempt */ static void reliable_write(void *buf, unsigned long size) { @@ -119,230 
+105,170 @@ static void write_blocked(void *buf, unsigned long size) write_if_needed(); } -static void append_string(char **p, const char *s) -{ - unsigned int len = strlen(s); - memcpy(*p, s, len); - *p += len; -} - -static void append_char(char **p, char c) -{ - **p = c; - *p += 1; -} - -static void append_path_prefix(char **buffer, struct path_prefix *prefix) +static void strbuf_append_string(struct strbuf *sb, const char *s) { - if (!prefix) - return; - append_path_prefix(buffer, prefix->prev); - append_string(buffer, prefix->name); - append_char(buffer, '/'); -} - -static unsigned int path_prefix_len(struct path_prefix *prefix) -{ - if (!prefix) - return 0; - return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1; -} - -static void append_path(char **p, int is_dir, const char *basepath, - struct path_prefix *prefix, const char *path) -{ - if (basepath) { - append_string(p, basepath); - append_char(p, '/'); + int slen = strlen(s); + int total = sb->len + slen; + if (total > sb->alloc) { + sb->buf = xrealloc(sb->buf, total); + sb->alloc = total; } - append_path_prefix(p, prefix); - append_string(p, path); - if (is_dir) - append_char(p, '/'); + memcpy(sb->buf + sb->len, s, slen); + sb->len = total; } -static unsigned int path_len(int is_dir, const char *basepath, - struct path_prefix *prefix, const char *path) -{ - unsigned int len = 0; - if (basepath) - len += strlen(basepath) + 1; - len += path_prefix_len(prefix) + strlen(path); - if (is_dir) - len++; - return len; -} - -static void append_extended_header_prefix(char **p, unsigned int size, - const char *keyword) +/* + * pax extended header records have the format "%u %s=%s\n". %u contains + * the size of the whole string (including the %u), the first %s is the + * keyword, the second one is the value. This function constructs such a + * string and appends it to a struct strbuf. 
+ */ +static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, + const char *value, unsigned int valuelen) { - int len = sprintf(*p, "%u %s=", size, keyword); - *p += len; -} + char *p; + int len, total, tmp; -static unsigned int extended_header_len(const char *keyword, - unsigned int valuelen) -{ /* "%u %s=%s\n" */ - unsigned int len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1; - if (len > 9) - len++; - if (len > 99) + len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1; + for (tmp = len; tmp > 9; tmp /= 10) len++; - return len; -} -static void append_extended_header(char **p, const char *keyword, - const char *value, unsigned int len) -{ - unsigned int size = extended_header_len(keyword, len); - append_extended_header_prefix(p, size, keyword); - memcpy(*p, value, len); - *p += len; - append_char(p, '\n'); -} + total = sb->len + len; + if (total > sb->alloc) { + sb->buf = xrealloc(sb->buf, total); + sb->alloc = total; + } -static void write_header(const unsigned char *, char, const char *, struct path_prefix *, - const char *, unsigned int, void *, unsigned long); + p = sb->buf; + p += sprintf(p, "%u %s=", len, keyword); + memcpy(p, value, valuelen); + p += valuelen; + *p = '\n'; + sb->len = total; +} -/* stores a pax extended header directly in the block buffer */ -static void write_extended_header(const char *headerfilename, int is_dir, - unsigned int flags, const char *basepath, - struct path_prefix *prefix, - const char *path, unsigned int namelen, - void *content, unsigned int contentsize) +static unsigned int ustar_header_chksum(const struct ustar_header *header) { - char *buffer, *p; - unsigned int pathlen, size, linkpathlen = 0; - - size = pathlen = extended_header_len("path", namelen); - if (flags & EXT_HEADER_LINKPATH) { - linkpathlen = extended_header_len("linkpath", contentsize); - size += linkpathlen; - } - write_header(NULL, TYPEFLAG_EXT_HEADER, NULL, NULL, headerfilename, - 0100600, NULL, size); - - buffer = p = malloc(size); - if 
(!buffer) - die("git-tar-tree: %s", strerror(errno)); - append_extended_header_prefix(&p, pathlen, "path"); - append_path(&p, is_dir, basepath, prefix, path); - append_char(&p, '\n'); - if (flags & EXT_HEADER_LINKPATH) - append_extended_header(&p, "linkpath", content, contentsize); - write_blocked(buffer, size); - free(buffer); + char *p = (char *)header; + unsigned int chksum = 0; + while (p < header->chksum) + chksum += *p++; + chksum += sizeof(header->chksum) * ' '; + p += sizeof(header->chksum); + while (p < (char *)header + sizeof(struct ustar_header)) + chksum += *p++; + return chksum; } -static void write_global_extended_header(const unsigned char *sha1) +static int get_path_prefix(const struct strbuf *path, int maxlen) { - char *p; - unsigned int size; - - size = extended_header_len("comment", 40); - write_header(NULL, TYPEFLAG_GLOBAL_HEADER, NULL, NULL, - "pax_global_header", 0100600, NULL, size); - - p = get_record(); - append_extended_header(&p, "comment", sha1_to_hex(sha1), 40); - write_if_needed(); + int i = path->len; + if (i > maxlen) + i = maxlen; + while (i > 0 && path->buf[i] != '/') + i--; + return i; } -/* stores a ustar header directly in the block buffer */ -static void write_header(const unsigned char *sha1, char typeflag, const char *basepath, - struct path_prefix *prefix, const char *path, - unsigned int mode, void *buffer, unsigned long size) +static void write_entry(const unsigned char *sha1, struct strbuf *path, + unsigned int mode, void *buffer, unsigned long size) { - unsigned int namelen; - char *header = NULL; - unsigned int checksum = 0; - int i; - unsigned int ext_header = 0; - - if (typeflag == TYPEFLAG_AUTO) { - if (S_ISDIR(mode)) - typeflag = TYPEFLAG_DIR; - else if (S_ISLNK(mode)) - typeflag = TYPEFLAG_LNK; - else - typeflag = TYPEFLAG_REG; - } - - namelen = path_len(S_ISDIR(mode), basepath, prefix, path); - if (namelen > 100) - ext_header |= EXT_HEADER_PATH; - if (typeflag == TYPEFLAG_LNK && size > 100) - ext_header |= 
EXT_HEADER_LINKPATH; - - /* the extended header must be written before the normal one */ - if (ext_header) { - char headerfilename[51]; - sprintf(headerfilename, "%s.paxheader", sha1_to_hex(sha1)); - write_extended_header(headerfilename, S_ISDIR(mode), - ext_header, basepath, prefix, path, - namelen, buffer, size); - } - - header = get_record(); - - if (ext_header) { - sprintf(header, "%s.data", sha1_to_hex(sha1)); + struct ustar_header header; + struct strbuf ext_header; + + memset(&header, 0, sizeof(header)); + ext_header.buf = NULL; + ext_header.len = ext_header.alloc = 0; + + if (!sha1) { + *header.typeflag = TYPEFLAG_GLOBAL_HEADER; + mode = 0100666; + strcpy(header.name, "pax_global_header"); + } else if (!path) { + *header.typeflag = TYPEFLAG_EXT_HEADER; + mode = 0100666; + sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); } else { - char *p = header; - append_path(&p, S_ISDIR(mode), basepath, prefix, path); + if (S_ISDIR(mode)) { + *header.typeflag = TYPEFLAG_DIR; + mode |= 0777; + } else if (S_ISLNK(mode)) { + *header.typeflag = TYPEFLAG_LNK; + mode |= 0777; + } else if (S_ISREG(mode)) { + *header.typeflag = TYPEFLAG_REG; + mode |= (mode & 0100) ? 
0777 : 0666; + } else { + error("unsupported file mode: 0%o (SHA1: %s)", + mode, sha1_to_hex(sha1)); + return; + } + if (path->len > sizeof(header.name)) { + int plen = get_path_prefix(path, sizeof(header.prefix)); + int rest = path->len - plen - 1; + if (plen > 0 && rest <= sizeof(header.name)) { + memcpy(header.prefix, path->buf, plen); + memcpy(header.name, path->buf + plen + 1, rest); + } else { + sprintf(header.name, "%s.data", + sha1_to_hex(sha1)); + strbuf_append_ext_header(&ext_header, "path", + path->buf, path->len); + } + } else + memcpy(header.name, path->buf, path->len); } - if (typeflag == TYPEFLAG_LNK) { - if (ext_header & EXT_HEADER_LINKPATH) { - sprintf(&header[157], "see %s.paxheader", + if (S_ISLNK(mode) && buffer) { + if (size > sizeof(header.linkname)) { + sprintf(header.linkname, "see %s.paxheader", sha1_to_hex(sha1)); - } else { - if (buffer) - strncpy(&header[157], buffer, size); - } + strbuf_append_ext_header(&ext_header, "linkpath", + buffer, size); + } else + memcpy(header.linkname, buffer, size); } - if (S_ISDIR(mode)) - mode |= 0777; - else if (S_ISREG(mode)) - mode |= (mode & 0100) ? 0777 : 0666; - else if (S_ISLNK(mode)) - mode |= 0777; - sprintf(&header[100], "%07o", mode & 07777); + sprintf(header.mode, "%07o", mode & 07777); + sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0); + sprintf(header.mtime, "%011lo", archive_time); /* XXX: should we provide more meaningful info here? 
*/ - sprintf(&header[108], "%07o", 0); /* uid */ - sprintf(&header[116], "%07o", 0); /* gid */ - strncpy(&header[265], "git", 31); /* uname */ - strncpy(&header[297], "git", 31); /* gname */ - - if (S_ISDIR(mode) || S_ISLNK(mode)) - size = 0; - sprintf(&header[124], "%011lo", size); - sprintf(&header[136], "%011lo", archive_time); + sprintf(header.uid, "%07o", 0); + sprintf(header.gid, "%07o", 0); + strncpy(header.uname, "git", 31); + strncpy(header.gname, "git", 31); + sprintf(header.devmajor, "%07o", 0); + sprintf(header.devminor, "%07o", 0); - header[156] = typeflag; + memcpy(header.magic, "ustar", 6); + memcpy(header.version, "00", 2); - memcpy(&header[257], "ustar", 6); - memcpy(&header[263], "00", 2); + sprintf(header.chksum, "%07o", ustar_header_chksum(&header)); - sprintf(&header[329], "%07o", 0); /* devmajor */ - sprintf(&header[337], "%07o", 0); /* devminor */ - - memset(&header[148], ' ', 8); - for (i = 0; i < RECORDSIZE; i++) - checksum += header[i]; - sprintf(&header[148], "%07o", checksum & 0x1fffff); + if (ext_header.len > 0) { + write_entry(sha1, NULL, 0, ext_header.buf, ext_header.len); + free(ext_header.buf); + } + write_blocked(&header, sizeof(header)); + if (S_ISREG(mode) && buffer && size > 0) + write_blocked(buffer, size); +} - write_if_needed(); +static void write_global_extended_header(const unsigned char *sha1) +{ + struct strbuf ext_header; + ext_header.buf = NULL; + ext_header.len = ext_header.alloc = 0; + strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); + write_entry(NULL, NULL, 0, ext_header.buf, ext_header.len); + free(ext_header.buf); } -static void traverse_tree(struct tree_desc *tree, - struct path_prefix *prefix) +static void traverse_tree(struct tree_desc *tree, struct strbuf *path) { - struct path_prefix this_prefix; - this_prefix.prev = prefix; + int pathlen = path->len; while (tree->size) { const char *name; @@ -358,16 +284,19 @@ static void traverse_tree(struct tree_desc *tree, eltbuf = 
read_sha1_file(sha1, elttype, &eltsize); if (!eltbuf) die("cannot read %s", sha1_to_hex(sha1)); - write_header(sha1, TYPEFLAG_AUTO, basedir, - prefix, name, mode, eltbuf, eltsize); + + path->len = pathlen; + strbuf_append_string(path, name); + if (S_ISDIR(mode)) + strbuf_append_string(path, "/"); + + write_entry(sha1, path, mode, eltbuf, eltsize); + if (S_ISDIR(mode)) { struct tree_desc subtree; subtree.buf = eltbuf; subtree.size = eltsize; - this_prefix.name = name; - traverse_tree(&subtree, &this_prefix); - } else if (!S_ISLNK(mode)) { - write_blocked(eltbuf, eltsize); + traverse_tree(&subtree, path); } free(eltbuf); } @@ -375,15 +304,22 @@ static void traverse_tree(struct tree_desc *tree, int main(int argc, char **argv) { - unsigned char sha1[20]; + unsigned char sha1[20], tree_sha1[20]; struct commit *commit; struct tree_desc tree; + struct strbuf current_path; + + current_path.buf = xmalloc(PATH_MAX); + current_path.alloc = PATH_MAX; + current_path.len = current_path.eof = 0; setup_git_directory(); + git_config(git_default_config); switch (argc) { case 3: - basedir = argv[2]; + strbuf_append_string(¤t_path, argv[2]); + strbuf_append_string(¤t_path, "/"); /* FALLTHROUGH */ case 2: if (get_sha1(argv[1], sha1) < 0) @@ -397,17 +333,19 @@ int main(int argc, char **argv) if (commit) { write_global_extended_header(commit->object.sha1); archive_time = commit->date; - } - tree.buf = read_object_with_reference(sha1, "tree", &tree.size, NULL); + } else + archive_time = time(NULL); + + tree.buf = read_object_with_reference(sha1, "tree", &tree.size, + tree_sha1); if (!tree.buf) die("not a reference to a tag, commit or tree object: %s", sha1_to_hex(sha1)); - if (!archive_time) - archive_time = time(NULL); - if (basedir) - write_header((unsigned char *)"0", TYPEFLAG_DIR, NULL, NULL, - basedir, 040777, NULL, 0); - traverse_tree(&tree, NULL); + + if (current_path.len > 0) + write_entry(tree_sha1, ¤t_path, 040777, NULL, 0); + traverse_tree(&tree, ¤t_path); write_trailer(); + 
free(current_path.buf); return 0; } diff --git a/tar.h b/tar.h new file mode 100644 index 0000000000..3467705e9b --- /dev/null +++ b/tar.h @@ -0,0 +1,25 @@ +#define TYPEFLAG_AUTO '\0' +#define TYPEFLAG_REG '0' +#define TYPEFLAG_LNK '2' +#define TYPEFLAG_DIR '5' +#define TYPEFLAG_GLOBAL_HEADER 'g' +#define TYPEFLAG_EXT_HEADER 'x' + +struct ustar_header { + char name[100]; /* 0 */ + char mode[8]; /* 100 */ + char uid[8]; /* 108 */ + char gid[8]; /* 116 */ + char size[12]; /* 124 */ + char mtime[12]; /* 136 */ + char chksum[8]; /* 148 */ + char typeflag[1]; /* 156 */ + char linkname[100]; /* 157 */ + char magic[6]; /* 257 */ + char version[2]; /* 263 */ + char uname[32]; /* 265 */ + char gname[32]; /* 297 */ + char devmajor[8]; /* 329 */ + char devminor[8]; /* 337 */ + char prefix[155]; /* 345 */ +}; diff --git a/unpack-file.c b/unpack-file.c index 07303f8bb3..3accb974dd 100644 --- a/unpack-file.c +++ b/unpack-file.c @@ -30,6 +30,7 @@ int main(int argc, char **argv) usage("git-unpack-file "); setup_git_directory(); + git_config(git_default_config); puts(create_temp_file(sha1)); return 0; diff --git a/update-ref.c b/update-ref.c index e6fbddbab6..ba4bf5153e 100644 --- a/update-ref.c +++ b/update-ref.c @@ -25,6 +25,7 @@ int main(int argc, char **argv) int fd, written; setup_git_directory(); + git_config(git_default_config); if (argc < 3 || argc > 4) usage(git_update_ref_usage); diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h new file mode 100644 index 0000000000..71cb9391af --- /dev/null +++ b/xdiff/xdiff.h @@ -0,0 +1,91 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi + * + */ + +#if !defined(XDIFF_H) +#define XDIFF_H + +#ifdef __cplusplus +extern "C" { +#endif /* #ifdef __cplusplus */ + + +#define XDF_NEED_MINIMAL (1 << 1) + +#define XDL_PATCH_NORMAL '-' +#define XDL_PATCH_REVERSE '+' +#define XDL_PATCH_MODEMASK ((1 << 8) - 1) +#define XDL_PATCH_IGNOREBSPACE (1 << 8) + +#define XDL_MMB_READONLY (1 << 0) + +#define XDL_MMF_ATOMIC (1 << 0) + +#define XDL_BDOP_INS 1 +#define XDL_BDOP_CPY 2 +#define XDL_BDOP_INSB 3 + + +typedef struct s_mmfile { + char *ptr; + long size; +} mmfile_t; + +typedef struct s_mmbuffer { + char *ptr; + long size; +} mmbuffer_t; + +typedef struct s_xpparam { + unsigned long flags; +} xpparam_t; + +typedef struct s_xdemitcb { + void *priv; + int (*outf)(void *, mmbuffer_t *, int); +} xdemitcb_t; + +typedef struct s_xdemitconf { + long ctxlen; +} xdemitconf_t; + +typedef struct s_bdiffparam { + long bsize; +} bdiffparam_t; + + +#define xdl_malloc(x) malloc(x) +#define xdl_free(ptr) free(ptr) +#define xdl_realloc(ptr,x) realloc(ptr,x) + +void *xdl_mmfile_first(mmfile_t *mmf, long *size); +void *xdl_mmfile_next(mmfile_t *mmf, long *size); +long xdl_mmfile_size(mmfile_t *mmf); + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb); + +#ifdef __cplusplus +} +#endif /* #ifdef __cplusplus */ + +#endif /* #if !defined(XDIFF_H) */ + diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c new file mode 100644 index 0000000000..8ea04837ec --- /dev/null +++ b/xdiff/xdiffi.c @@ 
-0,0 +1,469 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi + * + */ + +#include "xinclude.h" + + + +#define XDL_MAX_COST_MIN 256 +#define XDL_HEUR_MIN_COST 256 +#define XDL_LINE_MAX (long)((1UL << (8 * sizeof(long) - 1)) - 1) +#define XDL_SNAKE_CNT 20 +#define XDL_K_HEUR 4 + + + +typedef struct s_xdpsplit { + long i1, i2; + int min_lo, min_hi; +} xdpsplit_t; + + + + +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv); +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); + + + + +/* + * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. + * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both + * the forward diagonal starting from (off1, off2) and the backward diagonal + * starting from (lim1, lim2). If the K values on the same diagonal crosses + * returns the furthest point of reach. 
We might end up having to expensive + * cases using this algorithm is full, so a little bit of heuristic is needed + * to cut the search and to return a suboptimal point. + */ +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv) { + long dmin = off1 - lim2, dmax = lim1 - off2; + long fmid = off1 - off2, bmid = lim1 - lim2; + long odd = (fmid - bmid) & 1; + long fmin = fmid, fmax = fmid; + long bmin = bmid, bmax = bmid; + long ec, d, i1, i2, prev1, best, dd, v, k; + + /* + * Set initial diagonal values for both forward and backward path. + */ + kvdf[fmid] = off1; + kvdb[bmid] = lim1; + + for (ec = 1;; ec++) { + int got_snake = 0; + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the extenal K value to -1 so that we can + * avoid extra conditions check inside the core loop. + */ + if (fmin > dmin) + kvdf[--fmin - 1] = -1; + else + ++fmin; + if (fmax < dmax) + kvdf[++fmax + 1] = -1; + else + --fmax; + + for (d = fmax; d >= fmin; d -= 2) { + if (kvdf[d - 1] >= kvdf[d + 1]) + i1 = kvdf[d - 1] + 1; + else + i1 = kvdf[d + 1]; + prev1 = i1; + i2 = i1 - d; + for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++); + if (i1 - prev1 > xenv->snake_cnt) + got_snake = 1; + kvdf[d] = i1; + if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the extenal K value to -1 so that we can + * avoid extra conditions check inside the core loop. 
+ */ + if (bmin > dmin) + kvdb[--bmin - 1] = XDL_LINE_MAX; + else + ++bmin; + if (bmax < dmax) + kvdb[++bmax + 1] = XDL_LINE_MAX; + else + --bmax; + + for (d = bmax; d >= bmin; d -= 2) { + if (kvdb[d - 1] < kvdb[d + 1]) + i1 = kvdb[d - 1]; + else + i1 = kvdb[d + 1] - 1; + prev1 = i1; + i2 = i1 - d; + for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--); + if (prev1 - i1 > xenv->snake_cnt) + got_snake = 1; + kvdb[d] = i1; + if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + if (need_min) + continue; + + /* + * If the edit cost is above the heuristic trigger and if + * we got a good snake, we sample current diagonals to see + * if some of the, have reached an "interesting" path. Our + * measure is a function of the distance from the diagonal + * corner (i1 + i2) penalized with the distance from the + * mid diagonal itself. If this value is above the current + * edit cost times a magic factor (XDL_K_HEUR) we consider + * it interesting. + */ + if (got_snake && ec > xenv->heur_min) { + for (best = 0, d = fmax; d >= fmin; d -= 2) { + dd = d > fmid ? d - fmid: fmid - d; + i1 = kvdf[d]; + i2 = i1 - d; + v = (i1 - off1) + (i2 - off2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 + xenv->snake_cnt <= i1 && i1 < lim1 && + off2 + xenv->snake_cnt <= i2 && i2 < lim2) { + for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++) + if (k == xenv->snake_cnt) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 1; + spl->min_hi = 0; + return ec; + } + + for (best = 0, d = bmax; d >= bmin; d -= 2) { + dd = d > bmid ? 
d - bmid: bmid - d; + i1 = kvdb[d]; + i2 = i1 - d; + v = (lim1 - i1) + (lim2 - i2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 < i1 && i1 <= lim1 - xenv->snake_cnt && + off2 < i2 && i2 <= lim2 - xenv->snake_cnt) { + for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++) + if (k == xenv->snake_cnt - 1) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 0; + spl->min_hi = 1; + return ec; + } + } + + /* + * Enough is enough. We spent too much time here and now we collect + * the furthest reaching path using the (i1 + i2) measure. + */ + if (ec >= xenv->mxcost) { + long fbest, fbest1, bbest, bbest1; + + fbest = -1; + for (d = fmax; d >= fmin; d -= 2) { + i1 = XDL_MIN(kvdf[d], lim1); + i2 = i1 - d; + if (lim2 < i2) + i1 = lim2 + d, i2 = lim2; + if (fbest < i1 + i2) { + fbest = i1 + i2; + fbest1 = i1; + } + } + + bbest = XDL_LINE_MAX; + for (d = bmax; d >= bmin; d -= 2) { + i1 = XDL_MAX(off1, kvdb[d]); + i2 = i1 - d; + if (i2 < off2) + i1 = off2 + d, i2 = off2; + if (i1 + i2 < bbest) { + bbest = i1 + i2; + bbest1 = i1; + } + } + + if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) { + spl->i1 = fbest1; + spl->i2 = fbest - fbest1; + spl->min_lo = 1; + spl->min_hi = 0; + } else { + spl->i1 = bbest1; + spl->i2 = bbest - bbest1; + spl->min_lo = 0; + spl->min_hi = 1; + } + return ec; + } + } + + return -1; +} + + +/* + * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling + * the box splitting function. Note that the real job (marking changed lines) + * is done in the two boundary reaching checks. + */ +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { + unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; + + /* + * Shrink the box by walking through each diagonal snake (SW and NE). 
+ */ + for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); + for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); + + /* + * If one dimension is empty, then all records on the other one must + * be obviously changed. + */ + if (off1 == lim1) { + char *rchg2 = dd2->rchg; + long *rindex2 = dd2->rindex; + + for (; off2 < lim2; off2++) + rchg2[rindex2[off2]] = 1; + } else if (off2 == lim2) { + char *rchg1 = dd1->rchg; + long *rindex1 = dd1->rindex; + + for (; off1 < lim1; off1++) + rchg1[rindex1[off1]] = 1; + } else { + long ec; + xdpsplit_t spl; + + /* + * Divide ... + */ + if ((ec = xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, + need_min, &spl, xenv)) < 0) { + + return -1; + } + + /* + * ... et Impera. + */ + if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, + kvdf, kvdb, spl.min_lo, xenv) < 0 || + xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, + kvdf, kvdb, spl.min_hi, xenv) < 0) { + + return -1; + } + } + + return 0; +} + + +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe) { + long ndiags; + long *kvd, *kvdf, *kvdb; + xdalgoenv_t xenv; + diffdata_t dd1, dd2; + + if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { + + return -1; + } + + /* + * Allocate and setup K vectors to be used by the differential algorithm. + * One is to store the forward path and one to store the backward path. + */ + ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; + if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { + + xdl_free_env(xe); + return -1; + } + kvdf = kvd; + kvdb = kvdf + ndiags; + kvdf += xe->xdf2.nreff + 1; + kvdb += xe->xdf2.nreff + 1; + + /* + * Classical integer square root approximation using shifts. 
+ */ + xenv.mxcost = 1; + for (; ndiags; ndiags >>= 2) + xenv.mxcost <<= 1; + if (xenv.mxcost < XDL_MAX_COST_MIN) + xenv.mxcost = XDL_MAX_COST_MIN; + xenv.snake_cnt = XDL_SNAKE_CNT; + xenv.heur_min = XDL_HEUR_MIN_COST; + + dd1.nrec = xe->xdf1.nreff; + dd1.ha = xe->xdf1.ha; + dd1.rchg = xe->xdf1.rchg; + dd1.rindex = xe->xdf1.rindex; + dd2.nrec = xe->xdf2.nreff; + dd2.ha = xe->xdf2.ha; + dd2.rchg = xe->xdf2.rchg; + dd2.rindex = xe->xdf2.rindex; + + if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, + kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) { + + xdl_free(kvd); + xdl_free_env(xe); + return -1; + } + + xdl_free(kvd); + + return 0; +} + + +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) { + xdchange_t *xch; + + if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) + return NULL; + + xch->next = xscr; + xch->i1 = i1; + xch->i2 = i2; + xch->chg1 = chg1; + xch->chg2 = chg2; + + return xch; +} + + +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { + xdchange_t *cscr = NULL, *xch; + char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; + long i1, i2, l1, l2; + + /* + * Trivial. Collects "groups" of changes and creates an edit script. 
+ */ + for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) + if (rchg1[i1 - 1] || rchg2[i2 - 1]) { + for (l1 = i1; rchg1[i1 - 1]; i1--); + for (l2 = i2; rchg2[i2 - 1]; i2--); + + if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { + xdl_free_script(cscr); + return -1; + } + cscr = xch; + } + + *xscr = cscr; + + return 0; +} + + +void xdl_free_script(xdchange_t *xscr) { + xdchange_t *xch; + + while ((xch = xscr) != NULL) { + xscr = xscr->next; + xdl_free(xch); + } +} + + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb) { + xdchange_t *xscr; + xdfenv_t xe; + + if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) { + + return -1; + } + + if (xdl_build_script(&xe, &xscr) < 0) { + + xdl_free_env(&xe); + return -1; + } + + if (xscr) { + if (xdl_emit_diff(&xe, xscr, ecb, xecfg) < 0) { + + xdl_free_script(xscr); + xdl_free_env(&xe); + return -1; + } + + xdl_free_script(xscr); + } + + xdl_free_env(&xe); + + return 0; +} + diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h new file mode 100644 index 0000000000..dd8f3c986b --- /dev/null +++ b/xdiff/xdiffi.h @@ -0,0 +1,60 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi + * + */ + +#if !defined(XDIFFI_H) +#define XDIFFI_H + + +typedef struct s_diffdata { + long nrec; + unsigned long const *ha; + long *rindex; + char *rchg; +} diffdata_t; + +typedef struct s_xdalgoenv { + long mxcost; + long snake_cnt; + long heur_min; +} xdalgoenv_t; + +typedef struct s_xdchange { + struct s_xdchange *next; + long i1, i2; + long chg1, chg2; +} xdchange_t; + + + +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe); +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); +void xdl_free_script(xdchange_t *xscr); +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); + + +#endif /* #if !defined(XDIFFI_H) */ + diff --git a/xdiff/xemit.c b/xdiff/xemit.c new file mode 100644 index 0000000000..2e5d54cfcf --- /dev/null +++ b/xdiff/xemit.c @@ -0,0 +1,141 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi + * + */ + +#include "xinclude.h" + + + + +static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec); +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb); +static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg); + + + + +static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { + + *rec = xdf->recs[ri]->ptr; + + return xdf->recs[ri]->size; +} + + +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { + long size, psize = strlen(pre); + char const *rec; + + size = xdl_get_rec(xdf, ri, &rec); + if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { + + return -1; + } + + return 0; +} + + +/* + * Starting at the passed change atom, find the latest change atom to be included + * inside the differential hunk according to the specified configuration. + */ +static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) { + xdchange_t *xch, *xchp; + + for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next) + if (xch->i1 - (xchp->i1 + xchp->chg1) > 2 * xecfg->ctxlen) + break; + + return xchp; +} + + +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg) { + long s1, s2, e1, e2, lctx; + xdchange_t *xch, *xche; + + for (xch = xche = xscr; xch; xch = xche->next) { + xche = xdl_get_hunk(xch, xecfg); + + s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); + s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); + + lctx = xecfg->ctxlen; + lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); + lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); + + e1 = xche->i1 + xche->chg1 + lctx; + e2 = xche->i2 + xche->chg2 + lctx; + + /* + * Emit current hunk header. 
+ */ + if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, ecb) < 0) + return -1; + + /* + * Emit pre-context. + */ + for (; s1 < xch->i1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + + for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) { + /* + * Merge previous with current change atom. + */ + for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + + /* + * Removes lines from the first file. + */ + for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0) + return -1; + + /* + * Adds lines from the second file. + */ + for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++) + if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0) + return -1; + + if (xch == xche) + break; + s1 = xch->i1 + xch->chg1; + s2 = xch->i2 + xch->chg2; + } + + /* + * Emit post-context. + */ + for (s1 = xche->i1 + xche->chg1; s1 < e1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + } + + return 0; +} + diff --git a/xdiff/xemit.h b/xdiff/xemit.h new file mode 100644 index 0000000000..e629417dd2 --- /dev/null +++ b/xdiff/xemit.h @@ -0,0 +1,34 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XEMIT_H)
+#define XEMIT_H
+
+
+
+/*
+ * Emit the change list xscr for the prepared file pair xe through the
+ * callback ecb, using the context length from xecfg.
+ * Returns 0 on success and -1 when emitting fails.
+ */
+int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+		  xdemitconf_t const *xecfg);
+
+
+
+#endif /* #if !defined(XEMIT_H) */
+
diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h
new file mode 100644
index 0000000000..9490fc56ca
--- /dev/null
+++ b/xdiff/xinclude.h
@@ -0,0 +1,42 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XINCLUDE_H)
+#define XINCLUDE_H
+
+/*
+ * System headers shared by every xdiff source file.
+ * NOTE(review): this copy carried five "#include" lines whose header names
+ * were missing. <string.h> (strlen, memcmp, memset, memchr, memcpy) and
+ * <limits.h> (CHAR_BIT) are required by the code added in this patch;
+ * <stdio.h>, <stdlib.h> and <unistd.h> are presumed - confirm against the
+ * upstream file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+
+#include "xmacros.h"
+#include "xdiff.h"
+#include "xtypes.h"
+#include "xutils.h"
+#include "xprepare.h"
+#include "xdiffi.h"
+#include "xemit.h"
+
+
+#endif /* #if !defined(XINCLUDE_H) */
+
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
new file mode 100644
index 0000000000..4c2fde80c1
--- /dev/null
+++ b/xdiff/xmacros.h
@@ -0,0 +1,53 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XMACROS_H)
+#define XMACROS_H
+
+
+/* Multiplier used by XDL_HASHLONG(). */
+#define GR_PRIME 0x9e370001UL
+
+
+/*
+ * XDL_MIN, XDL_MAX and XDL_ABS expand their arguments more than once; do
+ * not pass expressions with side effects.
+ */
+#define XDL_MIN(a, b) ((a) < (b) ? (a): (b))
+#define XDL_MAX(a, b) ((a) > (b) ? (a): (b))
+#define XDL_ABS(v) ((v) >= 0 ? (v): -(v))
+/* Non-zero when c is one of the characters '0'..'9'. */
+#define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+/*
+ * Reduce v to a b-bit bucket index: multiply by GR_PRIME in unsigned long
+ * arithmetic and keep the b most significant bits of the product.
+ * b must be greater than zero and smaller than the bit width of
+ * unsigned long, otherwise the shift count is out of range.
+ */
+#define XDL_HASHLONG(v, b) (((unsigned long)(v) * GR_PRIME) >> ((CHAR_BIT * sizeof(unsigned long)) - (b)))
+/* Release p with xdl_free() if it is set and reset it to NULL. */
+#define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0)
+/* Store the low 32 bits of v at p as four bytes, least significant first. */
+#define XDL_LE32_PUT(p, v) \
+do { \
+	unsigned char *__p = (unsigned char *) (p); \
+	*__p++ = (unsigned char) (v); \
+	*__p++ = (unsigned char) ((v) >> 8); \
+	*__p++ = (unsigned char) ((v) >> 16); \
+	*__p = (unsigned char) ((v) >> 24); \
+} while (0)
+/* Load into v the value of the four bytes at p, least significant first. */
+#define XDL_LE32_GET(p, v) \
+do { \
+	unsigned char const *__p = (unsigned char const *) (p); \
+	(v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \
+		((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
+} while (0)
+
+
+#endif /* #if !defined(XMACROS_H) */
+
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
new file mode 100644
index 0000000000..27a08791a2
--- /dev/null
+++ b/xdiff/xprepare.c
@@ -0,0 +1,436 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+/*
+ * Ratio used by xdl_clean_mmatch(): a multi-match record is dropped when
+ * the multi-match records make up less than 1/XDL_KPDIS_RUN of the run of
+ * neighbouring records examined around it.
+ */
+#define XDL_KPDIS_RUN 4
+
+
+
+/*
+ * One entry per distinct line content seen by the classifier.
+ */
+typedef struct s_xdlclass {
+	struct s_xdlclass *next;	/* next entry in the same hash bucket */
+	unsigned long ha;		/* hash of the line as computed by xdl_hash_record() */
+	char const *line;		/* first record found with this content */
+	long size;			/* size of that record */
+	long idx;			/* sequential class number, starting at 0 */
+} xdlclass_t;
+
+/*
+ * Hash table mapping line contents to class numbers.
+ */
+typedef struct s_xdlclassifier {
+	unsigned int hbits;		/* number of bits of a bucket index */
+	long hsize;			/* number of buckets, 1 << hbits */
+	xdlclass_t **rchash;		/* bucket heads */
+	chastore_t ncha;		/* allocator the xdlclass_t entries come from */
+	long count;			/* number of classes handed out so far */
+} xdlclassifier_t;
+
+
+
+
+static int xdl_init_classifier(xdlclassifier_t *cf, long size);
+static void xdl_free_classifier(xdlclassifier_t *cf);
+static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
+			       xrecord_t *rec);
+static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+			   xdlclassifier_t *cf, xdfile_t *xdf);
+static void xdl_free_ctx(xdfile_t *xdf);
+static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
+static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2);
+
+
+
+
+/*
+ * Initialise cf with an empty bucket array sized from the expected number
+ * of records (size) and an entry allocator that grows in steps of
+ * size / 4 + 1 entries.
+ * Returns 0 on success, -1 on allocation failure (nothing is left
+ * allocated in that case).
+ */
+static int xdl_init_classifier(xdlclassifier_t *cf, long size) {
+	long i;
+
+	cf->hbits = xdl_hashbits((unsigned int) size);
+	cf->hsize = 1 << cf->hbits;
+
+	if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) {
+
+		return -1;
+	}
+	if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) {
+
+		xdl_cha_free(&cf->ncha);
+		return -1;
+	}
+	for (i = 0; i < cf->hsize; i++)
+		cf->rchash[i] = NULL;
+
+	cf->count = 0;
+
+	return 0;
+}
+
+
+/*
+ * Release the bucket array and every entry owned by the classifier.
+ */
+static void xdl_free_classifier(xdlclassifier_t *cf) {
+
+	xdl_free(cf->rchash);
+	xdl_cha_free(&cf->ncha);
+}
+
+
+/*
+ * Assign rec to a line class and link it into the per-file hash rhash.
+ *
+ * The classifier is searched for an entry with the same hash, size and
+ * bytes as rec; if there is none a new entry with the next free class
+ * number is created. rec->ha is then overwritten with the class number, so
+ * two records classified through the same cf end up with equal ha exactly
+ * when their contents are equal. Finally rec is pushed on the rhash bucket
+ * selected by the new ha and hbits.
+ *
+ * Returns 0 on success, -1 if a new class entry cannot be allocated.
+ */
+static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
+			       xrecord_t *rec) {
+	long hi;
+	char const *line;
+	xdlclass_t *rcrec;
+
+	line = rec->ptr;
+	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
+	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
+		if (rcrec->ha == rec->ha && rcrec->size == rec->size &&
+		    !memcmp(line, rcrec->line, rec->size))
+			break;
+
+	if (!rcrec) {
+		if (!(rcrec = xdl_cha_alloc(&cf->ncha))) {
+
+			return -1;
+		}
+		rcrec->idx = cf->count++;
+		rcrec->line = line;
+		rcrec->size = rec->size;
+		rcrec->ha = rec->ha;
+		rcrec->next = cf->rchash[hi];
+		cf->rchash[hi] = rcrec;
+	}
+
+	/* From here on ha holds the class number, not the content hash. */
+	rec->ha = (unsigned long) rcrec->idx;
+
+	hi = (long) XDL_HASHLONG(rec->ha, hbits);
+	rec->next = rhash[hi];
+	rhash[hi] = rec;
+
+	return 0;
+}
+
+
+/*
+ * Split the file mf into records and fill in xdf.
+ *
+ * mf    - file to scan, walked with xdl_mmfile_first()/xdl_mmfile_next().
+ * narec - initial guess of the number of records; the record array is
+ *         doubled whenever the guess turns out too small.
+ * xpp   - not used by this function.
+ * cf    - classifier every record is run through (see
+ *         xdl_classify_record()).
+ * xdf   - receives the record array, the per-file hash and the work
+ *         arrays rchg, rindex and ha; dstart/dend are set to cover all
+ *         records.
+ *
+ * Returns 0 on success. On allocation failure everything allocated here is
+ * released again and -1 is returned.
+ */
+static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+			   xdlclassifier_t *cf, xdfile_t *xdf) {
+	unsigned int hbits;
+	long i, nrec, hsize, bsize;
+	unsigned long hav;
+	char const *blk, *cur, *top, *prev;
+	xrecord_t *crec;
+	xrecord_t **recs, **rrecs;
+	xrecord_t **rhash;
+	unsigned long *ha;
+	char *rchg;
+	long *rindex;
+
+	if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) {
+
+		return -1;
+	}
+	if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) {
+
+		xdl_cha_free(&xdf->rcha);
+		return -1;
+	}
+
+	hbits = xdl_hashbits((unsigned int) narec);
+	hsize = 1 << hbits;
+	if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) {
+
+		xdl_free(recs);
+		xdl_cha_free(&xdf->rcha);
+		return -1;
+	}
+	for (i = 0; i < hsize; i++)
+		rhash[i] = NULL;
+
+	nrec = 0;
+	if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
+		for (top = blk + bsize;;) {
+			if (cur >= top) {
+				if (!(cur = blk = xdl_mmfile_next(mf, &bsize)))
+					break;
+				top = blk + bsize;
+			}
+			/* xdl_hash_record() advances cur past the record. */
+			prev = cur;
+			hav = xdl_hash_record(&cur, top);
+			if (nrec >= narec) {
+				narec *= 2;
+				if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
+
+					xdl_free(rhash);
+					xdl_free(recs);
+					xdl_cha_free(&xdf->rcha);
+					return -1;
+				}
+				recs = rrecs;
+			}
+			if (!(crec = xdl_cha_alloc(&xdf->rcha))) {
+
+				xdl_free(rhash);
+				xdl_free(recs);
+				xdl_cha_free(&xdf->rcha);
+				return -1;
+			}
+			crec->ptr = prev;
+			crec->size = (long) (cur - prev);
+			crec->ha = hav;
+			recs[nrec++] = crec;
+
+			if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
+
+				xdl_free(rhash);
+				xdl_free(recs);
+				xdl_cha_free(&xdf->rcha);
+				return -1;
+			}
+		}
+	}
+
+	/*
+	 * nrec + 2 bytes are allocated and xdf->rchg is set one byte past the
+	 * start, leaving one zeroed spare byte on each side of the nrec flags.
+	 */
+	if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) {
+
+		xdl_free(rhash);
+		xdl_free(recs);
+		xdl_cha_free(&xdf->rcha);
+		return -1;
+	}
+	memset(rchg, 0, (nrec + 2) * sizeof(char));
+
+	if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) {
+
+		xdl_free(rchg);
+		xdl_free(rhash);
+		xdl_free(recs);
+		xdl_cha_free(&xdf->rcha);
+		return -1;
+	}
+	if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) {
+
+		xdl_free(rindex);
+		xdl_free(rchg);
+		xdl_free(rhash);
+		xdl_free(recs);
+		xdl_cha_free(&xdf->rcha);
+		return -1;
+	}
+
+	xdf->nrec = nrec;
+	xdf->recs = recs;
+	xdf->hbits = hbits;
+	xdf->rhash = rhash;
+	xdf->rchg = rchg + 1;
+	xdf->rindex = rindex;
+	xdf->nreff = 0;
+	xdf->ha = ha;
+	xdf->dstart = 0;
+	xdf->dend = nrec - 1;
+
+	return 0;
+}
+
+
+/*
+ * Release everything xdl_prepare_ctx() stored in xdf.
+ */
+static void xdl_free_ctx(xdfile_t *xdf) {
+
+	xdl_free(xdf->rhash);
+	xdl_free(xdf->rindex);
+	/* rchg points one byte into its allocation, see xdl_prepare_ctx(). */
+	xdl_free(xdf->rchg - 1);
+	xdl_free(xdf->ha);
+	xdl_free(xdf->recs);
+	xdl_cha_free(&xdf->rcha);
+}
+
+
+/*
+ * Prepare both files for diffing and store the result in xe.
+ *
+ * Both files are split into records through one shared classifier, so
+ * equal lines of the two files receive the same class number; the
+ * classifier is released afterwards and xdl_optimize_ctxs() is run on the
+ * pair.
+ *
+ * Returns 0 on success, in which case the caller releases xe with
+ * xdl_free_env(). Returns -1 on failure with nothing left allocated.
+ */
+int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+		    xdfenv_t *xe) {
+	long enl1, enl2;
+	xdlclassifier_t cf;
+
+	enl1 = xdl_guess_lines(mf1) + 1;
+	enl2 = xdl_guess_lines(mf2) + 1;
+
+	if (xdl_init_classifier(&cf, enl1 + enl2 + 1) < 0) {
+
+		return -1;
+	}
+
+	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+
+		xdl_free_classifier(&cf);
+		return -1;
+	}
+	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+
+		xdl_free_ctx(&xe->xdf1);
+		xdl_free_classifier(&cf);
+		return -1;
+	}
+
+	xdl_free_classifier(&cf);
+
+	if (xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
+
+		xdl_free_ctx(&xe->xdf2);
+		xdl_free_ctx(&xe->xdf1);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Release an environment set up by xdl_prepare_env().
+ */
+void xdl_free_env(xdfenv_t *xe) {
+
+	xdl_free_ctx(&xe->xdf2);
+	xdl_free_ctx(&xe->xdf1);
+}
+
+
+/*
+ * Decide whether the multi-match record at index i should be discarded.
+ *
+ * dis holds one counter per record as filled in by xdl_cleanup_records():
+ * 0 = no match in the other file, 1 = one match, 2 = two or more matches.
+ * Starting next to i and moving outwards in both directions within [s, e],
+ * records with value 0 (rdis) and value 2 (rpdis, which starts at 1 to
+ * account for record i itself) are counted until a record with any other
+ * value stops the scan on that side.
+ *
+ * Returns non-zero when rpdis * XDL_KPDIS_RUN < rpdis + rdis.
+ */
+static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
+	long r, rdis, rpdis;
+
+	for (r = 1, rdis = 0, rpdis = 1; (i - r) >= s; r++) {
+		if (!dis[i - r])
+			rdis++;
+		else if (dis[i - r] == 2)
+			rpdis++;
+		else
+			break;
+	}
+	for (r = 1; (i + r) <= e; r++) {
+		if (!dis[i + r])
+			rdis++;
+		else if (dis[i + r] == 2)
+			rpdis++;
+		else
+			break;
+	}
+
+	return rpdis * XDL_KPDIS_RUN < (rpdis + rdis);
+}
+
+
+/*
+ * Try to reduce the problem complexity, discard records that have no
+ * matches on the other file. Also, lines that have multiple matches
+ * might be potentially discarded if they appear in a run of discardable.
+ *
+ * Only records in [dstart, dend] are looked at. Records that are kept get
+ * their index and ha appended to rindex[] / ha[] and are counted in nreff;
+ * discarded records get rchg[i] set to 1 instead.
+ * Returns 0 on success, -1 if the scratch array cannot be allocated.
+ */
+static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
+	long i, rhi, nreff;
+	unsigned long hav;
+	xrecord_t **recs;
+	xrecord_t *rec;
+	char *dis, *dis1, *dis2;
+
+	/* One zeroed array holds the counters of both files, dis1 then dis2. */
+	if (!(dis = (char *) xdl_malloc((xdf1->nrec + xdf2->nrec + 2) * sizeof(char)))) {
+
+		return -1;
+	}
+	memset(dis, 0, (xdf1->nrec + xdf2->nrec + 2) * sizeof(char));
+	dis1 = dis;
+	dis2 = dis1 + xdf1->nrec + 1;
+
+	/* Count, up to 2, how often each record of file 1 occurs in file 2. */
+	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
+		hav = (*recs)->ha;
+		rhi = (long) XDL_HASHLONG(hav, xdf2->hbits);
+		for (rec = xdf2->rhash[rhi]; rec; rec = rec->next)
+			if (rec->ha == hav && ++dis1[i] == 2)
+				break;
+	}
+
+	/* Same count for the records of file 2 against file 1. */
+	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
+		hav = (*recs)->ha;
+		rhi = (long) XDL_HASHLONG(hav, xdf1->hbits);
+		for (rec = xdf1->rhash[rhi]; rec; rec = rec->next)
+			if (rec->ha == hav && ++dis2[i] == 2)
+				break;
+	}
+
+	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
+	     i <= xdf1->dend; i++, recs++) {
+		if (dis1[i] == 1 ||
+		    (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) {
+			xdf1->rindex[nreff] = i;
+			xdf1->ha[nreff] = (*recs)->ha;
+			nreff++;
+		} else
+			xdf1->rchg[i] = 1;
+	}
+	xdf1->nreff = nreff;
+
+	for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
+	     i <= xdf2->dend; i++, recs++) {
+		if (dis2[i] == 1 ||
+		    (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) {
+			xdf2->rindex[nreff] = i;
+			xdf2->ha[nreff] = (*recs)->ha;
+			nreff++;
+		} else
+			xdf2->rchg[i] = 1;
+	}
+	xdf2->nreff = nreff;
+
+	xdl_free(dis);
+
+	return 0;
+}
+
+
+/*
+ * Early trim initial and terminal matching records.
+ *
+ * dstart of both files is set to the number of leading records with equal
+ * ha. The trailing scan is limited to the records left after that prefix
+ * in the shorter file, and dend of each file is set to the last record in
+ * front of the common tail. Always returns 0.
+ */
+static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
+	long i, lim;
+	xrecord_t **recs1, **recs2;
+
+	recs1 = xdf1->recs;
+	recs2 = xdf2->recs;
+	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+	     i++, recs1++, recs2++)
+		if ((*recs1)->ha != (*recs2)->ha)
+			break;
+
+	xdf1->dstart = xdf2->dstart = i;
+
+	recs1 = xdf1->recs + xdf1->nrec - 1;
+	recs2 = xdf2->recs + xdf2->nrec - 1;
+	for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
+		if ((*recs1)->ha != (*recs2)->ha)
+			break;
+
+	xdf1->dend = xdf1->nrec - i - 1;
+	xdf2->dend = xdf2->nrec - i - 1;
+
+	return 0;
+}
+
+
+/*
+ * Run xdl_trim_ends() followed by xdl_cleanup_records() on the file pair.
+ * Returns 0 on success, -1 if either step fails.
+ */
+static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) {
+
+	if (xdl_trim_ends(xdf1, xdf2) < 0 ||
+	    xdl_cleanup_records(xdf1, xdf2) < 0) {
+
+		return -1;
+	}
+
+	return 0;
+}
+
diff --git a/xdiff/xprepare.h b/xdiff/xprepare.h
new file mode 100644
index 0000000000..344c569e8b
--- /dev/null
+++ b/xdiff/xprepare.h
@@ -0,0 +1,35 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XPREPARE_H)
+#define XPREPARE_H
+
+
+
+/*
+ * xdl_prepare_env() fills xe from the two files and returns 0, or returns
+ * -1 on failure; xdl_free_env() releases an environment that was prepared
+ * successfully.
+ */
+int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+		    xdfenv_t *xe);
+void xdl_free_env(xdfenv_t *xe);
+
+
+
+#endif /* #if !defined(XPREPARE_H) */
+
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
new file mode 100644
index 0000000000..3593a664fc
--- /dev/null
+++ b/xdiff/xtypes.h
@@ -0,0 +1,68 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XTYPES_H)
+#define XTYPES_H
+
+
+
+/*
+ * Header of one storage node of a chastore_t; the item bytes follow the
+ * header in the same allocation.
+ */
+typedef struct s_chanode {
+	struct s_chanode *next;	/* next node of the store */
+	long icurr;		/* bytes of this node handed out so far */
+} chanode_t;
+
+/*
+ * Allocator for fixed-size items, served from a list of nodes
+ * (see the xdl_cha_*() functions).
+ */
+typedef struct s_chastore {
+	chanode_t *head, *tail;	/* first and last node */
+	long isize, nsize;	/* item size and payload size of a node, in bytes */
+	chanode_t *ancur;	/* node new items are currently taken from */
+	chanode_t *sncur;	/* node the xdl_cha_first()/xdl_cha_next() scan is in */
+	long scurr;		/* byte offset of that scan inside sncur */
+} chastore_t;
+
+/*
+ * One record (line) of a file.
+ */
+typedef struct s_xrecord {
+	struct s_xrecord *next;	/* next record in the same rhash bucket */
+	char const *ptr;	/* start of the record inside the file data */
+	long size;		/* record size in bytes */
+	unsigned long ha;	/* content hash, replaced by the class number once classified */
+} xrecord_t;
+
+/*
+ * One prepared file.
+ */
+typedef struct s_xdfile {
+	chastore_t rcha;	/* allocator the xrecord_t entries come from */
+	long nrec;		/* number of records */
+	unsigned int hbits;	/* number of bits of an rhash bucket index */
+	xrecord_t **rhash;	/* records hashed by ha */
+	long dstart, dend;	/* first and last record outside the common head and tail */
+	xrecord_t **recs;	/* records in file order */
+	char *rchg;		/* one flag per record; set to 1 for records discarded while preparing */
+	long *rindex;		/* indexes of the records that were kept */
+	long nreff;		/* number of entries used in rindex[] and ha[] */
+	unsigned long *ha;	/* ha of the kept records, parallel to rindex[] */
+} xdfile_t;
+
+/*
+ * The pair of prepared files a diff runs on.
+ */
+typedef struct s_xdfenv {
+	xdfile_t xdf1, xdf2;
+} xdfenv_t;
+
+
+
+#endif /* #if !defined(XTYPES_H) */
+
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
new file mode 100644
index 0000000000..8221806f78
--- /dev/null
+++ b/xdiff/xutils.c
@@ -0,0 +1,277 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+/* Maximum number of lines xdl_guess_lines() samples. */
+#define XDL_GUESS_NLINES 256
+
+
+
+
+/*
+ * Hand one output line to the callback: the prefix pre (psize bytes)
+ * followed by the record rec (size bytes), in a single outf() call.
+ * When the record is empty or does not end with a newline, a third buffer
+ * holding "\n\ No newline at end of file\n" is appended.
+ * Returns 0 on success, -1 if outf() reports a failure.
+ */
+int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
+		     xdemitcb_t *ecb) {
+	mmbuffer_t mb[3];
+	int i;
+
+	mb[0].ptr = (char *) pre;
+	mb[0].size = psize;
+	mb[1].ptr = (char *) rec;
+	mb[1].size = size;
+	i = 2;
+
+	if (!size || rec[size-1] != '\n') {
+		mb[2].ptr = "\n\\ No newline at end of file\n";
+		mb[2].size = strlen(mb[2].ptr);
+		i = 3;
+	}
+
+	if (ecb->outf(ecb->priv, mb, i) < 0) {
+
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the start of the file data and store its length in *size.
+ */
+void *xdl_mmfile_first(mmfile_t *mmf, long *size)
+{
+	*size = mmf->size;
+	return mmf->ptr;
+}
+
+
+/*
+ * Always returns NULL: the whole file is delivered by xdl_mmfile_first(),
+ * so there is never a following block. *size is left untouched.
+ */
+void *xdl_mmfile_next(mmfile_t *mmf, long *size)
+{
+	return NULL;
+}
+
+
+/*
+ * Return the total size of the file in bytes.
+ */
+long xdl_mmfile_size(mmfile_t *mmf)
+{
+	return mmf->size;
+}
+
+
+/*
+ * Initialise an empty store for items of isize bytes whose nodes hold
+ * icount items each. Nothing is allocated yet. Always returns 0.
+ */
+int xdl_cha_init(chastore_t *cha, long isize, long icount) {
+
+	cha->head = cha->tail = NULL;
+	cha->isize = isize;
+	cha->nsize = icount * isize;
+	cha->ancur = cha->sncur = NULL;
+	cha->scurr = 0;
+
+	return 0;
+}
+
+
+/*
+ * Release every node of the store, and with them every item handed out by
+ * xdl_cha_alloc(). The chastore_t fields themselves are not reset.
+ */
+void xdl_cha_free(chastore_t *cha) {
+	chanode_t *cur, *tmp;
+
+	for (cur = cha->head; (tmp = cur) != NULL;) {
+		cur = cur->next;
+		xdl_free(tmp);
+	}
+}
+
+
+/*
+ * Return storage for one more item, or NULL if a new node is needed and
+ * cannot be allocated. The returned memory is not initialised.
+ */
+void *xdl_cha_alloc(chastore_t *cha) {
+	chanode_t *ancur;
+	void *data;
+
+	/* Start a new node when there is none yet or the current one is full. */
+	if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
+		if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
+
+			return NULL;
+		}
+		ancur->icurr = 0;
+		ancur->next = NULL;
+		if (cha->tail)
+			cha->tail->next = ancur;
+		if (!cha->head)
+			cha->head = ancur;
+		cha->tail = ancur;
+		cha->ancur = ancur;
+	}
+
+	/* Items are laid out right behind the node header. */
+	data = (char *) ancur + sizeof(chanode_t) + ancur->icurr;
+	ancur->icurr += cha->isize;
+
+	return data;
+}
+
+
+/*
+ * Start a scan over the items of the store and return the first one, or
+ * NULL if the store has no node.
+ */
+void *xdl_cha_first(chastore_t *cha) {
+	chanode_t *sncur;
+
+	if (!(cha->sncur = sncur = cha->head))
+		return NULL;
+
+	cha->scurr = 0;
+
+	return (char *) sncur + sizeof(chanode_t) + cha->scurr;
+}
+
+
+/*
+ * Return the item following the one returned by the previous
+ * xdl_cha_first()/xdl_cha_next() call, or NULL at the end of the store.
+ */
+void *xdl_cha_next(chastore_t *cha) {
+	chanode_t *sncur;
+
+	if (!(sncur = cha->sncur))
+		return NULL;
+	cha->scurr += cha->isize;
+	if (cha->scurr == sncur->icurr) {
+		if (!(sncur = cha->sncur = sncur->next))
+			return NULL;
+		cha->scurr = 0;
+	}
+
+	return (char *) sncur + sizeof(chanode_t) + cha->scurr;
+}
+
+
+/*
+ * Estimate the number of lines of mf without scanning all of it.
+ *
+ * At most XDL_GUESS_NLINES lines are read from the start of the file; the
+ * file size divided by their average length gives the estimate. The value
+ * returned is that estimate plus one, so it is never zero.
+ */
+long xdl_guess_lines(mmfile_t *mf) {
+	long nl = 0, size, tsize = 0;
+	char const *data, *cur, *top;
+
+	if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) {
+		for (top = data + size; nl < XDL_GUESS_NLINES;) {
+			if (cur >= top) {
+				tsize += (long) (cur - data);
+				if (!(cur = data = xdl_mmfile_next(mf, &size)))
+					break;
+				top = data + size;
+			}
+			nl++;
+			if (!(cur = memchr(cur, '\n', top - cur)))
+				cur = top;
+			else
+				cur++;
+		}
+		tsize += (long) (cur - data);
+	}
+
+	/* Every sampled line is at least one byte long, so tsize / nl >= 1. */
+	if (nl && tsize)
+		nl = xdl_mmfile_size(mf) / (tsize / nl);
+
+	return nl + 1;
+}
+
+
+/*
+ * Hash the record starting at *data and advance *data past it.
+ *
+ * The record ends at the first '\n' or at top, whichever comes first; the
+ * newline itself is not hashed. On return *data points behind the newline,
+ * or at top when the record was not newline terminated.
+ */
+unsigned long xdl_hash_record(char const **data, char const *top) {
+	unsigned long ha = 5381;
+	char const *ptr = *data;
+
+	for (; ptr < top && *ptr != '\n'; ptr++) {
+		ha += (ha << 5);
+		ha ^= (unsigned long) *ptr;
+	}
+	*data = ptr < top ? ptr + 1: ptr;
+
+	return ha;
+}
+
+
+/*
+ * Return the smallest number of bits such that 1 << bits is not less than
+ * size. The result is at least 1 and at most the bit width of unsigned int.
+ */
+unsigned int xdl_hashbits(unsigned int size) {
+	unsigned int val = 1, bits = 0;
+
+	for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++);
+	return bits ? bits: 1;
+}
+
+
+/*
+ * Write val in decimal, NUL terminated, to out and return the number of
+ * characters written, not counting the terminator.
+ *
+ * Negative values are written with a leading '-'. The digits are produced
+ * from the unsigned magnitude so that the most negative long does not
+ * overflow on negation, and the sign is put in front of the digits only
+ * after all of them have been generated (the scratch buffer is filled from
+ * its end towards its start).
+ */
+int xdl_num_out(char *out, long val) {
+	char *ptr, *str = out;
+	char buf[32];
+	int neg = val < 0;
+	unsigned long mag = neg ? 0UL - (unsigned long) val : (unsigned long) val;
+
+	ptr = buf + sizeof(buf) - 1;
+	*ptr = '\0';
+	for (; mag && ptr > buf; mag /= 10)
+		*--ptr = "0123456789"[mag % 10];
+	if (!*ptr)
+		*--ptr = '0';
+	if (neg && ptr > buf)
+		*--ptr = '-';
+	for (; *ptr; ptr++, str++)
+		*str = *ptr;
+	*str = '\0';
+
+	return str - out;
+}
+
+
+/*
+ * Convert the run of decimal digits at the start of str to a long.
+ * If next is not NULL, *next is set to the first character that is not a
+ * digit. Returns 0 when str does not start with a digit; overflow is not
+ * detected.
+ */
+long xdl_atol(char const *str, char const **next) {
+	long val, base;
+	char const *top;
+
+	for (top = str; XDL_ISDIGIT(*top); top++);
+	if (next)
+		*next = top;
+	for (val = 0, base = 1, top--; top >= str; top--, base *= 10)
+		val += base * (long)(*top - '0');
+	return val;
+}
+
+
+/*
+ * Emit a hunk header of the form "@@ -s1,c1 +s2,c2 @@\n".
+ *
+ * s1/c1 are start and length in the first file, s2/c2 in the second one.
+ * A start is written as 0 when its length is 0, and ",length" is left out
+ * when the length is exactly 1.
+ * Returns 0 on success, -1 if the output callback reports a failure.
+ */
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb) {
+	int nb = 0;
+	mmbuffer_t mb;
+	char buf[128];
+
+	memcpy(buf, "@@ -", 4);
+	nb += 4;
+
+	nb += xdl_num_out(buf + nb, c1 ? s1: 0);
+
+	if (c1 != 1) {
+		memcpy(buf + nb, ",", 1);
+		nb += 1;
+
+		nb += xdl_num_out(buf + nb, c1);
+	}
+
+	memcpy(buf + nb, " +", 2);
+	nb += 2;
+
+	nb += xdl_num_out(buf + nb, c2 ? s2: 0);
+
+	if (c2 != 1) {
+		memcpy(buf + nb, ",", 1);
+		nb += 1;
+
+		nb += xdl_num_out(buf + nb, c2);
+	}
+
+	memcpy(buf + nb, " @@\n", 4);
+	nb += 4;
+
+	mb.ptr = buf;
+	mb.size = nb;
+	if (ecb->outf(ecb->priv, &mb, 1) < 0)
+		return -1;
+
+	return 0;
+}
+
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
new file mode 100644
index 0000000000..428a4bb1ef
--- /dev/null
+++ b/xdiff/xutils.h
@@ -0,0 +1,44 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi
+ *
+ */
+
+#if !defined(XUTILS_H)
+#define XUTILS_H
+
+
+/* Output of one prefixed record through the emit callback. */
+int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
+		     xdemitcb_t *ecb);
+/* Allocator for fixed-size items (chastore_t). */
+int xdl_cha_init(chastore_t *cha, long isize, long icount);
+void xdl_cha_free(chastore_t *cha);
+void *xdl_cha_alloc(chastore_t *cha);
+void *xdl_cha_first(chastore_t *cha);
+void *xdl_cha_next(chastore_t *cha);
+/* Line counting and hashing helpers. */
+long xdl_guess_lines(mmfile_t *mf);
+unsigned long xdl_hash_record(char const **data, char const *top);
+unsigned int xdl_hashbits(unsigned int size);
+/* Decimal number formatting and parsing. */
+int xdl_num_out(char *out, long val);
+long xdl_atol(char const *str, char const **next);
+/* Output of a "@@ -s1,c1 +s2,c2 @@" hunk header through the emit callback. */
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb);
+
+
+
+#endif /* #if !defined(XUTILS_H) */
+