From: Junio C Hamano Date: Tue, 9 May 2006 23:40:28 +0000 (-0700) Subject: Merge branch 'np/delta' X-Git-Tag: v1.4.0-rc1~158 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/4edd44725c621b3a2c6c9c4d8f130ceea2ba355a?hp=-c Merge branch 'np/delta' * np/delta: improve diff-delta with sparse and/or repetitive data tiny optimization to diff-delta replace adler32 with Rabin's polynomial in diff-delta use delta index data when finding best delta matches split the diff-delta interface --- 4edd44725c621b3a2c6c9c4d8f130ceea2ba355a diff --combined Makefile index 814010d7b4,38c980bfb6..37fbe789d3 --- a/Makefile +++ b/Makefile @@@ -28,8 -28,8 +28,8 @@@ all # # Define NO_SETENV if you don't have setenv in the C library. # -# Define USE_SYMLINK_HEAD if you want .git/HEAD to be a symbolic link. -# Don't enable it on Windows. +# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link. +# Enable it on Windows. By default, symrefs are still used. # # Define PPC_SHA1 environment variable when running make to make use of # a bundled SHA1 routine optimized for PowerPC. @@@ -115,13 -115,13 +115,13 @@@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__pow SCRIPT_SH = \ git-add.sh git-bisect.sh git-branch.sh git-checkout.sh \ git-cherry.sh git-clean.sh git-clone.sh git-commit.sh \ - git-count-objects.sh git-diff.sh git-fetch.sh \ + git-fetch.sh \ git-format-patch.sh git-ls-remote.sh \ git-merge-one-file.sh git-parse-remote.sh \ - git-prune.sh git-pull.sh git-push.sh git-rebase.sh \ + git-prune.sh git-pull.sh git-rebase.sh \ git-repack.sh git-request-pull.sh git-reset.sh \ git-resolve.sh git-revert.sh git-rm.sh git-sh-setup.sh \ - git-tag.sh git-verify-tag.sh git-whatchanged.sh \ + git-tag.sh git-verify-tag.sh \ git-applymbox.sh git-applypatch.sh git-am.sh \ git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \ git-merge-resolve.sh git-merge-ours.sh git-grep.sh \ @@@ -139,7 -139,7 +139,7 @@@ SCRIPT_PYTHON = SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ $(patsubst %.perl,%,$(SCRIPT_PERL)) \ $(patsubst %.py,%,$(SCRIPT_PYTHON)) \ - git-cherry-pick git-show git-status + git-cherry-pick git-status # The ones that do not have to link with lcrypto, lz nor xdiff. SIMPLE_PROGRAMS = \ @@@ -167,8 -167,7 +167,8 @@@ PROGRAMS = git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \ git-describe$X git-merge-tree$X git-blame$X git-imap-send$X -BUILT_INS = git-log$X +BUILT_INS = git-log$X git-whatchanged$X git-show$X \ + git-count-objects$X git-diff$X git-push$X # what 'all' will build and 'install' will install, in gitexecdir ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS) @@@ -200,12 -199,12 +200,12 @@@ LIB_H = tree-walk.h log-tree.h DIFF_OBJS = \ - diff-lib.o diffcore-break.o diffcore-order.o \ + diff.o diff-lib.o diffcore-break.o diffcore-order.o \ diffcore-pickaxe.o diffcore-rename.o tree-diff.o combine-diff.o \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o base85.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ @@@ -215,7 -214,7 +215,7 @@@ $(DIFF_OBJS) BUILTIN_OBJS = \ - builtin-log.o builtin-help.o + builtin-log.o builtin-help.o builtin-count.o builtin-diff.o builtin-push.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz @@@ -264,7 -263,6 +264,7 @@@ ifeq ($(uname_O),Cygwin NO_D_TYPE_IN_DIRENT = YesPlease NO_D_INO_IN_DIRENT = YesPlease NO_STRCASESTR = YesPlease + NO_SYMLINK_HEAD = YesPlease NEEDS_LIBICONV = YesPlease # There are conflicting reports about this. # On some boxes NO_MMAP is needed, and not so elsewhere. @@@ -388,9 -386,6 +388,9 @@@ endi ifdef NO_D_INO_IN_DIRENT ALL_CFLAGS += -DNO_D_INO_IN_DIRENT endif +ifdef NO_SYMLINK_HEAD + ALL_CFLAGS += -DNO_SYMLINK_HEAD +endif ifdef NO_STRCASESTR COMPAT_CFLAGS += -DNO_STRCASESTR COMPAT_OBJS += compat/strcasestr.o @@@ -510,6 -505,9 +510,6 @@@ $(patsubst %.py,%,$(SCRIPT_PYTHON)) : git-cherry-pick: git-revert cp $< $@ -git-show: git-whatchanged - cp $< $@ - git-status: git-commit cp $< $@ @@@ -564,6 -562,10 +564,6 @@@ git-http-push$X: revision.o http.o http $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ $(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) -git-rev-list$X: rev-list.o $(LIB_FILE) - $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ - $(LIBS) $(OPENSSL_LIBSSL) - init-db.o: init-db.c $(CC) -c $(ALL_CFLAGS) \ -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c @@@ -573,12 -575,12 +573,12 @@@ $(patsubst git-%$X,%.o,$(PROGRAMS)): $( $(DIFF_OBJS): diffcore.h $(LIB_FILE): $(LIB_OBJS) - $(AR) rcs $@ $(LIB_OBJS) + rm -f $@ && $(AR) rcs $@ $(LIB_OBJS) XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o $(XDIFF_LIB): $(XDIFF_OBJS) - $(AR) rcs $@ $(XDIFF_OBJS) + rm -f $@ && $(AR) rcs $@ $(XDIFF_OBJS) doc: @@@ -607,7 -609,7 +607,7 @@@ test-date$X: test-date.c date.o ctype. $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o - $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done diff --combined pack-objects.c index 53caed42dd,5b2ef9a513..523a1c7da8 --- a/pack-objects.c +++ b/pack-objects.c @@@ -994,6 -994,7 +994,7 @@@ static int type_size_sort(const struct struct unpacked { struct object_entry *entry; void *data; + struct delta_index *index; }; /* @@@ -1004,61 -1005,56 +1005,56 @@@ * more importantly, the bigger file is likely the more recent * one. */ - static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) + static int try_delta(struct unpacked *trg, struct unpacked *src, + struct delta_index *src_index, unsigned max_depth) { - struct object_entry *cur_entry = cur->entry; - struct object_entry *old_entry = old->entry; - unsigned long size, oldsize, delta_size, sizediff; - long max_size; + struct object_entry *trg_entry = trg->entry; + struct object_entry *src_entry = src->entry; + unsigned long size, src_size, delta_size, sizediff, max_size; void *delta_buf; /* Don't bother doing diffs between different types */ - if (cur_entry->type != old_entry->type) + if (trg_entry->type != src_entry->type) return -1; /* We do not compute delta to *create* objects we are not * going to pack. */ - if (cur_entry->preferred_base) + if (trg_entry->preferred_base) return -1; - /* If the current object is at pack edge, take the depth the + /* + * If the current object is at pack edge, take the depth the * objects that depend on the current object into account -- * otherwise they would become too deep. */ - if (cur_entry->delta_child) { - if (max_depth <= cur_entry->delta_limit) + if (trg_entry->delta_child) { + if (max_depth <= trg_entry->delta_limit) return 0; - max_depth -= cur_entry->delta_limit; + max_depth -= trg_entry->delta_limit; } - - if (old_entry->depth >= max_depth) + if (src_entry->depth >= max_depth) return 0; - /* - * NOTE! - * - * We always delta from the bigger to the smaller, since that's - * more space-efficient (deletes don't have to say _what_ they - * delete). - */ - size = cur_entry->size; + /* Now some size filtering euristics. */ + size = trg_entry->size; max_size = size / 2 - 20; - if (cur_entry->delta) - max_size = cur_entry->delta_size-1; - oldsize = old_entry->size; - sizediff = oldsize < size ? size - oldsize : 0; + if (trg_entry->delta) + max_size = trg_entry->delta_size-1; + src_size = src_entry->size; + sizediff = src_size < size ? size - src_size : 0; if (sizediff >= max_size) return 0; - delta_buf = diff_delta(old->data, oldsize, - cur->data, size, &delta_size, max_size); + + delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size); if (!delta_buf) return 0; - cur_entry->delta = old_entry; - cur_entry->delta_size = delta_size; - cur_entry->depth = old_entry->depth + 1; + + trg_entry->delta = src_entry; + trg_entry->delta_size = delta_size; + trg_entry->depth = src_entry->depth + 1; free(delta_buf); - return 0; + return 1; } static void progress_interval(int signum) @@@ -1108,12 -1104,17 +1104,17 @@@ static void find_deltas(struct object_e if (entry->size < 50) continue; - + if (n->index) + free_delta_index(n->index); free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) - die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); + die("object %s inconsistent object length (%lu vs %lu)", + sha1_to_hex(entry->sha1), size, entry->size); + n->index = create_delta_index(n->data, size); + if (!n->index) + die("out of memory"); j = window; while (--j > 0) { @@@ -1124,7 -1125,7 +1125,7 @@@ m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m, depth) < 0) + if (try_delta(n, m, m->index, depth) < 0) break; } #if 0 @@@ -1144,8 -1145,11 +1145,11 @@@ if (progress) fputc('\n', stderr); - for (i = 0; i < window; ++i) + for (i = 0; i < window; ++i) { + if (array[i].index) + free_delta_index(array[i].index); free(array[i].data); + } free(array); } @@@ -1239,7 -1243,6 +1243,7 @@@ int main(int argc, char **argv setup_git_directory(); + progress = isatty(2); for (i = 1; i < argc; i++) { const char *arg = argv[i]; @@@ -1270,10 -1273,6 +1274,10 @@@ usage(pack_usage); continue; } + if (!strcmp("--progress", arg)) { + progress = 1; + continue; + } if (!strcmp("-q", arg)) { progress = 0; continue;