Merge branch 'np/delta'
authorJunio C Hamano <junkio@cox.net>
Tue, 9 May 2006 23:40:28 +0000 (16:40 -0700)
committerJunio C Hamano <junkio@cox.net>
Tue, 9 May 2006 23:40:28 +0000 (16:40 -0700)
* np/delta:
improve diff-delta with sparse and/or repetitive data
tiny optimization to diff-delta
replace adler32 with Rabin's polynomial in diff-delta
use delta index data when finding best delta matches
split the diff-delta interface

1  2 
Makefile
pack-objects.c
diff --combined Makefile
index 814010d7b4e5f1938e72d3d7e2ca3ceb36c96a50,38c980bfb6841847ce14cedae3012d155e243218..37fbe789d34e03fac4cc11de4f1eefb5f480d893
+++ b/Makefile
@@@ -28,8 -28,8 +28,8 @@@ all
  #
  # Define NO_SETENV if you don't have setenv in the C library.
  #
 -# Define USE_SYMLINK_HEAD if you want .git/HEAD to be a symbolic link.
 -# Don't enable it on Windows.
 +# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link.
 +# Enable it on Windows.  By default, symrefs are still used.
  #
  # Define PPC_SHA1 environment variable when running make to make use of
  # a bundled SHA1 routine optimized for PowerPC.
@@@ -115,13 -115,13 +115,13 @@@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__pow
  SCRIPT_SH = \
        git-add.sh git-bisect.sh git-branch.sh git-checkout.sh \
        git-cherry.sh git-clean.sh git-clone.sh git-commit.sh \
 -      git-count-objects.sh git-diff.sh git-fetch.sh \
 +      git-fetch.sh \
        git-format-patch.sh git-ls-remote.sh \
        git-merge-one-file.sh git-parse-remote.sh \
 -      git-prune.sh git-pull.sh git-push.sh git-rebase.sh \
 +      git-prune.sh git-pull.sh git-rebase.sh \
        git-repack.sh git-request-pull.sh git-reset.sh \
        git-resolve.sh git-revert.sh git-rm.sh git-sh-setup.sh \
 -      git-tag.sh git-verify-tag.sh git-whatchanged.sh \
 +      git-tag.sh git-verify-tag.sh \
        git-applymbox.sh git-applypatch.sh git-am.sh \
        git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \
        git-merge-resolve.sh git-merge-ours.sh git-grep.sh \
@@@ -139,7 -139,7 +139,7 @@@ SCRIPT_PYTHON = 
  SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
          $(patsubst %.perl,%,$(SCRIPT_PERL)) \
          $(patsubst %.py,%,$(SCRIPT_PYTHON)) \
 -        git-cherry-pick git-show git-status
 +        git-cherry-pick git-status
  
  # The ones that do not have to link with lcrypto, lz nor xdiff.
  SIMPLE_PROGRAMS = \
@@@ -167,8 -167,7 +167,8 @@@ PROGRAMS = 
        git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \
        git-describe$X git-merge-tree$X git-blame$X git-imap-send$X
  
 -BUILT_INS = git-log$X
 +BUILT_INS = git-log$X git-whatchanged$X git-show$X \
 +      git-count-objects$X git-diff$X git-push$X
  
  # what 'all' will build and 'install' will install, in gitexecdir
  ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS)
@@@ -200,12 -199,12 +200,12 @@@ LIB_H = 
        tree-walk.h log-tree.h
  
  DIFF_OBJS = \
 -      diff-lib.o diffcore-break.o diffcore-order.o \
 +      diff.o diff-lib.o diffcore-break.o diffcore-order.o \
        diffcore-pickaxe.o diffcore-rename.o tree-diff.o combine-diff.o \
        diffcore-delta.o log-tree.o
  
  LIB_OBJS = \
 -      blob.o commit.o connect.o csum-file.o \
 +      blob.o commit.o connect.o csum-file.o base85.o \
        date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
        object.o pack-check.o patch-delta.o path.o pkt-line.o \
        quote.o read-cache.o refs.o run-command.o \
        $(DIFF_OBJS)
  
  BUILTIN_OBJS = \
 -      builtin-log.o builtin-help.o
 +      builtin-log.o builtin-help.o builtin-count.o builtin-diff.o builtin-push.o
  
  GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
  LIBS = $(GITLIBS) -lz
@@@ -264,7 -263,6 +264,7 @@@ ifeq ($(uname_O),Cygwin
        NO_D_TYPE_IN_DIRENT = YesPlease
        NO_D_INO_IN_DIRENT = YesPlease
        NO_STRCASESTR = YesPlease
 +      NO_SYMLINK_HEAD = YesPlease
        NEEDS_LIBICONV = YesPlease
        # There are conflicting reports about this.
        # On some boxes NO_MMAP is needed, and not so elsewhere.
@@@ -388,9 -386,6 +388,9 @@@ endi
  ifdef NO_D_INO_IN_DIRENT
        ALL_CFLAGS += -DNO_D_INO_IN_DIRENT
  endif
 +ifdef NO_SYMLINK_HEAD
 +      ALL_CFLAGS += -DNO_SYMLINK_HEAD
 +endif
  ifdef NO_STRCASESTR
        COMPAT_CFLAGS += -DNO_STRCASESTR
        COMPAT_OBJS += compat/strcasestr.o
@@@ -510,6 -505,9 +510,6 @@@ $(patsubst %.py,%,$(SCRIPT_PYTHON)) : 
  git-cherry-pick: git-revert
        cp $< $@
  
 -git-show: git-whatchanged
 -      cp $< $@
 -
  git-status: git-commit
        cp $< $@
  
@@@ -564,6 -562,10 +564,6 @@@ git-http-push$X: revision.o http.o http
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
                $(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
  
 -git-rev-list$X: rev-list.o $(LIB_FILE)
 -      $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 -              $(LIBS) $(OPENSSL_LIBSSL)
 -
  init-db.o: init-db.c
        $(CC) -c $(ALL_CFLAGS) \
                -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c
@@@ -573,12 -575,12 +573,12 @@@ $(patsubst git-%$X,%.o,$(PROGRAMS)): $(
  $(DIFF_OBJS): diffcore.h
  
  $(LIB_FILE): $(LIB_OBJS)
 -      $(AR) rcs $@ $(LIB_OBJS)
 +      rm -f $@ && $(AR) rcs $@ $(LIB_OBJS)
  
  XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o
  
  $(XDIFF_LIB): $(XDIFF_OBJS)
 -      $(AR) rcs $@ $(XDIFF_OBJS)
 +      rm -f $@ && $(AR) rcs $@ $(XDIFF_OBJS)
  
  
  doc:
@@@ -607,7 -609,7 +607,7 @@@ test-date$X: test-date.c date.o ctype.
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) test-date.c date.o ctype.o
  
  test-delta$X: test-delta.c diff-delta.o patch-delta.o
-       $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz
+       $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^
  
  check:
        for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done
diff --combined pack-objects.c
index 53caed42dd2778e96cb39d8ba8b9cb175bdf1303,5b2ef9a51387dc01e9d31f08962101cf1323e3e2..523a1c7da8f1baf70d2f700dbafa8e78f9a1c4b3
@@@ -994,6 -994,7 +994,7 @@@ static int type_size_sort(const struct 
  struct unpacked {
        struct object_entry *entry;
        void *data;
+       struct delta_index *index;
  };
  
  /*
   * more importantly, the bigger file is likely the more recent
   * one.
   */
- static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
+ static int try_delta(struct unpacked *trg, struct unpacked *src,
+                    struct delta_index *src_index, unsigned max_depth)
  {
-       struct object_entry *cur_entry = cur->entry;
-       struct object_entry *old_entry = old->entry;
-       unsigned long size, oldsize, delta_size, sizediff;
-       long max_size;
+       struct object_entry *trg_entry = trg->entry;
+       struct object_entry *src_entry = src->entry;
+       unsigned long size, src_size, delta_size, sizediff, max_size;
        void *delta_buf;
  
        /* Don't bother doing diffs between different types */
-       if (cur_entry->type != old_entry->type)
+       if (trg_entry->type != src_entry->type)
                return -1;
  
        /* We do not compute delta to *create* objects we are not
         * going to pack.
         */
-       if (cur_entry->preferred_base)
+       if (trg_entry->preferred_base)
                return -1;
  
-       /* If the current object is at pack edge, take the depth the
+       /*
+        * If the current object is at pack edge, take the depth the
         * objects that depend on the current object into account --
         * otherwise they would become too deep.
         */
-       if (cur_entry->delta_child) {
-               if (max_depth <= cur_entry->delta_limit)
+       if (trg_entry->delta_child) {
+               if (max_depth <= trg_entry->delta_limit)
                        return 0;
-               max_depth -= cur_entry->delta_limit;
+               max_depth -= trg_entry->delta_limit;
        }
-       if (old_entry->depth >= max_depth)
+       if (src_entry->depth >= max_depth)
                return 0;
  
-       /*
-        * NOTE!
-        *
-        * We always delta from the bigger to the smaller, since that's
-        * more space-efficient (deletes don't have to say _what_ they
-        * delete).
-        */
-       size = cur_entry->size;
+       /* Now some size filtering euristics. */
+       size = trg_entry->size;
        max_size = size / 2 - 20;
-       if (cur_entry->delta)
-               max_size = cur_entry->delta_size-1;
-       oldsize = old_entry->size;
-       sizediff = oldsize < size ? size - oldsize : 0;
+       if (trg_entry->delta)
+               max_size = trg_entry->delta_size-1;
+       src_size = src_entry->size;
+       sizediff = src_size < size ? size - src_size : 0;
        if (sizediff >= max_size)
                return 0;
-       delta_buf = diff_delta(old->data, oldsize,
-                              cur->data, size, &delta_size, max_size);
+       delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size);
        if (!delta_buf)
                return 0;
-       cur_entry->delta = old_entry;
-       cur_entry->delta_size = delta_size;
-       cur_entry->depth = old_entry->depth + 1;
+       trg_entry->delta = src_entry;
+       trg_entry->delta_size = delta_size;
+       trg_entry->depth = src_entry->depth + 1;
        free(delta_buf);
-       return 0;
+       return 1;
  }
  
  static void progress_interval(int signum)
@@@ -1108,12 -1104,17 +1104,17 @@@ static void find_deltas(struct object_e
  
                if (entry->size < 50)
                        continue;
+               if (n->index)
+                       free_delta_index(n->index);
                free(n->data);
                n->entry = entry;
                n->data = read_sha1_file(entry->sha1, type, &size);
                if (size != entry->size)
-                       die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
+                       die("object %s inconsistent object length (%lu vs %lu)",
+                           sha1_to_hex(entry->sha1), size, entry->size);
+               n->index = create_delta_index(n->data, size);
+               if (!n->index)
+                       die("out of memory");
  
                j = window;
                while (--j > 0) {
                        m = array + other_idx;
                        if (!m->entry)
                                break;
-                       if (try_delta(n, m, depth) < 0)
+                       if (try_delta(n, m, m->index, depth) < 0)
                                break;
                }
  #if 0
        if (progress)
                fputc('\n', stderr);
  
-       for (i = 0; i < window; ++i)
+       for (i = 0; i < window; ++i) {
+               if (array[i].index)
+                       free_delta_index(array[i].index);
                free(array[i].data);
+       }
        free(array);
  }
  
@@@ -1239,7 -1243,6 +1243,7 @@@ int main(int argc, char **argv
  
        setup_git_directory();
  
 +      progress = isatty(2);
        for (i = 1; i < argc; i++) {
                const char *arg = argv[i];
  
                                        usage(pack_usage);
                                continue;
                        }
 +                      if (!strcmp("--progress", arg)) {
 +                              progress = 1;
 +                              continue;
 +                      }
                        if (!strcmp("-q", arg)) {
                                progress = 0;
                                continue;