Merge branch 'jk/oi-delta-base'

author Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)

committer Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)
author: Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)
diff --combined builtin/cat-file.c

index f12071adc4079c9bbea4a43a89b0b3bc6a4bbc48,2e0af2e617e400a5c99465edd82f0dd1f8927680..d5a93e0e911ccbe4560a03ed78436bfb174e0f68
--- 1/builtin/cat-file.c
--- 2/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@@ -118,6 -118,7 +118,7 @@@ struct expand_data 
         unsigned long size;
         unsigned long disk_size;
         const char *rest;
+       unsigned char delta_base_sha1[20];
   
         /*
          * If mark_query is true, we do not expand anything, but rather
@@@ -174,6 -175,11 +175,11 @@@ static void expand_atom(struct strbuf *
                         data->split_on_whitespace = 1;
                 else if (data->rest)
                         strbuf_addstr(sb, data->rest);
+       } else if (is_atom("deltabase", atom, len)) {
+               if (data->mark_query)
+                       data->info.delta_base_sha1 = data->delta_base_sha1;
+               else
+                       strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
         } else
                 die("unknown format element: %.*s", len, atom);
   }
@@@ -193,28 -199,25 +199,28 @@@ static size_t expand_format(struct strb
         return end - start + 1;
   }
   
- -static void print_object_or_die(int fd, const unsigned char *sha1,
- -                              enum object_type type, unsigned long size)
+ +static void print_object_or_die(int fd, struct expand_data *data)
   {
- -      if (type == OBJ_BLOB) {
+ +      const unsigned char *sha1 = data->sha1;
+ +
+ +      assert(data->info.typep);
+ +
+ +      if (data->type == OBJ_BLOB) {
                 if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
                         die("unable to stream %s to stdout", sha1_to_hex(sha1));
         }
         else {
- -              enum object_type rtype;
- -              unsigned long rsize;
+ +              enum object_type type;
+ +              unsigned long size;
                 void *contents;
   
- -              contents = read_sha1_file(sha1, &rtype, &rsize);
+ +              contents = read_sha1_file(sha1, &type, &size);
                 if (!contents)
                         die("object %s disappeared", sha1_to_hex(sha1));
- -              if (rtype != type)
+ +              if (type != data->type)
                         die("object %s changed type!?", sha1_to_hex(sha1));
- -              if (rsize != size)
- -                      die("object %s change size!?", sha1_to_hex(sha1));
+ +              if (data->info.sizep && size != data->size)
+ +                      die("object %s changed size!?", sha1_to_hex(sha1));
   
                 write_or_die(fd, contents, size);
                 free(contents);
@@@ -241,7 -244,7 +247,7 @@@ static int batch_one_object(const char 
                 return 0;
         }
   
- -      if (sha1_object_info_extended(data->sha1, &data->info) < 0) {
+ +      if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
                 printf("%s missing\n", obj_name);
                 fflush(stdout);
                 return 0;
@@@ -253,7 -256,7 +259,7 @@@
         strbuf_release(&buf);
   
         if (opt->print_contents) {
- -              print_object_or_die(1, data->sha1, data->type, data->size);
+ +              print_object_or_die(1, data);
                 write_or_die(1, "\n", 1);
         }
         return 0;
@@@ -277,13 -280,6 +283,13 @@@ static int batch_objects(struct batch_o
         strbuf_expand(&buf, opt->format, expand_format, &data);
         data.mark_query = 0;
   
+ +      /*
+ +       * If we are printing out the object, then always fill in the type,
+ +       * since we will want to decide whether or not to stream.
+ +       */
+ +      if (opt->print_contents)
+ +              data.info.typep = &data.type;
+ +
         /*
          * We are going to call get_sha1 on a potentially very large number of
          * objects. In most large cases, these will be actual object sha1s. The
diff --combined cache.h

index 9ba9773edfdeb0ffc543bdb3eb02ea16eee427ef,67356dbe5434a37fc7418174187154a852249472..83a27269b89871b166286550ae9cb22c85dc7d10
--- 1/cache.h
--- 2/cache.h
+++ b/cache.h
@@@ -760,11 -760,11 +760,11 @@@ int daemon_avoid_alias(const char *path
   int offset_1st_component(const char *path);
   
   /* object replacement */
- -#define READ_SHA1_FILE_REPLACE 1
+ +#define LOOKUP_REPLACE_OBJECT 1
   extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
   static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
   {
- -      return read_sha1_file_extended(sha1, type, size, READ_SHA1_FILE_REPLACE);
+ +      return read_sha1_file_extended(sha1, type, size, LOOKUP_REPLACE_OBJECT);
   }
   extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1);
   static inline const unsigned char *lookup_replace_object(const unsigned char *sha1)
@@@ -773,12 -773,6 +773,12 @@@
                 return sha1;
         return do_lookup_replace_object(sha1);
   }
+ +static inline const unsigned char *lookup_replace_object_extended(const unsigned char *sha1, unsigned flag)
+ +{
+ +      if (!(flag & LOOKUP_REPLACE_OBJECT))
+ +              return sha1;
+ +      return lookup_replace_object(sha1);
+ +}
   
   /* Read and unpack a sha1 file into memory, write memory to a sha1 file */
   extern int sha1_object_info(const unsigned char *, unsigned long *);
@@@ -1080,6 -1074,7 +1080,7 @@@ struct object_info 
         enum object_type *typep;
         unsigned long *sizep;
         unsigned long *disk_sizep;
+       unsigned char *delta_base_sha1;
   
         /* Response */
         enum {
@@@ -1104,7 -1099,7 +1105,7 @@@
                 } packed;
         } u;
   };
- -extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
+ +extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
   
   /* Dumb servers support */
   extern int update_server_info(int);
diff --combined sha1_file.c

index a2ff2961b1a2a5b69707a7963411f93deb1a6f56,4e8dd8be6dcfeba1639a3490811636f9c6249c6c..e13bd2c3ee7edb4d157e499fbcea80535ec5752f
--- 1/sha1_file.c
--- 2/sha1_file.c
+++ b/sha1_file.c
@@@ -807,38 -807,15 +807,38 @@@ void free_pack_by_name(const char *pack
   static unsigned int get_max_fd_limit(void)
   {
   #ifdef RLIMIT_NOFILE
- -      struct rlimit lim;
+ +      {
+ +              struct rlimit lim;
   
- -      if (getrlimit(RLIMIT_NOFILE, &lim))
- -              die_errno("cannot get RLIMIT_NOFILE");
+ +              if (!getrlimit(RLIMIT_NOFILE, &lim))
+ +                      return lim.rlim_cur;
+ +      }
+ +#endif
   
- -      return lim.rlim_cur;
- -#elif defined(_SC_OPEN_MAX)
- -      return sysconf(_SC_OPEN_MAX);
- -#elif defined(OPEN_MAX)
+ +#ifdef _SC_OPEN_MAX
+ +      {
+ +              long open_max = sysconf(_SC_OPEN_MAX);
+ +              if (0 < open_max)
+ +                      return open_max;
+ +              /*
+ +               * Otherwise, we got -1 for one of the two
+ +               * reasons:
+ +               *
+ +               * (1) sysconf() did not understand _SC_OPEN_MAX
+ +               *     and signaled an error with -1; or
+ +               * (2) sysconf() said there is no limit.
+ +               *
+ +               * We _could_ clear errno before calling sysconf() to
+ +               * tell these two cases apart and return a huge number
+ +               * in the latter case to let the caller cap it to a
+ +               * value that is not so selfish, but letting the
+ +               * fallback OPEN_MAX codepath take care of these cases
+ +               * is a lot simpler.
+ +               */
+ +      }
+ +#endif
+ +
+ +#ifdef OPEN_MAX
         return OPEN_MAX;
   #else
         return 1; /* see the caller ;-) */
@@@ -1690,6 -1667,38 +1690,38 @@@ static off_t get_delta_base(struct pack
         return base_offset;
   }
   
+ /*
+  * Like get_delta_base above, but we return the sha1 instead of the pack
+  * offset. This means it is cheaper for REF deltas (we do not have to do
+  * the final object lookup), but more expensive for OFS deltas (we
+  * have to load the revidx to convert the offset back into a sha1).
+  */
+ static const unsigned char *get_delta_base_sha1(struct packed_git *p,
+                                               struct pack_window **w_curs,
+                                               off_t curpos,
+                                               enum object_type type,
+                                               off_t delta_obj_offset)
+ {
+       if (type == OBJ_REF_DELTA) {
+               unsigned char *base = use_pack(p, w_curs, curpos, NULL);
+               return base;
+       } else if (type == OBJ_OFS_DELTA) {
+               struct revindex_entry *revidx;
+               off_t base_offset = get_delta_base(p, w_curs, &curpos,
+                                                  type, delta_obj_offset);
+ 
+               if (!base_offset)
+                       return NULL;
+ 
+               revidx = find_pack_revindex(p, base_offset);
+               if (!revidx)
+                       return NULL;
+ 
+               return nth_packed_object_sha1(p, revidx->nr);
+       } else
+               return NULL;
+ }
+ 
   int unpack_object_header(struct packed_git *p,
                          struct pack_window **w_curs,
                          off_t *curpos,
@@@ -1847,6 -1856,22 +1879,22 @@@ static int packed_object_info(struct pa
                 }
         }
   
+       if (oi->delta_base_sha1) {
+               if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
+                       const unsigned char *base;
+ 
+                       base = get_delta_base_sha1(p, &w_curs, curpos,
+                                                  type, obj_offset);
+                       if (!base) {
+                               type = OBJ_BAD;
+                               goto out;
+                       }
+ 
+                       hashcpy(oi->delta_base_sha1, base);
+               } else
+                       hashclr(oi->delta_base_sha1);
+       }
+ 
   out:
         unuse_pack(&w_curs);
         return type;
@@@ -2430,6 -2455,9 +2478,9 @@@ static int sha1_loose_object_info(cons
         git_zstream stream;
         char hdr[32];
   
+       if (oi->delta_base_sha1)
+               hashclr(oi->delta_base_sha1);
+ 
         /*
          * If we don't care about type or size, then we don't
          * need to look inside the object at all. Note that we
@@@ -2466,14 -2494,13 +2517,14 @@@
         return 0;
   }
   
- -int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
+ +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
   {
         struct cached_object *co;
         struct pack_entry e;
         int rtype;
+ +      const unsigned char *real = lookup_replace_object_extended(sha1, flags);
   
- -      co = find_cached_object(sha1);
+ +      co = find_cached_object(real);
         if (co) {
                 if (oi->typep)
                         *(oi->typep) = co->type;
@@@ -2481,27 -2508,29 +2532,29 @@@
                         *(oi->sizep) = co->size;
                 if (oi->disk_sizep)
                         *(oi->disk_sizep) = 0;
+               if (oi->delta_base_sha1)
+                       hashclr(oi->delta_base_sha1);
                 oi->whence = OI_CACHED;
                 return 0;
         }
   
- -      if (!find_pack_entry(sha1, &e)) {
+ +      if (!find_pack_entry(real, &e)) {
                 /* Most likely it's a loose object. */
- -              if (!sha1_loose_object_info(sha1, oi)) {
+ +              if (!sha1_loose_object_info(real, oi)) {
                         oi->whence = OI_LOOSE;
                         return 0;
                 }
   
                 /* Not a loose object; someone else may have just packed it. */
                 reprepare_packed_git();
- -              if (!find_pack_entry(sha1, &e))
+ +              if (!find_pack_entry(real, &e))
                         return -1;
         }
   
         rtype = packed_object_info(e.p, e.offset, oi);
         if (rtype < 0) {
- -              mark_bad_packed_object(e.p, sha1);
- -              return sha1_object_info_extended(sha1, oi);
+ +              mark_bad_packed_object(e.p, real);
+ +              return sha1_object_info_extended(real, oi, 0);
         } else if (in_delta_base_cache(e.p, e.offset)) {
                 oi->whence = OI_DBCACHED;
         } else {
@@@ -2523,7 -2552,7 +2576,7 @@@ int sha1_object_info(const unsigned cha
   
         oi.typep = &type;
         oi.sizep = sizep;
- -      if (sha1_object_info_extended(sha1, &oi) < 0)
+ +      if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0)
                 return -1;
         return type;
   }
@@@ -2615,7 -2644,8 +2668,7 @@@ void *read_sha1_file_extended(const uns
         void *data;
         char *path;
         const struct packed_git *p;
- -      const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE)
- -              ? lookup_replace_object(sha1) : sha1;
+ +      const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
   
         errno = 0;
         data = read_object(repl, type, size);
diff --combined t/t1006-cat-file.sh

index 1687098355b710850a04769512320930d9b8b27b,633dc825ec49eb98be10c8f75545a6978c290231..a72e700ae434b07b08b48a68d4c0ebaa8173002b
--- 1/t/t1006-cat-file.sh
--- 2/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@@ -85,28 -85,6 +85,28 @@@ $content
                 git cat-file --batch-check="%(objecttype) %(rest)" >actual &&
         test_cmp expect actual
       '
+ +
+ +    test -z "$content" ||
+ +    test_expect_success "--batch without type ($type)" '
+ +      {
+ +              echo "$size" &&
+ +              maybe_remove_timestamp "$content" $no_ts
+ +      } >expect &&
+ +      echo $sha1 | git cat-file --batch="%(objectsize)" >actual.full &&
+ +      maybe_remove_timestamp "$(cat actual.full)" $no_ts >actual &&
+ +      test_cmp expect actual
+ +    '
+ +
+ +    test -z "$content" ||
+ +    test_expect_success "--batch without size ($type)" '
+ +      {
+ +              echo "$type" &&
+ +              maybe_remove_timestamp "$content" $no_ts
+ +      } >expect &&
+ +      echo $sha1 | git cat-file --batch="%(objecttype)" >actual.full &&
+ +      maybe_remove_timestamp "$(cat actual.full)" $no_ts >actual &&
+ +      test_cmp expect actual
+ +    '
   }
   
   hello_content="Hello World"
@@@ -262,4 -240,38 +262,38 @@@ test_expect_success "--batch-check wit
       "$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
   '
   
+ test_expect_success 'setup blobs which are likely to delta' '
+       test-genrandom foo 10240 >foo &&
+       { cat foo; echo plus; } >foo-plus &&
+       git add foo foo-plus &&
+       git commit -m foo &&
+       cat >blobs <<-\EOF
+       HEAD:foo
+       HEAD:foo-plus
+       EOF
+ '
+ 
+ test_expect_success 'confirm that neither loose blob is a delta' '
+       cat >expect <<-EOF
+       $_z40
+       $_z40
+       EOF
+       git cat-file --batch-check="%(deltabase)" <blobs >actual &&
+       test_cmp expect actual
+ '
+ 
+ # To avoid relying too much on the current delta heuristics,
+ # we will check only that one of the two objects is a delta
+ # against the other, but not the order. We can do so by just
+ # asking for the base of both, and checking whether either
+ # sha1 appears in the output.
+ test_expect_success '%(deltabase) reports packed delta bases' '
+       git repack -ad &&
+       git cat-file --batch-check="%(deltabase)" <blobs >actual &&
+       {
+               grep "$(git rev-parse HEAD:foo)" actual ||
+               grep "$(git rev-parse HEAD:foo-plus)" actual
+       }
+ '
+ 
   test_done
author	Junio C Hamano <gitster@pobox.com>
author	Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)
committer	Junio C Hamano <gitster@pobox.com>
committer	Fri, 10 Jan 2014 18:33:11 +0000 (10:33 -0800)
		1	2
builtin/cat-file.c	patch |	diff1 |	diff2 |	blob | history
cache.h	patch |	diff1 |	diff2 |	blob | history
sha1_file.c	patch |	diff1 |	diff2 |	blob | history
t/t1006-cat-file.sh	patch |	diff1 |	diff2 |	blob | history