Merge branch 'rt/zlib-smaller-window'
author Junio C Hamano <gitster@pobox.com>
Tue, 23 Aug 2011 22:40:33 +0000 (15:40 -0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 23 Aug 2011 22:40:33 +0000 (15:40 -0700)
* rt/zlib-smaller-window:
test: consolidate definition of $LF
Tolerate zlib deflation with window size < 32Kb

1  2 
sha1_file.c
t/t1020-subdirectory.sh
t/test-lib.sh
diff --combined sha1_file.c
index d5616dca0809bdb9fd6a7a1980b92ded9ae6e230,475d215c14d25212ef8d5b3849b650666cb53b17..44444ae8f4a01938e4f1b08cb7a270253dfe174b
@@@ -834,7 -834,7 +834,7 @@@ static int in_window(struct pack_windo
  unsigned char *use_pack(struct packed_git *p,
                struct pack_window **w_cursor,
                off_t offset,
 -              unsigned int *left)
 +              unsigned long *left)
  {
        struct pack_window *win = *w_cursor;
  
@@@ -1186,7 -1186,7 +1186,7 @@@ static int open_sha1_file(const unsigne
        return -1;
  }
  
 -static void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 +void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
  {
        void *map;
        int fd;
@@@ -1217,14 -1217,34 +1217,34 @@@ static int experimental_loose_object(un
        unsigned int word;
  
        /*
-        * Is it a zlib-compressed buffer? If so, the first byte
-        * must be 0x78 (15-bit window size, deflated), and the
-        * first 16-bit word is evenly divisible by 31. If so,
-        * we are looking at the official format, not the experimental
-        * one.
+        * We must determine if the buffer contains the standard
+        * zlib-deflated stream or the experimental format based
+        * on the in-pack object format. Compare the header byte
+        * for each format:
+        *
+        * RFC1950 zlib w/ deflate : 0www1000 : 0 <= www <= 7
+        * Experimental pack-based : Stttssss : ttt = 1,2,3,4
+        *
+        * If bit 7 is clear and bits 0-3 equal 8, the buffer MUST be
+        * in standard loose-object format, UNLESS it is a Git-pack
+        * format object *exactly* 8 bytes in size when inflated.
+        *
+        * However, RFC1950 also specifies that the 1st 16-bit word
+        * must be divisible by 31 - this checksum tells us our buffer
+        * is in the standard format, giving a false positive only if
+        * the 1st word of the Git-pack format object happens to be
+        * divisible by 31, ie:
+        *      ((byte0 * 256) + byte1) % 31 = 0
+        *   =>        0ttt10000www1000 % 31 = 0
+        *
+        * As it happens, this case can only arise for www=3 & ttt=1
+        * - ie, a Commit object, which would have to be 8 bytes in
+        * size. As no Commit can be that small, we find that the
+        * combination of these two criteria (bitmask & checksum)
+        * can always correctly determine the buffer format.
         */
        word = (map[0] << 8) + map[1];
-       if (map[0] == 0x78 && !(word % 31))
+       if ((map[0] & 0x8F) == 0x08 && !(word % 31))
                return 0;
        else
                return 1;
@@@ -1254,7 -1274,7 +1274,7 @@@ unsigned long unpack_object_header_buff
        return used;
  }
  
 -static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
 +int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
  {
        unsigned long size, used;
        static const char valid_loose_object_type[8] = {
        return git_inflate(stream, 0);
  }
  
 -static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
 +static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
  {
        int bytes = strlen(buffer) + 1;
        unsigned char *buf = xmallocz(size);
   * too permissive for what we want to check. So do an anal
   * object header parse by hand.
   */
 -static int parse_sha1_header(const char *hdr, unsigned long *sizep)
 +int parse_sha1_header(const char *hdr, unsigned long *sizep)
  {
        char type[10];
        int i;
  static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
  {
        int ret;
 -      z_stream stream;
 +      git_zstream stream;
        char hdr[8192];
  
        ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
@@@ -1411,7 -1431,7 +1431,7 @@@ unsigned long get_size_from_delta(struc
  {
        const unsigned char *data;
        unsigned char delta_head[20], *in;
 -      z_stream stream;
 +      git_zstream stream;
        int st;
  
        memset(&stream, 0, sizeof(stream));
@@@ -1485,7 -1505,7 +1505,7 @@@ static off_t get_delta_base(struct pack
  
  /* forward declaration for a mutually recursive function */
  static int packed_object_info(struct packed_git *p, off_t offset,
 -                            unsigned long *sizep);
 +                            unsigned long *sizep, int *rtype);
  
  static int packed_delta_info(struct packed_git *p,
                             struct pack_window **w_curs,
        base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
        if (!base_offset)
                return OBJ_BAD;
 -      type = packed_object_info(p, base_offset, NULL);
 +      type = packed_object_info(p, base_offset, NULL, NULL);
        if (type <= OBJ_NONE) {
                struct revindex_entry *revidx;
                const unsigned char *base_sha1;
        return type;
  }
  
 -static int unpack_object_header(struct packed_git *p,
 -                              struct pack_window **w_curs,
 -                              off_t *curpos,
 -                              unsigned long *sizep)
 +int unpack_object_header(struct packed_git *p,
 +                       struct pack_window **w_curs,
 +                       off_t *curpos,
 +                       unsigned long *sizep)
  {
        unsigned char *base;
 -      unsigned int left;
 +      unsigned long left;
        unsigned long used;
        enum object_type type;
  
        return type;
  }
  
 -const char *packed_object_info_detail(struct packed_git *p,
 -                                    off_t obj_offset,
 -                                    unsigned long *size,
 -                                    unsigned long *store_size,
 -                                    unsigned int *delta_chain_length,
 -                                    unsigned char *base_sha1)
 -{
 -      struct pack_window *w_curs = NULL;
 -      off_t curpos;
 -      unsigned long dummy;
 -      unsigned char *next_sha1;
 -      enum object_type type;
 -      struct revindex_entry *revidx;
 -
 -      *delta_chain_length = 0;
 -      curpos = obj_offset;
 -      type = unpack_object_header(p, &w_curs, &curpos, size);
 -
 -      revidx = find_pack_revindex(p, obj_offset);
 -      *store_size = revidx[1].offset - obj_offset;
 -
 -      for (;;) {
 -              switch (type) {
 -              default:
 -                      die("pack %s contains unknown object type %d",
 -                          p->pack_name, type);
 -              case OBJ_COMMIT:
 -              case OBJ_TREE:
 -              case OBJ_BLOB:
 -              case OBJ_TAG:
 -                      unuse_pack(&w_curs);
 -                      return typename(type);
 -              case OBJ_OFS_DELTA:
 -                      obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
 -                      if (!obj_offset)
 -                              die("pack %s contains bad delta base reference of type %s",
 -                                  p->pack_name, typename(type));
 -                      if (*delta_chain_length == 0) {
 -                              revidx = find_pack_revindex(p, obj_offset);
 -                              hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
 -                      }
 -                      break;
 -              case OBJ_REF_DELTA:
 -                      next_sha1 = use_pack(p, &w_curs, curpos, NULL);
 -                      if (*delta_chain_length == 0)
 -                              hashcpy(base_sha1, next_sha1);
 -                      obj_offset = find_pack_entry_one(next_sha1, p);
 -                      break;
 -              }
 -              (*delta_chain_length)++;
 -              curpos = obj_offset;
 -              type = unpack_object_header(p, &w_curs, &curpos, &dummy);
 -      }
 -}
 -
  static int packed_object_info(struct packed_git *p, off_t obj_offset,
 -                            unsigned long *sizep)
 +                            unsigned long *sizep, int *rtype)
  {
        struct pack_window *w_curs = NULL;
        unsigned long size;
        enum object_type type;
  
        type = unpack_object_header(p, &w_curs, &curpos, &size);
 +      if (rtype)
 +              *rtype = type; /* representation type */
  
        switch (type) {
        case OBJ_OFS_DELTA:
@@@ -1593,7 -1666,7 +1613,7 @@@ static void *unpack_compressed_entry(st
                                    unsigned long size)
  {
        int st;
 -      z_stream stream;
 +      git_zstream stream;
        unsigned char *buffer, *in;
  
        buffer = xmallocz(size);
@@@ -1646,13 -1719,6 +1666,13 @@@ static unsigned long pack_entry_hash(st
        return hash % MAX_DELTA_CACHE;
  }
  
 +static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
 +{
 +      unsigned long hash = pack_entry_hash(p, base_offset);
 +      struct delta_base_cache_entry *ent = delta_base_cache + hash;
 +      return (ent->data && ent->p == p && ent->base_offset == base_offset);
 +}
 +
  static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
        unsigned long *base_size, enum object_type *type, int keep_cache)
  {
@@@ -1797,24 -1863,6 +1817,24 @@@ static void *unpack_delta_entry(struct 
        return result;
  }
  
 +static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
 +{
 +      static FILE *log_file;
 +
 +      if (!log_file) {
 +              log_file = fopen(log_pack_access, "w");
 +              if (!log_file) {
 +                      error("cannot open pack access log '%s' for writing: %s",
 +                            log_pack_access, strerror(errno));
 +                      log_pack_access = NULL;
 +                      return;
 +              }
 +      }
 +      fprintf(log_file, "%s %"PRIuMAX"\n",
 +              p->pack_name, (uintmax_t)obj_offset);
 +      fflush(log_file);
 +}
 +
  int do_check_packed_object_crc;
  
  void *unpack_entry(struct packed_git *p, off_t obj_offset,
        off_t curpos = obj_offset;
        void *data;
  
 +      if (log_pack_access)
 +              write_pack_access_log(p, obj_offset);
 +
        if (do_check_packed_object_crc && p->index_version > 1) {
                struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
                unsigned long len = revidx[1].offset - obj_offset;
@@@ -2054,7 -2099,7 +2074,7 @@@ static int sha1_loose_object_info(cons
        int status;
        unsigned long mapsize, size;
        void *map;
 -      z_stream stream;
 +      git_zstream stream;
        char hdr[32];
  
        map = map_sha1_file(sha1, &mapsize);
        return status;
  }
  
 -int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
 +/* returns enum object_type or negative */
 +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
  {
        struct cached_object *co;
        struct pack_entry e;
 -      int status;
 +      int status, rtype;
  
        co = find_cached_object(sha1);
        if (co) {
 -              if (sizep)
 -                      *sizep = co->size;
 +              if (oi->sizep)
 +                      *(oi->sizep) = co->size;
 +              oi->whence = OI_CACHED;
                return co->type;
        }
  
        if (!find_pack_entry(sha1, &e)) {
                /* Most likely it's a loose object. */
 -              status = sha1_loose_object_info(sha1, sizep);
 -              if (status >= 0)
 +              status = sha1_loose_object_info(sha1, oi->sizep);
 +              if (status >= 0) {
 +                      oi->whence = OI_LOOSE;
                        return status;
 +              }
  
                /* Not a loose object; someone else may have just packed it. */
                reprepare_packed_git();
                        return status;
        }
  
 -      status = packed_object_info(e.p, e.offset, sizep);
 +      status = packed_object_info(e.p, e.offset, oi->sizep, &rtype);
        if (status < 0) {
                mark_bad_packed_object(e.p, sha1);
 -              status = sha1_object_info(sha1, sizep);
 +              status = sha1_object_info_extended(sha1, oi);
 +      } else if (in_delta_base_cache(e.p, e.offset)) {
 +              oi->whence = OI_DBCACHED;
 +      } else {
 +              oi->whence = OI_PACKED;
 +              oi->u.packed.offset = e.offset;
 +              oi->u.packed.pack = e.p;
 +              oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
 +                                       rtype == OBJ_OFS_DELTA);
        }
  
        return status;
  }
  
 +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
 +{
 +      struct object_info oi;
 +
 +      oi.sizep = sizep;
 +      return sha1_object_info_extended(sha1, &oi);
 +}
 +
  static void *read_packed_sha1(const unsigned char *sha1,
                              enum object_type *type, unsigned long *size)
  {
@@@ -2423,7 -2448,7 +2443,7 @@@ static int write_loose_object(const uns
  {
        int fd, ret;
        unsigned char compressed[4096];
 -      z_stream stream;
 +      git_zstream stream;
        git_SHA_CTX c;
        unsigned char parano_sha1[20];
        char *filename;
  
        /* Set it up */
        memset(&stream, 0, sizeof(stream));
 -      deflateInit(&stream, zlib_compression_level);
 +      git_deflate_init(&stream, zlib_compression_level);
        stream.next_out = compressed;
        stream.avail_out = sizeof(compressed);
        git_SHA1_Init(&c);
        /* First header.. */
        stream.next_in = (unsigned char *)hdr;
        stream.avail_in = hdrlen;
 -      while (deflate(&stream, 0) == Z_OK)
 -              /* nothing */;
 +      while (git_deflate(&stream, 0) == Z_OK)
 +              ; /* nothing */
        git_SHA1_Update(&c, hdr, hdrlen);
  
        /* Then the data itself.. */
        stream.avail_in = len;
        do {
                unsigned char *in0 = stream.next_in;
 -              ret = deflate(&stream, Z_FINISH);
 +              ret = git_deflate(&stream, Z_FINISH);
                git_SHA1_Update(&c, in0, stream.next_in - in0);
                if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
                        die("unable to write sha1 file");
  
        if (ret != Z_STREAM_END)
                die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
 -      ret = deflateEnd(&stream);
 +      ret = git_deflate_end_gently(&stream);
        if (ret != Z_OK)
                die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
        git_SHA1_Final(parano_sha1, &c);
@@@ -2703,7 -2728,7 +2723,7 @@@ static int index_stream(unsigned char *
        while (size) {
                char buf[10240];
                size_t sz = size < sizeof(buf) ? size : sizeof(buf);
 -              size_t actual;
 +              ssize_t actual;
  
                actual = read_in_full(fd, buf, sz);
                if (actual < 0)
diff --combined t/t1020-subdirectory.sh
index 865b8ed26d577e154276887f88c8af9d13e62170,7e7996122850500de57e2740731b15a0703b03a4..3b1b985996e9a6b52b032ef45c5be9b1c57b60f6
@@@ -17,8 -17,6 +17,6 @@@ test_expect_success setup 
        cp one original.one &&
        cp dir/two original.two
  '
- LF='
- '
  
  test_expect_success 'update-index and ls-files' '
        git update-index --add one &&
@@@ -140,22 -138,6 +138,22 @@@ test_expect_success 'GIT_PREFIX for !al
        test_cmp expect actual
  '
  
 +test_expect_success 'GIT_PREFIX for built-ins' '
 +      # Use GIT_EXTERNAL_DIFF to test that the "diff" built-in
 +      # receives the GIT_PREFIX variable.
 +      printf "dir/" >expect &&
 +      printf "#!/bin/sh\n" >diff &&
 +      printf "printf \"\$GIT_PREFIX\"" >>diff &&
 +      chmod +x diff &&
 +      (
 +              cd dir &&
 +              printf "change" >two &&
 +              env GIT_EXTERNAL_DIFF=./diff git diff >../actual
 +              git checkout -- two
 +      ) &&
 +      test_cmp expect actual
 +'
 +
  test_expect_success 'no file/rev ambiguity check inside .git' '
        git commit -a -m 1 &&
        (
diff --combined t/test-lib.sh
index 57c3d532933d70e2ff19647442dd5bc7b459d8a1,9d746b051cfa1d936d651210ffe444ea79603e39..e27422217db28f0c6693deb0d9c093ba89a72dda
@@@ -92,6 -92,10 +92,10 @@@ _x40="$_x05$_x05$_x05$_x05$_x05$_x05$_x
  # Zero SHA-1
  _z40=0000000000000000000000000000000000000000
  
+ # Line feed
+ LF='
+ '
  # Each test should start with something like this, after copyright notices:
  #
  # test_description='Description of this test...
@@@ -444,26 -448,20 +448,26 @@@ test_debug () 
        test "$debug" = "" || eval "$1"
  }
  
 +test_eval_ () {
 +      # This is a separate function because some tests use
 +      # "return" to end a test_expect_success block early.
 +      eval >&3 2>&4 "$*"
 +}
 +
  test_run_ () {
        test_cleanup=:
        expecting_failure=$2
 -      eval >&3 2>&4 "$1"
 +      test_eval_ "$1"
        eval_ret=$?
  
        if test -z "$immediate" || test $eval_ret = 0 || test -n "$expecting_failure"
        then
 -              eval >&3 2>&4 "$test_cleanup"
 +              test_eval_ "$test_cleanup"
        fi
        if test "$verbose" = "t" && test -n "$HARNESS_ACTIVE"; then
                echo ""
        fi
 -      return 0
 +      return "$eval_ret"
  }
  
  test_skip () {
@@@ -508,7 -506,8 +512,7 @@@ test_expect_failure () 
        if ! test_skip "$@"
        then
                say >&3 "checking known breakage: $2"
 -              test_run_ "$2" expecting_failure
 -              if [ "$?" = 0 -a "$eval_ret" = 0 ]
 +              if test_run_ "$2" expecting_failure
                then
                        test_known_broken_ok_ "$1"
                else
@@@ -526,7 -525,8 +530,7 @@@ test_expect_success () 
        if ! test_skip "$@"
        then
                say >&3 "expecting success: $2"
 -              test_run_ "$2"
 -              if [ "$?" = 0 -a "$eval_ret" = 0 ]
 +              if test_run_ "$2"
                then
                        test_ok_ "$1"
                else