Merge branch 'cc/replace-object-info'
author Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:32:10 +0000 (10:32 -0800)
committer Junio C Hamano <gitster@pobox.com>
Fri, 10 Jan 2014 18:32:10 +0000 (10:32 -0800)
read_sha1_file(), the workhorse for reading the contents of an
object given its name, honoured object replacements, but there was
no corresponding mechanism in sha1_object_info(), which is used to
obtain the metainfo (e.g. type & size) about the object, exposing
callers to weird inconsistencies.

* cc/replace-object-info:
replace info: rename 'full' to 'long' and clarify in-code symbols
Documentation/git-replace: describe --format option
builtin/replace: unset read_replace_refs
t6050: add tests for listing with --format
builtin/replace: teach listing using short, medium or full formats
sha1_file: perform object replacement in sha1_object_info_extended()
t6050: show that git cat-file --batch fails with replace objects
sha1_object_info_extended(): add an "unsigned flags" parameter
sha1_file.c: add lookup_replace_object_extended() to pass flags
replace_object: don't check read_replace_refs twice
rename READ_SHA1_FILE_REPLACE flag to LOOKUP_REPLACE_OBJECT

1  2 
builtin/cat-file.c
sha1_file.c
diff --combined builtin/cat-file.c
index f8288c830cad215af63dd6d575c9bd4bf7aeaf6c,b15c0649e9d3b584b5c37d8b1038c4ff61965292..f12071adc4079c9bbea4a43a89b0b3bc6a4bbc48
@@@ -193,28 -193,25 +193,28 @@@ static size_t expand_format(struct strb
        return end - start + 1;
  }
  
 -static void print_object_or_die(int fd, const unsigned char *sha1,
 -                              enum object_type type, unsigned long size)
 +static void print_object_or_die(int fd, struct expand_data *data)
  {
 -      if (type == OBJ_BLOB) {
 +      const unsigned char *sha1 = data->sha1;
 +
 +      assert(data->info.typep);
 +
 +      if (data->type == OBJ_BLOB) {
                if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
                        die("unable to stream %s to stdout", sha1_to_hex(sha1));
        }
        else {
 -              enum object_type rtype;
 -              unsigned long rsize;
 +              enum object_type type;
 +              unsigned long size;
                void *contents;
  
 -              contents = read_sha1_file(sha1, &rtype, &rsize);
 +              contents = read_sha1_file(sha1, &type, &size);
                if (!contents)
                        die("object %s disappeared", sha1_to_hex(sha1));
 -              if (rtype != type)
 +              if (type != data->type)
                        die("object %s changed type!?", sha1_to_hex(sha1));
 -              if (rsize != size)
 -                      die("object %s change size!?", sha1_to_hex(sha1));
 +              if (data->info.sizep && size != data->size)
 +                      die("object %s changed size!?", sha1_to_hex(sha1));
  
                write_or_die(fd, contents, size);
                free(contents);
@@@ -241,7 -238,7 +241,7 @@@ static int batch_one_object(const char 
                return 0;
        }
  
-       if (sha1_object_info_extended(data->sha1, &data->info) < 0) {
+       if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
                printf("%s missing\n", obj_name);
                fflush(stdout);
                return 0;
        strbuf_release(&buf);
  
        if (opt->print_contents) {
 -              print_object_or_die(1, data->sha1, data->type, data->size);
 +              print_object_or_die(1, data);
                write_or_die(1, "\n", 1);
        }
        return 0;
@@@ -277,13 -274,6 +277,13 @@@ static int batch_objects(struct batch_o
        strbuf_expand(&buf, opt->format, expand_format, &data);
        data.mark_query = 0;
  
 +      /*
 +       * If we are printing out the object, then always fill in the type,
 +       * since we will want to decide whether or not to stream.
 +       */
 +      if (opt->print_contents)
 +              data.info.typep = &data.type;
 +
        /*
         * We are going to call get_sha1 on a potentially very large number of
         * objects. In most large cases, these will be actual object sha1s. The
diff --combined sha1_file.c
index daacc0cfb0132e98a98ba9645991f3d3f228d1cd,0ca6770bed8e29e606b118f33ed07520ec797b9a..ee224e412889ce437e71afbc48f111ecae66e118
@@@ -1442,6 -1442,51 +1442,6 @@@ void *map_sha1_file(const unsigned cha
        return map;
  }
  
 -/*
 - * There used to be a second loose object header format which
 - * was meant to mimic the in-pack format, allowing for direct
 - * copy of the object data.  This format turned up not to be
 - * really worth it and we no longer write loose objects in that
 - * format.
 - */
 -static int experimental_loose_object(unsigned char *map)
 -{
 -      unsigned int word;
 -
 -      /*
 -       * We must determine if the buffer contains the standard
 -       * zlib-deflated stream or the experimental format based
 -       * on the in-pack object format. Compare the header byte
 -       * for each format:
 -       *
 -       * RFC1950 zlib w/ deflate : 0www1000 : 0 <= www <= 7
 -       * Experimental pack-based : Stttssss : ttt = 1,2,3,4
 -       *
 -       * If bit 7 is clear and bits 0-3 equal 8, the buffer MUST be
 -       * in standard loose-object format, UNLESS it is a Git-pack
 -       * format object *exactly* 8 bytes in size when inflated.
 -       *
 -       * However, RFC1950 also specifies that the 1st 16-bit word
 -       * must be divisible by 31 - this checksum tells us our buffer
 -       * is in the standard format, giving a false positive only if
 -       * the 1st word of the Git-pack format object happens to be
 -       * divisible by 31, ie:
 -       *      ((byte0 * 256) + byte1) % 31 = 0
 -       *   =>        0ttt10000www1000 % 31 = 0
 -       *
 -       * As it happens, this case can only arise for www=3 & ttt=1
 -       * - ie, a Commit object, which would have to be 8 bytes in
 -       * size. As no Commit can be that small, we find that the
 -       * combination of these two criteria (bitmask & checksum)
 -       * can always correctly determine the buffer format.
 -       */
 -      word = (map[0] << 8) + map[1];
 -      if ((map[0] & 0x8F) == 0x08 && !(word % 31))
 -              return 0;
 -      else
 -              return 1;
 -}
 -
  unsigned long unpack_object_header_buffer(const unsigned char *buf,
                unsigned long len, enum object_type *type, unsigned long *sizep)
  {
  
  int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
  {
 -      unsigned long size, used;
 -      static const char valid_loose_object_type[8] = {
 -              0, /* OBJ_EXT */
 -              1, 1, 1, 1, /* "commit", "tree", "blob", "tag" */
 -              0, /* "delta" and others are invalid in a loose object */
 -      };
 -      enum object_type type;
 -
        /* Get the data stream */
        memset(stream, 0, sizeof(*stream));
        stream->next_in = map;
        stream->next_out = buffer;
        stream->avail_out = bufsiz;
  
 -      if (experimental_loose_object(map)) {
 -              /*
 -               * The old experimental format we no longer produce;
 -               * we can still read it.
 -               */
 -              used = unpack_object_header_buffer(map, mapsize, &type, &size);
 -              if (!used || !valid_loose_object_type[type])
 -                      return -1;
 -              map += used;
 -              mapsize -= used;
 -
 -              /* Set up the stream for the rest.. */
 -              stream->next_in = map;
 -              stream->avail_in = mapsize;
 -              git_inflate_init(stream);
 -
 -              /* And generate the fake traditional header */
 -              stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu",
 -                                               typename(type), size);
 -              return 0;
 -      }
        git_inflate_init(stream);
        return git_inflate(stream, 0);
  }
@@@ -2409,18 -2483,15 +2409,18 @@@ static int sha1_loose_object_info(cons
  
        /*
         * If we don't care about type or size, then we don't
 -       * need to look inside the object at all.
 +       * need to look inside the object at all. Note that we
 +       * do not optimize out the stat call, even if the
 +       * caller doesn't care about the disk-size, since our
 +       * return value implicitly indicates whether the
 +       * object even exists.
         */
        if (!oi->typep && !oi->sizep) {
 -              if (oi->disk_sizep) {
 -                      struct stat st;
 -                      if (stat_sha1_file(sha1, &st) < 0)
 -                              return -1;
 +              struct stat st;
 +              if (stat_sha1_file(sha1, &st) < 0)
 +                      return -1;
 +              if (oi->disk_sizep)
                        *oi->disk_sizep = st.st_size;
 -              }
                return 0;
        }
  
        return 0;
  }
  
- int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
+ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
  {
        struct cached_object *co;
        struct pack_entry e;
        int rtype;
+       const unsigned char *real = lookup_replace_object_extended(sha1, flags);
  
-       co = find_cached_object(sha1);
+       co = find_cached_object(real);
        if (co) {
                if (oi->typep)
                        *(oi->typep) = co->type;
                return 0;
        }
  
-       if (!find_pack_entry(sha1, &e)) {
+       if (!find_pack_entry(real, &e)) {
                /* Most likely it's a loose object. */
-               if (!sha1_loose_object_info(sha1, oi)) {
+               if (!sha1_loose_object_info(real, oi)) {
                        oi->whence = OI_LOOSE;
                        return 0;
                }
  
                /* Not a loose object; someone else may have just packed it. */
                reprepare_packed_git();
-               if (!find_pack_entry(sha1, &e))
+               if (!find_pack_entry(real, &e))
                        return -1;
        }
  
        rtype = packed_object_info(e.p, e.offset, oi);
        if (rtype < 0) {
-               mark_bad_packed_object(e.p, sha1);
-               return sha1_object_info_extended(sha1, oi);
+               mark_bad_packed_object(e.p, real);
+               return sha1_object_info_extended(real, oi, 0);
        } else if (in_delta_base_cache(e.p, e.offset)) {
                oi->whence = OI_DBCACHED;
        } else {
@@@ -2499,7 -2571,7 +2500,7 @@@ int sha1_object_info(const unsigned cha
  
        oi.typep = &type;
        oi.sizep = sizep;
-       if (sha1_object_info_extended(sha1, &oi) < 0)
+       if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0)
                return -1;
        return type;
  }
@@@ -2591,8 -2663,7 +2592,7 @@@ void *read_sha1_file_extended(const uns
        void *data;
        char *path;
        const struct packed_git *p;
-       const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE)
-               ? lookup_replace_object(sha1) : sha1;
+       const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
  
        errno = 0;
        data = read_object(repl, type, size);
@@@ -2786,9 -2857,7 +2786,9 @@@ static int create_tmpfile(char *buffer
                /* Make sure the directory exists */
                memcpy(buffer, filename, dirlen);
                buffer[dirlen-1] = 0;
 -              if (mkdir(buffer, 0777) || adjust_shared_perm(buffer))
 +              if (mkdir(buffer, 0777) && errno != EEXIST)
 +                      return -1;
 +              if (adjust_shared_perm(buffer))
                        return -1;
  
                /* Try again */