Merge branch 'ew/fd-cloexec-fix'
author Junio C Hamano <gitster@pobox.com>
Thu, 20 Jul 2017 23:30:00 +0000 (16:30 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 20 Jul 2017 23:30:00 +0000 (16:30 -0700)
Portability/fallback fix.

* ew/fd-cloexec-fix:
set FD_CLOEXEC properly when O_CLOEXEC is not supported

1  2 
sha1_file.c
diff --combined sha1_file.c
index fca165f13ccd145c184eaa8601bd60156d5b073b,5e3b6e411a5f7750d36ff18a4c27a199e3993685..b60ae15f7068c157df6407933ea7ee94a53f0640
@@@ -7,7 -7,6 +7,7 @@@
   * creation etc.
   */
  #include "cache.h"
 +#include "config.h"
  #include "string-list.h"
  #include "lockfile.h"
  #include "delta.h"
  #include "mru.h"
  #include "list.h"
  #include "mergesort.h"
 -
 -#ifndef O_NOATIME
 -#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
 -#define O_NOATIME 01000000
 -#else
 -#define O_NOATIME 0
 -#endif
 -#endif
 +#include "quote.h"
  
  #define SZ_FMT PRIuMAX
  static inline uintmax_t sz_fmt(size_t s) { return s; }
@@@ -130,10 -136,8 +130,10 @@@ enum scld_error safe_create_leading_dir
                *slash = '\0';
                if (!stat(path, &st)) {
                        /* path exists */
 -                      if (!S_ISDIR(st.st_mode))
 +                      if (!S_ISDIR(st.st_mode)) {
 +                              errno = ENOTDIR;
                                ret = SCLD_EXISTS;
 +                      }
                } else if (mkdir(path, 0777)) {
                        if (errno == EEXIST &&
                            !stat(path, &st) && S_ISDIR(st.st_mode))
  
  enum scld_error safe_create_leading_directories_const(const char *path)
  {
 +      int save_errno;
        /* path points to cache entries, so xstrdup before messing with it */
        char *buf = xstrdup(path);
        enum scld_error result = safe_create_leading_directories(buf);
 +
 +      save_errno = errno;
        free(buf);
 +      errno = save_errno;
        return result;
  }
  
 +int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
 +{
 +      /*
 +       * The number of times we will try to remove empty directories
 +       * in the way of path. This is only 1 because if another
 +       * process is racily creating directories that conflict with
 +       * us, we don't want to fight against them.
 +       */
 +      int remove_directories_remaining = 1;
 +
 +      /*
 +       * The number of times that we will try to create the
 +       * directories containing path. We are willing to attempt this
 +       * more than once, because another process could be trying to
 +       * clean up empty directories at the same time as we are
 +       * trying to create them.
 +       */
 +      int create_directories_remaining = 3;
 +
 +      /* A scratch copy of path, filled lazily if we need it: */
 +      struct strbuf path_copy = STRBUF_INIT;
 +
 +      int ret, save_errno;
 +
 +      /* Sanity check: */
 +      assert(*path);
 +
 +retry_fn:
 +      ret = fn(path, cb);
 +      save_errno = errno;
 +      if (!ret)
 +              goto out;
 +
 +      if (errno == EISDIR && remove_directories_remaining-- > 0) {
 +              /*
 +               * A directory is in the way. Maybe it is empty; try
 +               * to remove it:
 +               */
 +              if (!path_copy.len)
 +                      strbuf_addstr(&path_copy, path);
 +
 +              if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
 +                      goto retry_fn;
 +      } else if (errno == ENOENT && create_directories_remaining-- > 0) {
 +              /*
 +               * Maybe the containing directory didn't exist, or
 +               * maybe it was just deleted by a process that is
 +               * racing with us to clean up empty directories. Try
 +               * to create it:
 +               */
 +              enum scld_error scld_result;
 +
 +              if (!path_copy.len)
 +                      strbuf_addstr(&path_copy, path);
 +
 +              do {
 +                      scld_result = safe_create_leading_directories(path_copy.buf);
 +                      if (scld_result == SCLD_OK)
 +                              goto retry_fn;
 +              } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
 +      }
 +
 +out:
 +      strbuf_release(&path_copy);
 +      errno = save_errno;
 +      return ret;
 +}
 +
  static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
  {
        int i;
@@@ -278,26 -210,31 +278,26 @@@ static const char *alt_sha1_path(struc
        return buf->buf;
  }
  
 -/*
 - * Return the name of the pack or index file with the specified sha1
 - * in its filename.  *base and *name are scratch space that must be
 - * provided by the caller.  which should be "pack" or "idx".
 - */
 -static char *sha1_get_pack_name(const unsigned char *sha1,
 -                              struct strbuf *buf,
 -                              const char *which)
 + char *odb_pack_name(struct strbuf *buf,
 +                   const unsigned char *sha1,
 +                   const char *ext)
  {
        strbuf_reset(buf);
        strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
 -                  sha1_to_hex(sha1), which);
 +                  sha1_to_hex(sha1), ext);
        return buf->buf;
  }
  
  char *sha1_pack_name(const unsigned char *sha1)
  {
        static struct strbuf buf = STRBUF_INIT;
 -      return sha1_get_pack_name(sha1, &buf, "pack");
 +      return odb_pack_name(&buf, sha1, "pack");
  }
  
  char *sha1_pack_index_name(const unsigned char *sha1)
  {
        static struct strbuf buf = STRBUF_INIT;
 -      return sha1_get_pack_name(sha1, &buf, "idx");
 +      return odb_pack_name(&buf, sha1, "idx");
  }
  
  struct alternate_object_database *alt_odb_list;
@@@ -354,12 -291,12 +354,12 @@@ static int link_alt_odb_entry(const cha
        struct strbuf pathbuf = STRBUF_INIT;
  
        if (!is_absolute_path(entry) && relative_base) {
 -              strbuf_addstr(&pathbuf, real_path(relative_base));
 +              strbuf_realpath(&pathbuf, relative_base, 1);
                strbuf_addch(&pathbuf, '/');
        }
        strbuf_addstr(&pathbuf, entry);
  
 -      if (strbuf_normalize_path(&pathbuf) < 0) {
 +      if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
                error("unable to normalize alternate object path: %s",
                      pathbuf.buf);
                strbuf_release(&pathbuf);
        return 0;
  }
  
 +static const char *parse_alt_odb_entry(const char *string,
 +                                     int sep,
 +                                     struct strbuf *out)
 +{
 +      const char *end;
 +
 +      strbuf_reset(out);
 +
 +      if (*string == '#') {
 +              /* comment; consume up to next separator */
 +              end = strchrnul(string, sep);
 +      } else if (*string == '"' && !unquote_c_style(out, string, &end)) {
 +              /*
 +               * quoted path; unquote_c_style has copied the
 +               * data for us and set "end". Broken quoting (e.g.,
 +               * an entry that doesn't end with a quote) falls
 +               * back to the unquoted case below.
 +               */
 +      } else {
 +              /* normal, unquoted path */
 +              end = strchrnul(string, sep);
 +              strbuf_add(out, string, end - string);
 +      }
 +
 +      if (*end)
 +              end++;
 +      return end;
 +}
 +
  static void link_alt_odb_entries(const char *alt, int len, int sep,
                                 const char *relative_base, int depth)
  {
 -      struct string_list entries = STRING_LIST_INIT_NODUP;
 -      char *alt_copy;
 -      int i;
        struct strbuf objdirbuf = STRBUF_INIT;
 +      struct strbuf entry = STRBUF_INIT;
  
        if (depth > 5) {
                error("%s: ignoring alternate object stores, nesting too deep.",
                die("unable to normalize object directory: %s",
                    objdirbuf.buf);
  
 -      alt_copy = xmemdupz(alt, len);
 -      string_list_split_in_place(&entries, alt_copy, sep, -1);
 -      for (i = 0; i < entries.nr; i++) {
 -              const char *entry = entries.items[i].string;
 -              if (entry[0] == '\0' || entry[0] == '#')
 +      while (*alt) {
 +              alt = parse_alt_odb_entry(alt, sep, &entry);
 +              if (!entry.len)
                        continue;
 -              link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf);
 +              link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf);
        }
 -      string_list_clear(&entries, 0);
 -      free(alt_copy);
 +      strbuf_release(&entry);
        strbuf_release(&objdirbuf);
  }
  
@@@ -611,7 -524,8 +611,7 @@@ char *compute_alternate_path(const cha
  
  out:
        if (seen_error) {
 -              free(ref_git);
 -              ref_git = NULL;
 +              FREE_AND_NULL(ref_git);
        }
  
        return ref_git;
@@@ -662,7 -576,7 +662,7 @@@ static int freshen_file(const char *fn
   * either does not exist on disk, or has a stale mtime and may be subject to
   * pruning).
   */
 -static int check_and_freshen_file(const char *fn, int freshen)
 +int check_and_freshen_file(const char *fn, int freshen)
  {
        if (access(fn, F_OK))
                return 0;
@@@ -1496,32 -1410,6 +1496,32 @@@ static void prepare_packed_git_one(cha
        strbuf_release(&path);
  }
  
 +static int approximate_object_count_valid;
 +
 +/*
 + * Give a fast, rough count of the number of objects in the repository. This
 + * ignores loose objects completely. If you have a lot of them, then either
 + * you should repack because your performance will be awful, or they are
 + * all unreachable objects about to be pruned, in which case they're not really
 + * interesting as a measure of repo size in the first place.
 + */
 +unsigned long approximate_object_count(void)
 +{
 +      static unsigned long count;
 +      if (!approximate_object_count_valid) {
 +              struct packed_git *p;
 +
 +              prepare_packed_git();
 +              count = 0;
 +              for (p = packed_git; p; p = p->next) {
 +                      if (open_pack_index(p))
 +                              continue;
 +                      count += p->num_objects;
 +              }
 +      }
 +      return count;
 +}
 +
  static void *get_next_packed_git(const void *p)
  {
        return ((const struct packed_git *)p)->next;
@@@ -1593,7 -1481,6 +1593,7 @@@ void prepare_packed_git(void
  
  void reprepare_packed_git(void)
  {
 +      approximate_object_count_valid = 0;
        prepare_packed_git_run_once = 0;
        prepare_packed_git();
  }
@@@ -1606,7 -1493,7 +1606,7 @@@ static void mark_bad_packed_object(stru
                if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
                        return;
        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
 -                                    st_mult(GIT_SHA1_RAWSZ,
 +                                    st_mult(GIT_MAX_RAWSZ,
                                              st_add(p->num_bad_objects, 1)));
        hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
        p->num_bad_objects++;
@@@ -1684,14 -1571,14 +1684,14 @@@ int git_open_cloexec(const char *name, 
                fd = open(name, flags | o_cloexec);
        }
  
- #if defined(F_GETFL) && defined(F_SETFL) && defined(FD_CLOEXEC)
+ #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
        {
                static int fd_cloexec = FD_CLOEXEC;
  
                if (!o_cloexec && 0 <= fd && fd_cloexec) {
                        /* Opened w/o O_CLOEXEC?  try with fcntl(2) to add it */
-                       int flags = fcntl(fd, F_GETFL);
-                       if (fcntl(fd, F_SETFL, flags | fd_cloexec))
+                       int flags = fcntl(fd, F_GETFD);
+                       if (fcntl(fd, F_SETFD, flags | fd_cloexec))
                                fd_cloexec = 0;
                }
        }
        return fd;
  }
  
 -int git_open(const char *name)
 -{
 -      static int noatime = O_NOATIME;
 -      int fd = git_open_cloexec(name, O_RDONLY);
 -
 -      if (0 <= fd && (noatime & O_NOATIME)) {
 -              int flags = fcntl(fd, F_GETFL);
 -              if (fcntl(fd, F_SETFL, flags | noatime))
 -                      noatime = 0;
 -      }
 -      return fd;
 -}
 -
 -static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
 +/*
 + * Find "sha1" as a loose object in the local repository or in an alternate.
 + * Returns 0 on success, negative on failure.
 + *
 + * The "path" out-parameter will give the path of the object we found (if any).
 + * Note that it may point to static storage and is only valid until another
 + * call to sha1_file_name(), etc.
 + */
 +static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
 +                        const char **path)
  {
        struct alternate_object_database *alt;
  
 -      if (!lstat(sha1_file_name(sha1), st))
 +      *path = sha1_file_name(sha1);
 +      if (!lstat(*path, st))
                return 0;
  
        prepare_alt_odb();
        errno = ENOENT;
        for (alt = alt_odb_list; alt; alt = alt->next) {
 -              const char *path = alt_sha1_path(alt, sha1);
 -              if (!lstat(path, st))
 +              *path = alt_sha1_path(alt, sha1);
 +              if (!lstat(*path, st))
                        return 0;
        }
  
        return -1;
  }
  
 -static int open_sha1_file(const unsigned char *sha1)
 +/*
 + * Like stat_sha1_file(), but actually open the object and return the
 + * descriptor. See the caveats on the "path" parameter above.
 + */
 +static int open_sha1_file(const unsigned char *sha1, const char **path)
  {
        int fd;
        struct alternate_object_database *alt;
        int most_interesting_errno;
  
 -      fd = git_open(sha1_file_name(sha1));
 +      *path = sha1_file_name(sha1);
 +      fd = git_open(*path);
        if (fd >= 0)
                return fd;
        most_interesting_errno = errno;
  
        prepare_alt_odb();
        for (alt = alt_odb_list; alt; alt = alt->next) {
 -              const char *path = alt_sha1_path(alt, sha1);
 -              fd = git_open(path);
 +              *path = alt_sha1_path(alt, sha1);
 +              fd = git_open(*path);
                if (fd >= 0)
                        return fd;
                if (most_interesting_errno == ENOENT)
        return -1;
  }
  
 -void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 +/*
 + * Map the loose object at "path" if it is not NULL, or the path found by
 + * searching for a loose object named "sha1".
 + */
 +static void *map_sha1_file_1(const char *path,
 +                           const unsigned char *sha1,
 +                           unsigned long *size)
  {
        void *map;
        int fd;
  
 -      fd = open_sha1_file(sha1);
 +      if (path)
 +              fd = git_open(path);
 +      else
 +              fd = open_sha1_file(sha1, &path);
        map = NULL;
        if (fd >= 0) {
                struct stat st;
                        *size = xsize_t(st.st_size);
                        if (!*size) {
                                /* mmap() is forbidden on empty files */
 -                              error("object file %s is empty", sha1_file_name(sha1));
 +                              error("object file %s is empty", path);
                                return NULL;
                        }
                        map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
        return map;
  }
  
 +void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 +{
 +      return map_sha1_file_1(NULL, sha1, size);
 +}
 +
  unsigned long unpack_object_header_buffer(const unsigned char *buf,
                unsigned long len, enum object_type *type, unsigned long *sizep)
  {
@@@ -1964,7 -1835,7 +1964,7 @@@ static int parse_sha1_header_extended(c
         * we're obtaining the type using '--allow-unknown-type'
         * option.
         */
 -      if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
 +      if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0))
                type = 0;
        else if (type < 0)
                die("invalid object type");
@@@ -2002,7 -1873,20 +2002,7 @@@ int parse_sha1_header(const char *hdr, 
        struct object_info oi = OBJECT_INFO_INIT;
  
        oi.sizep = sizep;
 -      return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
 -}
 -
 -static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
 -{
 -      int ret;
 -      git_zstream stream;
 -      char hdr[8192];
 -
 -      ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
 -      if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0)
 -              return NULL;
 -
 -      return unpack_sha1_rest(&stream, hdr, *size, sha1);
 +      return parse_sha1_header_extended(hdr, &oi, 0);
  }
  
  unsigned long get_size_from_delta(struct packed_git *p,
@@@ -2226,6 -2110,107 +2226,6 @@@ unwind
        goto out;
  }
  
 -int packed_object_info(struct packed_git *p, off_t obj_offset,
 -                     struct object_info *oi)
 -{
 -      struct pack_window *w_curs = NULL;
 -      unsigned long size;
 -      off_t curpos = obj_offset;
 -      enum object_type type;
 -
 -      /*
 -       * We always get the representation type, but only convert it to
 -       * a "real" type later if the caller is interested.
 -       */
 -      type = unpack_object_header(p, &w_curs, &curpos, &size);
 -
 -      if (oi->sizep) {
 -              if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
 -                      off_t tmp_pos = curpos;
 -                      off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
 -                                                         type, obj_offset);
 -                      if (!base_offset) {
 -                              type = OBJ_BAD;
 -                              goto out;
 -                      }
 -                      *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
 -                      if (*oi->sizep == 0) {
 -                              type = OBJ_BAD;
 -                              goto out;
 -                      }
 -              } else {
 -                      *oi->sizep = size;
 -              }
 -      }
 -
 -      if (oi->disk_sizep) {
 -              struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
 -              *oi->disk_sizep = revidx[1].offset - obj_offset;
 -      }
 -
 -      if (oi->typep) {
 -              *oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
 -              if (*oi->typep < 0) {
 -                      type = OBJ_BAD;
 -                      goto out;
 -              }
 -      }
 -
 -      if (oi->delta_base_sha1) {
 -              if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
 -                      const unsigned char *base;
 -
 -                      base = get_delta_base_sha1(p, &w_curs, curpos,
 -                                                 type, obj_offset);
 -                      if (!base) {
 -                              type = OBJ_BAD;
 -                              goto out;
 -                      }
 -
 -                      hashcpy(oi->delta_base_sha1, base);
 -              } else
 -                      hashclr(oi->delta_base_sha1);
 -      }
 -
 -out:
 -      unuse_pack(&w_curs);
 -      return type;
 -}
 -
 -static void *unpack_compressed_entry(struct packed_git *p,
 -                                  struct pack_window **w_curs,
 -                                  off_t curpos,
 -                                  unsigned long size)
 -{
 -      int st;
 -      git_zstream stream;
 -      unsigned char *buffer, *in;
 -
 -      buffer = xmallocz_gently(size);
 -      if (!buffer)
 -              return NULL;
 -      memset(&stream, 0, sizeof(stream));
 -      stream.next_out = buffer;
 -      stream.avail_out = size + 1;
 -
 -      git_inflate_init(&stream);
 -      do {
 -              in = use_pack(p, w_curs, curpos, &stream.avail_in);
 -              stream.next_in = in;
 -              st = git_inflate(&stream, Z_FINISH);
 -              if (!stream.avail_out)
 -                      break; /* the payload is larger than it should be */
 -              curpos += stream.next_in - in;
 -      } while (st == Z_OK || st == Z_BUF_ERROR);
 -      git_inflate_end(&stream);
 -      if ((st != Z_STREAM_END) || stream.total_out != size) {
 -              free(buffer);
 -              return NULL;
 -      }
 -
 -      return buffer;
 -}
 -
  static struct hashmap delta_base_cache;
  static size_t delta_base_cached;
  
@@@ -2275,8 -2260,7 +2275,8 @@@ static int delta_base_cache_key_eq(cons
        return a->p == b->p && a->base_offset == b->base_offset;
  }
  
 -static int delta_base_cache_hash_cmp(const void *va, const void *vb,
 +static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
 +                                   const void *va, const void *vb,
                                     const void *vkey)
  {
        const struct delta_base_cache_entry *a = va, *b = vb;
@@@ -2314,10 -2298,8 +2314,10 @@@ static void *cache_or_unpack_entry(stru
        if (!ent)
                return unpack_entry(p, base_offset, type, base_size);
  
 -      *type = ent->type;
 -      *base_size = ent->size;
 +      if (type)
 +              *type = ent->type;
 +      if (base_size)
 +              *base_size = ent->size;
        return xmemdupz(ent->data, ent->size);
  }
  
@@@ -2329,10 -2311,11 +2329,10 @@@ static inline void release_delta_base_c
  
  void clear_delta_base_cache(void)
  {
 -      struct hashmap_iter iter;
 -      struct delta_base_cache_entry *entry;
 -      for (entry = hashmap_iter_first(&delta_base_cache, &iter);
 -           entry;
 -           entry = hashmap_iter_next(&iter)) {
 +      struct list_head *lru, *tmp;
 +      list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
 +              struct delta_base_cache_entry *entry =
 +                      list_entry(lru, struct delta_base_cache_entry, lru);
                release_delta_base_cache(entry);
        }
  }
@@@ -2361,128 -2344,11 +2361,128 @@@ static void add_delta_base_cache(struc
        list_add_tail(&ent->lru, &delta_base_cache_lru);
  
        if (!delta_base_cache.cmpfn)
 -              hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, 0);
 +              hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
        hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
        hashmap_add(&delta_base_cache, ent);
  }
  
 +int packed_object_info(struct packed_git *p, off_t obj_offset,
 +                     struct object_info *oi)
 +{
 +      struct pack_window *w_curs = NULL;
 +      unsigned long size;
 +      off_t curpos = obj_offset;
 +      enum object_type type;
 +
 +      /*
 +       * We always get the representation type, but only convert it to
 +       * a "real" type later if the caller is interested.
 +       */
 +      if (oi->contentp) {
 +              *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
 +                                                    &type);
 +              if (!*oi->contentp)
 +                      type = OBJ_BAD;
 +      } else {
 +              type = unpack_object_header(p, &w_curs, &curpos, &size);
 +      }
 +
 +      if (!oi->contentp && oi->sizep) {
 +              if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
 +                      off_t tmp_pos = curpos;
 +                      off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
 +                                                         type, obj_offset);
 +                      if (!base_offset) {
 +                              type = OBJ_BAD;
 +                              goto out;
 +                      }
 +                      *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
 +                      if (*oi->sizep == 0) {
 +                              type = OBJ_BAD;
 +                              goto out;
 +                      }
 +              } else {
 +                      *oi->sizep = size;
 +              }
 +      }
 +
 +      if (oi->disk_sizep) {
 +              struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
 +              *oi->disk_sizep = revidx[1].offset - obj_offset;
 +      }
 +
 +      if (oi->typep || oi->typename) {
 +              enum object_type ptot;
 +              ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
 +                                           curpos);
 +              if (oi->typep)
 +                      *oi->typep = ptot;
 +              if (oi->typename) {
 +                      const char *tn = typename(ptot);
 +                      if (tn)
 +                              strbuf_addstr(oi->typename, tn);
 +              }
 +              if (ptot < 0) {
 +                      type = OBJ_BAD;
 +                      goto out;
 +              }
 +      }
 +
 +      if (oi->delta_base_sha1) {
 +              if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
 +                      const unsigned char *base;
 +
 +                      base = get_delta_base_sha1(p, &w_curs, curpos,
 +                                                 type, obj_offset);
 +                      if (!base) {
 +                              type = OBJ_BAD;
 +                              goto out;
 +                      }
 +
 +                      hashcpy(oi->delta_base_sha1, base);
 +              } else
 +                      hashclr(oi->delta_base_sha1);
 +      }
 +
 +out:
 +      unuse_pack(&w_curs);
 +      return type;
 +}
 +
 +static void *unpack_compressed_entry(struct packed_git *p,
 +                                  struct pack_window **w_curs,
 +                                  off_t curpos,
 +                                  unsigned long size)
 +{
 +      int st;
 +      git_zstream stream;
 +      unsigned char *buffer, *in;
 +
 +      buffer = xmallocz_gently(size);
 +      if (!buffer)
 +              return NULL;
 +      memset(&stream, 0, sizeof(stream));
 +      stream.next_out = buffer;
 +      stream.avail_out = size + 1;
 +
 +      git_inflate_init(&stream);
 +      do {
 +              in = use_pack(p, w_curs, curpos, &stream.avail_in);
 +              stream.next_in = in;
 +              st = git_inflate(&stream, Z_FINISH);
 +              if (!stream.avail_out)
 +                      break; /* the payload is larger than it should be */
 +              curpos += stream.next_in - in;
 +      } while (st == Z_OK || st == Z_BUF_ERROR);
 +      git_inflate_end(&stream);
 +      if ((st != Z_STREAM_END) || stream.total_out != size) {
 +              free(buffer);
 +              return NULL;
 +      }
 +
 +      return buffer;
 +}
 +
  static void *read_object(const unsigned char *sha1, enum object_type *type,
                         unsigned long *size);
  
@@@ -2607,7 -2473,6 +2607,7 @@@ void *unpack_entry(struct packed_git *p
        while (delta_stack_nr) {
                void *delta_data;
                void *base = data;
 +              void *external_base = NULL;
                unsigned long delta_size, base_size = size;
                int i;
  
                                      p->pack_name);
                                mark_bad_packed_object(p, base_sha1);
                                base = read_object(base_sha1, &type, &base_size);
 +                              external_base = base;
                        }
                }
  
                              "at offset %"PRIuMAX" from %s",
                              (uintmax_t)curpos, p->pack_name);
                        data = NULL;
 +                      free(external_base);
                        continue;
                }
  
                        error("failed to apply delta");
  
                free(delta_data);
 +              free(external_base);
        }
  
 -      *final_type = type;
 -      *final_size = size;
 +      if (final_type)
 +              *final_type = type;
 +      if (final_size)
 +              *final_size = size;
  
        unuse_pack(&w_curs);
  
@@@ -2709,17 -2569,6 +2709,17 @@@ const unsigned char *nth_packed_object_
        }
  }
  
 +const struct object_id *nth_packed_object_oid(struct object_id *oid,
 +                                            struct packed_git *p,
 +                                            uint32_t n)
 +{
 +      const unsigned char *hash = nth_packed_object_sha1(p, n);
 +      if (!hash)
 +              return NULL;
 +      hashcpy(oid->hash, hash);
 +      return oid;
 +}
 +
  void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
  {
        const unsigned char *ptr = vptr;
@@@ -2913,7 -2762,6 +2913,7 @@@ static int sha1_loose_object_info(cons
        git_zstream stream;
        char hdr[32];
        struct strbuf hdrbuf = STRBUF_INIT;
 +      unsigned long size_scratch;
  
        if (oi->delta_base_sha1)
                hashclr(oi->delta_base_sha1);
         * return value implicitly indicates whether the
         * object even exists.
         */
 -      if (!oi->typep && !oi->typename && !oi->sizep) {
 +      if (!oi->typep && !oi->typename && !oi->sizep && !oi->contentp) {
 +              const char *path;
                struct stat st;
 -              if (stat_sha1_file(sha1, &st) < 0)
 +              if (stat_sha1_file(sha1, &st, &path) < 0)
                        return -1;
                if (oi->disk_sizep)
                        *oi->disk_sizep = st.st_size;
        map = map_sha1_file(sha1, &mapsize);
        if (!map)
                return -1;
 +
 +      if (!oi->sizep)
 +              oi->sizep = &size_scratch;
 +
        if (oi->disk_sizep)
                *oi->disk_sizep = mapsize;
 -      if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
 +      if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) {
                if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
                        status = error("unable to unpack %s header with --allow-unknown-type",
                                       sha1_to_hex(sha1));
                                       sha1_to_hex(sha1));
        } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
                status = error("unable to parse %s header", sha1_to_hex(sha1));
 -      git_inflate_end(&stream);
 +
 +      if (status >= 0 && oi->contentp)
 +              *oi->contentp = unpack_sha1_rest(&stream, hdr,
 +                                               *oi->sizep, sha1);
 +      else
 +              git_inflate_end(&stream);
 +
        munmap(map, mapsize);
        if (status && oi->typep)
                *oi->typep = status;
 +      if (oi->sizep == &size_scratch)
 +              oi->sizep = NULL;
        strbuf_release(&hdrbuf);
 -      return 0;
 +      return (status < 0) ? status : 0;
  }
  
  int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
  {
 -      struct cached_object *co;
 +      static struct object_info blank_oi = OBJECT_INFO_INIT;
        struct pack_entry e;
        int rtype;
 -      enum object_type real_type;
 -      const unsigned char *real = lookup_replace_object_extended(sha1, flags);
 -
 -      co = find_cached_object(real);
 -      if (co) {
 -              if (oi->typep)
 -                      *(oi->typep) = co->type;
 -              if (oi->sizep)
 -                      *(oi->sizep) = co->size;
 -              if (oi->disk_sizep)
 -                      *(oi->disk_sizep) = 0;
 -              if (oi->delta_base_sha1)
 -                      hashclr(oi->delta_base_sha1);
 -              if (oi->typename)
 -                      strbuf_addstr(oi->typename, typename(co->type));
 -              oi->whence = OI_CACHED;
 -              return 0;
 +      const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ?
 +                                  lookup_replace_object(sha1) :
 +                                  sha1;
 +
 +      if (!oi)
 +              oi = &blank_oi;
 +
 +      if (!(flags & OBJECT_INFO_SKIP_CACHED)) {
 +              struct cached_object *co = find_cached_object(real);
 +              if (co) {
 +                      if (oi->typep)
 +                              *(oi->typep) = co->type;
 +                      if (oi->sizep)
 +                              *(oi->sizep) = co->size;
 +                      if (oi->disk_sizep)
 +                              *(oi->disk_sizep) = 0;
 +                      if (oi->delta_base_sha1)
 +                              hashclr(oi->delta_base_sha1);
 +                      if (oi->typename)
 +                              strbuf_addstr(oi->typename, typename(co->type));
 +                      if (oi->contentp)
 +                              *oi->contentp = xmemdupz(co->buf, co->size);
 +                      oi->whence = OI_CACHED;
 +                      return 0;
 +              }
        }
  
        if (!find_pack_entry(real, &e)) {
                }
  
                /* Not a loose object; someone else may have just packed it. */
 -              reprepare_packed_git();
 -              if (!find_pack_entry(real, &e))
 +              if (flags & OBJECT_INFO_QUICK) {
                        return -1;
 +              } else {
 +                      reprepare_packed_git();
 +                      if (!find_pack_entry(real, &e))
 +                              return -1;
 +              }
        }
  
 -      /*
 -       * packed_object_info() does not follow the delta chain to
 -       * find out the real type, unless it is given oi->typep.
 -       */
 -      if (oi->typename && !oi->typep)
 -              oi->typep = &real_type;
 +      if (oi == &blank_oi)
 +              /*
 +               * We know that the caller doesn't actually need the
 +               * information below, so return early.
 +               */
 +              return 0;
  
        rtype = packed_object_info(e.p, e.offset, oi);
        if (rtype < 0) {
                mark_bad_packed_object(e.p, real);
 -              if (oi->typep == &real_type)
 -                      oi->typep = NULL;
                return sha1_object_info_extended(real, oi, 0);
        } else if (in_delta_base_cache(e.p, e.offset)) {
                oi->whence = OI_DBCACHED;
                oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
                                         rtype == OBJ_OFS_DELTA);
        }
 -      if (oi->typename)
 -              strbuf_addstr(oi->typename, typename(*oi->typep));
 -      if (oi->typep == &real_type)
 -              oi->typep = NULL;
  
        return 0;
  }
@@@ -3057,8 -2886,7 +3057,8 @@@ int sha1_object_info(const unsigned cha
  
        oi.typep = &type;
        oi.sizep = sizep;
 -      if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0)
 +      if (sha1_object_info_extended(sha1, &oi,
 +                                    OBJECT_INFO_LOOKUP_REPLACE) < 0)
                return -1;
        return type;
  }
@@@ -3108,15 -2936,28 +3108,15 @@@ int pretend_sha1_file(void *buf, unsign
  static void *read_object(const unsigned char *sha1, enum object_type *type,
                         unsigned long *size)
  {
 -      unsigned long mapsize;
 -      void *map, *buf;
 -      struct cached_object *co;
 -
 -      co = find_cached_object(sha1);
 -      if (co) {
 -              *type = co->type;
 -              *size = co->size;
 -              return xmemdupz(co->buf, co->size);
 -      }
 +      struct object_info oi = OBJECT_INFO_INIT;
 +      void *content;
 +      oi.typep = type;
 +      oi.sizep = size;
 +      oi.contentp = &content;
  
 -      buf = read_packed_sha1(sha1, type, size);
 -      if (buf)
 -              return buf;
 -      map = map_sha1_file(sha1, &mapsize);
 -      if (map) {
 -              buf = unpack_sha1_file(map, mapsize, type, size, sha1);
 -              munmap(map, mapsize);
 -              return buf;
 -      }
 -      reprepare_packed_git();
 -      return read_packed_sha1(sha1, type, size);
 +      if (sha1_object_info_extended(sha1, &oi, 0) < 0)
 +              return NULL;
 +      return content;
  }
  
  /*
  void *read_sha1_file_extended(const unsigned char *sha1,
                              enum object_type *type,
                              unsigned long *size,
 -                            unsigned flag)
 +                            int lookup_replace)
  {
        void *data;
        const struct packed_git *p;
 -      const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
 +      const char *path;
 +      struct stat st;
 +      const unsigned char *repl = lookup_replace ? lookup_replace_object(sha1)
 +                                                 : sha1;
  
        errno = 0;
        data = read_object(repl, type, size);
                die("replacement %s not found for %s",
                    sha1_to_hex(repl), sha1_to_hex(sha1));
  
 -      if (has_loose_object(repl)) {
 -              const char *path = sha1_file_name(sha1);
 -
 +      if (!stat_sha1_file(repl, &st, &path))
                die("loose object %s (stored in %s) is corrupt",
                    sha1_to_hex(repl), path);
 -      }
  
        if ((p = has_packed_and_bad(repl)) != NULL)
                die("packed object %s (stored in %s) is corrupt",
@@@ -3495,10 -3336,16 +3495,10 @@@ int has_sha1_pack(const unsigned char *
  
  int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
  {
 -      struct pack_entry e;
 -
 -      if (find_pack_entry(sha1, &e))
 -              return 1;
 -      if (has_loose_object(sha1))
 -              return 1;
 -      if (flags & HAS_SHA1_QUICK)
 +      if (!startup_info->have_repository)
                return 0;
 -      reprepare_packed_git();
 -      return find_pack_entry(sha1, &e);
 +      return sha1_object_info_extended(sha1, NULL,
 +                                       flags | OBJECT_INFO_SKIP_CACHED) >= 0;
  }
  
  int has_object_file(const struct object_id *oid)
        return has_sha1_file(oid->hash);
  }
  
 +int has_object_file_with_flags(const struct object_id *oid, int flags)
 +{
 +      return has_sha1_file_with_flags(oid->hash, flags);
 +}
 +
  static void check_tree(const void *buf, size_t size)
  {
        struct tree_desc desc;
@@@ -3554,7 -3396,7 +3554,7 @@@ static int index_mem(unsigned char *sha
         */
        if ((type == OBJ_BLOB) && path) {
                struct strbuf nbuf = STRBUF_INIT;
 -              if (convert_to_git(path, buf, size, &nbuf,
 +              if (convert_to_git(&the_index, path, buf, size, &nbuf,
                                   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
                        buf = strbuf_detach(&nbuf, &size);
                        re_allocated = 1;
@@@ -3588,7 -3430,7 +3588,7 @@@ static int index_stream_convert_blob(un
        assert(path);
        assert(would_convert_to_git_filter_fd(path));
  
 -      convert_to_git_filter_fd(path, fd, &sbuf,
 +      convert_to_git_filter_fd(&the_index, path, fd, &sbuf,
                                 write_object ? safe_crlf : SAFE_CRLF_FALSE);
  
        if (write_object)
@@@ -3676,7 -3518,7 +3676,7 @@@ int index_fd(unsigned char *sha1, int f
        else if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
        else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
 -               (path && would_convert_to_git(path)))
 +               (path && would_convert_to_git(&the_index, path)))
                ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
                                 flags);
        else
@@@ -3743,32 -3585,22 +3743,32 @@@ void assert_sha1_type(const unsigned ch
                    typename(expect));
  }
  
 -static int for_each_file_in_obj_subdir(int subdir_nr,
 -                                     struct strbuf *path,
 -                                     each_loose_object_fn obj_cb,
 -                                     each_loose_cruft_fn cruft_cb,
 -                                     each_loose_subdir_fn subdir_cb,
 -                                     void *data)
 +int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 +                              struct strbuf *path,
 +                              each_loose_object_fn obj_cb,
 +                              each_loose_cruft_fn cruft_cb,
 +                              each_loose_subdir_fn subdir_cb,
 +                              void *data)
  {
 -      size_t baselen = path->len;
 -      DIR *dir = opendir(path->buf);
 +      size_t origlen, baselen;
 +      DIR *dir;
        struct dirent *de;
        int r = 0;
  
 +      if (subdir_nr > 0xff)
 +              BUG("invalid loose object subdirectory: %x", subdir_nr);
 +
 +      origlen = path->len;
 +      strbuf_complete(path, '/');
 +      strbuf_addf(path, "%02x", subdir_nr);
 +      baselen = path->len;
 +
 +      dir = opendir(path->buf);
        if (!dir) {
 -              if (errno == ENOENT)
 -                      return 0;
 -              return error_errno("unable to open %s", path->buf);
 +              if (errno != ENOENT)
 +                      r = error_errno("unable to open %s", path->buf);
 +              strbuf_setlen(path, origlen);
 +              return r;
        }
  
        while ((de = readdir(dir))) {
                strbuf_setlen(path, baselen);
                strbuf_addf(path, "/%s", de->d_name);
  
 -              if (strlen(de->d_name) == 38)  {
 -                      char hex[41];
 -                      unsigned char sha1[20];
 +              if (strlen(de->d_name) == GIT_SHA1_HEXSZ - 2)  {
 +                      char hex[GIT_MAX_HEXSZ+1];
 +                      struct object_id oid;
  
 -                      snprintf(hex, sizeof(hex), "%02x%s",
 -                               subdir_nr, de->d_name);
 -                      if (!get_sha1_hex(hex, sha1)) {
 +                      xsnprintf(hex, sizeof(hex), "%02x%s",
 +                                subdir_nr, de->d_name);
 +                      if (!get_oid_hex(hex, &oid)) {
                                if (obj_cb) {
 -                                      r = obj_cb(sha1, path->buf, data);
 +                                      r = obj_cb(&oid, path->buf, data);
                                        if (r)
                                                break;
                                }
        if (!r && subdir_cb)
                r = subdir_cb(subdir_nr, path->buf, data);
  
 +      strbuf_setlen(path, origlen);
 +
        return r;
  }
  
@@@ -3817,12 -3647,15 +3817,12 @@@ int for_each_loose_file_in_objdir_buf(s
                            each_loose_subdir_fn subdir_cb,
                            void *data)
  {
 -      size_t baselen = path->len;
        int r = 0;
        int i;
  
        for (i = 0; i < 256; i++) {
 -              strbuf_addf(path, "/%02x", i);
                r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
                                                subdir_cb, data);
 -              strbuf_setlen(path, baselen);
                if (r)
                        break;
        }
@@@ -3891,13 -3724,13 +3891,13 @@@ static int for_each_object_in_pack(stru
        int r = 0;
  
        for (i = 0; i < p->num_objects; i++) {
 -              const unsigned char *sha1 = nth_packed_object_sha1(p, i);
 +              struct object_id oid;
  
 -              if (!sha1)
 +              if (!nth_packed_object_oid(&oid, p, i))
                        return error("unable to get sha1 of object %u in %s",
                                     i, p->pack_name);
  
 -              r = cb(sha1, p, i, data);
 +              r = cb(&oid, p, i, data);
                if (r)
                        break;
        }
@@@ -3924,119 -3757,3 +3924,119 @@@ int for_each_packed_object(each_packed_
        }
        return r ? r : pack_errors;
  }
 +
 +static int check_stream_sha1(git_zstream *stream,
 +                           const char *hdr,
 +                           unsigned long size,
 +                           const char *path,
 +                           const unsigned char *expected_sha1)
 +{
 +      git_SHA_CTX c;
 +      unsigned char real_sha1[GIT_MAX_RAWSZ];
 +      unsigned char buf[4096];
 +      unsigned long total_read;
 +      int status = Z_OK;
 +
 +      git_SHA1_Init(&c);
 +      git_SHA1_Update(&c, hdr, stream->total_out);
 +
 +      /*
 +       * We already read some bytes into hdr, but the ones up to the NUL
 +       * do not count against the object's content size.
 +       */
 +      total_read = stream->total_out - strlen(hdr) - 1;
 +
 +      /*
 +       * This size comparison must be "<=" to read the final zlib packets;
 +       * see the comment in unpack_sha1_rest for details.
 +       */
 +      while (total_read <= size &&
 +             (status == Z_OK || status == Z_BUF_ERROR)) {
 +              stream->next_out = buf;
 +              stream->avail_out = sizeof(buf);
 +              if (size - total_read < stream->avail_out)
 +                      stream->avail_out = size - total_read;
 +              status = git_inflate(stream, Z_FINISH);
 +              git_SHA1_Update(&c, buf, stream->next_out - buf);
 +              total_read += stream->next_out - buf;
 +      }
 +      git_inflate_end(stream);
 +
 +      if (status != Z_STREAM_END) {
 +              error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
 +              return -1;
 +      }
 +      if (stream->avail_in) {
 +              error("garbage at end of loose object '%s'",
 +                    sha1_to_hex(expected_sha1));
 +              return -1;
 +      }
 +
 +      git_SHA1_Final(real_sha1, &c);
 +      if (hashcmp(expected_sha1, real_sha1)) {
 +              error("sha1 mismatch for %s (expected %s)", path,
 +                    sha1_to_hex(expected_sha1));
 +              return -1;
 +      }
 +
 +      return 0;
 +}
 +
 +int read_loose_object(const char *path,
 +                    const unsigned char *expected_sha1,
 +                    enum object_type *type,
 +                    unsigned long *size,
 +                    void **contents)
 +{
 +      int ret = -1;
 +      void *map = NULL;
 +      unsigned long mapsize;
 +      git_zstream stream;
 +      char hdr[32];
 +
 +      *contents = NULL;
 +
 +      map = map_sha1_file_1(path, NULL, &mapsize);
 +      if (!map) {
 +              error_errno("unable to mmap %s", path);
 +              goto out;
 +      }
 +
 +      if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
 +              error("unable to unpack header of %s", path);
 +              goto out;
 +      }
 +
 +      *type = parse_sha1_header(hdr, size);
 +      if (*type < 0) {
 +              error("unable to parse header of %s", path);
 +              git_inflate_end(&stream);
 +              goto out;
 +      }
 +
 +      if (*type == OBJ_BLOB) {
 +              if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
 +                      goto out;
 +      } else {
 +              *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
 +              if (!*contents) {
 +                      error("unable to unpack contents of %s", path);
 +                      git_inflate_end(&stream);
 +                      goto out;
 +              }
 +              if (check_sha1_signature(expected_sha1, *contents,
 +                                       *size, typename(*type))) {
 +                      error("sha1 mismatch for %s (expected %s)", path,
 +                            sha1_to_hex(expected_sha1));
 +                      free(*contents);
 +                      goto out;
 +              }
 +      }
 +
 +      ret = 0; /* everything checks out */
 +
 +out:
 +      if (map)
 +              munmap(map, mapsize);
 +      return ret;
 +}