Merge branch 'jh/filter-empty-contents'
author Junio C Hamano <gitster@pobox.com>
Mon, 1 Jun 2015 19:45:10 +0000 (12:45 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 1 Jun 2015 19:45:11 +0000 (12:45 -0700)
The clean/smudge interface did not work well when filtering
empty contents (it failed and then passed the empty input through).
It can be argued that a filter that produces anything but empty output for
an empty input is nonsense, but if the user wants to do strange
things, then why not?

* jh/filter-empty-contents:
sha1_file: pass empty buffer to index empty file

1  2 
sha1_file.c
t/t0021-conversion.sh
diff --combined sha1_file.c
index ccc6dac54b570d7174175242ba2d535d4006ab1c,6e2d6ec65b7e80f8e8800380146f4ba61cead190..7e38148fe52959e1cb8435132c65491dba6850b0
@@@ -8,7 -8,6 +8,7 @@@
   */
  #include "cache.h"
  #include "string-list.h"
 +#include "lockfile.h"
  #include "delta.h"
  #include "pack.h"
  #include "blob.h"
@@@ -37,6 -36,9 +37,6 @@@ static inline uintmax_t sz_fmt(size_t s
  
  const unsigned char null_sha1[20];
  
 -static const char *no_log_pack_access = "no_log_pack_access";
 -static const char *log_pack_access;
 -
  /*
   * This is meant to hold a *small* number of objects that you would
   * want read_sha1_file() to be able to return, but yet you do not want
@@@ -266,9 -268,9 +266,9 @@@ static struct alternate_object_databas
   * SHA1, an extra slash for the first level indirection, and the
   * terminating NUL.
   */
 -static int link_alt_odb_entry(const char *entry, const char *relative_base, int depth)
 +static int link_alt_odb_entry(const char *entry, const char *relative_base,
 +      int depth, const char *normalized_objdir)
  {
 -      const char *objdir = get_object_directory();
        struct alternate_object_database *ent;
        struct alternate_object_database *alt;
        int pfxlen, entlen;
                        return -1;
                }
        }
 -      if (!strcmp(ent->base, objdir)) {
 +      if (!strcmp_icase(ent->base, normalized_objdir)) {
                free(ent);
                return -1;
        }
@@@ -343,7 -345,6 +343,7 @@@ static void link_alt_odb_entries(const 
        struct string_list entries = STRING_LIST_INIT_NODUP;
        char *alt_copy;
        int i;
 +      struct strbuf objdirbuf = STRBUF_INIT;
  
        if (depth > 5) {
                error("%s: ignoring alternate object stores, nesting too deep.",
                return;
        }
  
 +      strbuf_add_absolute_path(&objdirbuf, get_object_directory());
 +      normalize_path_copy(objdirbuf.buf, objdirbuf.buf);
 +
        alt_copy = xmemdupz(alt, len);
        string_list_split_in_place(&entries, alt_copy, sep, -1);
        for (i = 0; i < entries.nr; i++) {
                        error("%s: ignoring relative alternate object store %s",
                                        relative_base, entry);
                } else {
 -                      link_alt_odb_entry(entry, relative_base, depth);
 +                      link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf);
                }
        }
        string_list_clear(&entries, 0);
        free(alt_copy);
 +      strbuf_release(&objdirbuf);
  }
  
  void read_info_alternates(const char * relative_base, int depth)
@@@ -405,7 -402,7 +405,7 @@@ void add_to_alternates_file(const char 
  {
        struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
        int fd = hold_lock_file_for_append(lock, git_path("objects/info/alternates"), LOCK_DIE_ON_ERROR);
 -      char *alt = mkpath("%s\n", reference);
 +      const char *alt = mkpath("%s\n", reference);
        write_or_die(fd, alt, strlen(alt));
        if (commit_lock_file(lock))
                die("could not close alternates file");
                link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0);
  }
  
 -void foreach_alt_odb(alt_odb_fn fn, void *cb)
 +int foreach_alt_odb(alt_odb_fn fn, void *cb)
  {
        struct alternate_object_database *ent;
 +      int r = 0;
  
        prepare_alt_odb();
 -      for (ent = alt_odb_list; ent; ent = ent->next)
 -              if (fn(ent, cb))
 -                      return;
 +      for (ent = alt_odb_list; ent; ent = ent->next) {
 +              r = fn(ent, cb);
 +              if (r)
 +                      break;
 +      }
 +      return r;
  }
  
  void prepare_alt_odb(void)
        read_info_alternates(get_object_directory(), 0);
  }
  
 -static int has_loose_object_local(const unsigned char *sha1)
 +static int freshen_file(const char *fn)
  {
 -      return !access(sha1_file_name(sha1), F_OK);
 +      struct utimbuf t;
 +      t.actime = t.modtime = time(NULL);
 +      return !utime(fn, &t);
  }
  
 -int has_loose_object_nonlocal(const unsigned char *sha1)
 +static int check_and_freshen_file(const char *fn, int freshen)
 +{
 +      if (access(fn, F_OK))
 +              return 0;
 +      if (freshen && freshen_file(fn))
 +              return 0;
 +      return 1;
 +}
 +
 +static int check_and_freshen_local(const unsigned char *sha1, int freshen)
 +{
 +      return check_and_freshen_file(sha1_file_name(sha1), freshen);
 +}
 +
 +static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
  {
        struct alternate_object_database *alt;
        prepare_alt_odb();
        for (alt = alt_odb_list; alt; alt = alt->next) {
                fill_sha1_path(alt->name, sha1);
 -              if (!access(alt->base, F_OK))
 +              if (check_and_freshen_file(alt->base, freshen))
                        return 1;
        }
        return 0;
  }
  
 +static int check_and_freshen(const unsigned char *sha1, int freshen)
 +{
 +      return check_and_freshen_local(sha1, freshen) ||
 +             check_and_freshen_nonlocal(sha1, freshen);
 +}
 +
 +int has_loose_object_nonlocal(const unsigned char *sha1)
 +{
 +      return check_and_freshen_nonlocal(sha1, 0);
 +}
 +
  static int has_loose_object(const unsigned char *sha1)
  {
 -      return has_loose_object_local(sha1) ||
 -             has_loose_object_nonlocal(sha1);
 +      return check_and_freshen(sha1, 0);
  }
  
  static unsigned int pack_used_ctr;
@@@ -694,26 -661,10 +694,26 @@@ void release_pack_memory(size_t need
                ; /* nothing */
  }
  
 +static void mmap_limit_check(size_t length)
 +{
 +      static size_t limit = 0;
 +      if (!limit) {
 +              limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
 +              if (!limit)
 +                      limit = SIZE_MAX;
 +      }
 +      if (length > limit)
 +              die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
 +                  (uintmax_t)length, (uintmax_t)limit);
 +}
 +
  void *xmmap(void *start, size_t length,
        int prot, int flags, int fd, off_t offset)
  {
 -      void *ret = mmap(start, length, prot, flags, fd, offset);
 +      void *ret;
 +
 +      mmap_limit_check(length);
 +      ret = mmap(start, length, prot, flags, fd, offset);
        if (ret == MAP_FAILED) {
                if (!length)
                        return NULL;
@@@ -1198,7 -1149,7 +1198,7 @@@ static void report_pack_garbage(struct 
        if (!report_garbage)
                return;
  
 -      sort_string_list(list);
 +      string_list_sort(list);
  
        for (i = 0; i < list->nr; i++) {
                const char *path = list->items[i].string;
  
  static void prepare_packed_git_one(char *objdir, int local)
  {
 -      /* Ensure that this buffer is large enough so that we can
 -         append "/pack/" without clobbering the stack even if
 -         strlen(objdir) were PATH_MAX.  */
 -      char path[PATH_MAX + 1 + 4 + 1 + 1];
 -      int len;
 +      struct strbuf path = STRBUF_INIT;
 +      size_t dirnamelen;
        DIR *dir;
        struct dirent *de;
        struct string_list garbage = STRING_LIST_INIT_DUP;
  
 -      sprintf(path, "%s/pack", objdir);
 -      len = strlen(path);
 -      dir = opendir(path);
 +      strbuf_addstr(&path, objdir);
 +      strbuf_addstr(&path, "/pack");
 +      dir = opendir(path.buf);
        if (!dir) {
                if (errno != ENOENT)
                        error("unable to open object pack directory: %s: %s",
 -                            path, strerror(errno));
 +                            path.buf, strerror(errno));
 +              strbuf_release(&path);
                return;
        }
 -      path[len++] = '/';
 +      strbuf_addch(&path, '/');
 +      dirnamelen = path.len;
        while ((de = readdir(dir)) != NULL) {
 -              int namelen = strlen(de->d_name);
                struct packed_git *p;
 -
 -              if (len + namelen + 1 > sizeof(path)) {
 -                      if (report_garbage) {
 -                              struct strbuf sb = STRBUF_INIT;
 -                              strbuf_addf(&sb, "%.*s/%s", len - 1, path, de->d_name);
 -                              report_garbage("path too long", sb.buf);
 -                              strbuf_release(&sb);
 -                      }
 -                      continue;
 -              }
 +              size_t base_len;
  
                if (is_dot_or_dotdot(de->d_name))
                        continue;
  
 -              strcpy(path + len, de->d_name);
 +              strbuf_setlen(&path, dirnamelen);
 +              strbuf_addstr(&path, de->d_name);
  
 -              if (has_extension(de->d_name, ".idx")) {
 +              base_len = path.len;
 +              if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
                        /* Don't reopen a pack we already have. */
                        for (p = packed_git; p; p = p->next) {
 -                              if (!memcmp(path, p->pack_name, len + namelen - 4))
 +                              size_t len;
 +                              if (strip_suffix(p->pack_name, ".pack", &len) &&
 +                                  len == base_len &&
 +                                  !memcmp(p->pack_name, path.buf, len))
                                        break;
                        }
                        if (p == NULL &&
                             * See if it really is a valid .idx file with
                             * corresponding .pack file that we can map.
                             */
 -                          (p = add_packed_git(path, len + namelen, local)) != NULL)
 +                          (p = add_packed_git(path.buf, path.len, local)) != NULL)
                                install_packed_git(p);
                }
  
                if (!report_garbage)
                        continue;
  
 -              if (has_extension(de->d_name, ".idx") ||
 -                  has_extension(de->d_name, ".pack") ||
 -                  has_extension(de->d_name, ".bitmap") ||
 -                  has_extension(de->d_name, ".keep"))
 -                      string_list_append(&garbage, path);
 +              if (ends_with(de->d_name, ".idx") ||
 +                  ends_with(de->d_name, ".pack") ||
 +                  ends_with(de->d_name, ".bitmap") ||
 +                  ends_with(de->d_name, ".keep"))
 +                      string_list_append(&garbage, path.buf);
                else
 -                      report_garbage("garbage found", path);
 +                      report_garbage("garbage found", path.buf);
        }
        closedir(dir);
        report_pack_garbage(&garbage);
        string_list_clear(&garbage, 0);
 +      strbuf_release(&path);
  }
  
  static int sort_pack(const void *a_, const void *b_)
@@@ -1564,40 -1520,6 +1564,40 @@@ int unpack_sha1_header(git_zstream *str
        return git_inflate(stream, 0);
  }
  
 +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
 +                                      unsigned long mapsize, void *buffer,
 +                                      unsigned long bufsiz, struct strbuf *header)
 +{
 +      int status;
 +
 +      status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
 +
 +      /*
 +       * Check if entire header is unpacked in the first iteration.
 +       */
 +      if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 +              return 0;
 +
 +      /*
 +       * buffer[0..bufsiz] was not large enough.  Copy the partial
 +       * result out to header, and then append the result of further
 +       * reading the stream.
 +       */
 +      strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
 +      stream->next_out = buffer;
 +      stream->avail_out = bufsiz;
 +
 +      do {
 +              status = git_inflate(stream, 0);
 +              strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
 +              if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 +                      return 0;
 +              stream->next_out = buffer;
 +              stream->avail_out = bufsiz;
 +      } while (status != Z_STREAM_END);
 +      return -1;
 +}
 +
  static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
  {
        int bytes = strlen(buffer) + 1;
   * too permissive for what we want to check. So do an anal
   * object header parse by hand.
   */
 -int parse_sha1_header(const char *hdr, unsigned long *sizep)
 +static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
 +                             unsigned int flags)
  {
 -      char type[10];
 -      int i;
 +      const char *type_buf = hdr;
        unsigned long size;
 +      int type, type_len = 0;
  
        /*
 -       * The type can be at most ten bytes (including the
 -       * terminating '\0' that we add), and is followed by
 +       * The type can be of any size but is followed by
         * a space.
         */
 -      i = 0;
        for (;;) {
                char c = *hdr++;
                if (c == ' ')
                        break;
 -              type[i++] = c;
 -              if (i >= sizeof(type))
 -                      return -1;
 +              type_len++;
        }
 -      type[i] = 0;
 +
 +      type = type_from_string_gently(type_buf, type_len, 1);
 +      if (oi->typename)
 +              strbuf_add(oi->typename, type_buf, type_len);
 +      /*
 +       * Set type to 0 if its an unknown object and
 +       * we're obtaining the type using '--allow-unkown-type'
 +       * option.
 +       */
 +      if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
 +              type = 0;
 +      else if (type < 0)
 +              die("invalid object type");
 +      if (oi->typep)
 +              *oi->typep = type;
  
        /*
         * The length must follow immediately, and be in canonical
                        size = size * 10 + c;
                }
        }
 -      *sizep = size;
 +
 +      if (oi->sizep)
 +              *oi->sizep = size;
  
        /*
         * The length must be followed by a zero byte
         */
 -      return *hdr ? -1 : type_from_string(type);
 +      return *hdr ? -1 : type;
 +}
 +
 +int parse_sha1_header(const char *hdr, unsigned long *sizep)
 +{
 +      struct object_info oi;
 +
 +      oi.sizep = sizep;
 +      oi.typename = NULL;
 +      oi.typep = NULL;
 +      return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
  }
  
  static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
@@@ -2027,9 -1926,7 +2027,9 @@@ static void *unpack_compressed_entry(st
        git_zstream stream;
        unsigned char *buffer, *in;
  
 -      buffer = xmallocz(size);
 +      buffer = xmallocz_gently(size);
 +      if (!buffer)
 +              return NULL;
        memset(&stream, 0, sizeof(stream));
        stream.next_out = buffer;
        stream.avail_out = size + 1;
@@@ -2189,9 -2086,27 +2189,9 @@@ static void *read_object(const unsigne
  
  static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
  {
 -      static FILE *log_file;
 -
 -      if (!log_pack_access)
 -              log_pack_access = getenv("GIT_TRACE_PACK_ACCESS");
 -      if (!log_pack_access)
 -              log_pack_access = no_log_pack_access;
 -      if (log_pack_access == no_log_pack_access)
 -              return;
 -
 -      if (!log_file) {
 -              log_file = fopen(log_pack_access, "w");
 -              if (!log_file) {
 -                      error("cannot open pack access log '%s' for writing: %s",
 -                            log_pack_access, strerror(errno));
 -                      log_pack_access = no_log_pack_access;
 -                      return;
 -              }
 -      }
 -      fprintf(log_file, "%s %"PRIuMAX"\n",
 -              p->pack_name, (uintmax_t)obj_offset);
 -      fflush(log_file);
 +      static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
 +      trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
 +                       p->pack_name, (uintmax_t)obj_offset);
  }
  
  int do_check_packed_object_crc;
@@@ -2216,7 -2131,8 +2216,7 @@@ void *unpack_entry(struct packed_git *p
        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
        int base_from_cache = 0;
  
 -      if (log_pack_access != no_log_pack_access)
 -              write_pack_access_log(p, obj_offset);
 +      write_pack_access_log(p, obj_offset);
  
        /* PHASE 1: drill down to the innermost base object */
        for (;;) {
@@@ -2530,8 -2446,10 +2530,8 @@@ static int fill_pack_entry(const unsign
         * answer, as it may have been deleted since the index was
         * loaded!
         */
 -      if (!is_pack_valid(p)) {
 -              warning("packfile %s cannot be accessed", p->pack_name);
 +      if (!is_pack_valid(p))
                return 0;
 -      }
        e->offset = offset;
        e->p = p;
        hashcpy(e->sha1, sha1);
@@@ -2579,15 -2497,13 +2579,15 @@@ struct packed_git *find_sha1_pack(cons
  }
  
  static int sha1_loose_object_info(const unsigned char *sha1,
 -                                struct object_info *oi)
 +                                struct object_info *oi,
 +                                int flags)
  {
 -      int status;
 -      unsigned long mapsize, size;
 +      int status = 0;
 +      unsigned long mapsize;
        void *map;
        git_zstream stream;
        char hdr[32];
 +      struct strbuf hdrbuf = STRBUF_INIT;
  
        if (oi->delta_base_sha1)
                hashclr(oi->delta_base_sha1);
         * return value implicitly indicates whether the
         * object even exists.
         */
 -      if (!oi->typep && !oi->sizep) {
 +      if (!oi->typep && !oi->typename && !oi->sizep) {
                struct stat st;
                if (stat_sha1_file(sha1, &st) < 0)
                        return -1;
                return -1;
        if (oi->disk_sizep)
                *oi->disk_sizep = mapsize;
 -      if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
 +      if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
 +              if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
 +                      status = error("unable to unpack %s header with --allow-unknown-type",
 +                                     sha1_to_hex(sha1));
 +      } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
                status = error("unable to unpack %s header",
                               sha1_to_hex(sha1));
 -      else if ((status = parse_sha1_header(hdr, &size)) < 0)
 +      if (status < 0)
 +              ; /* Do nothing */
 +      else if (hdrbuf.len) {
 +              if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
 +                      status = error("unable to parse %s header with --allow-unknown-type",
 +                                     sha1_to_hex(sha1));
 +      } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
                status = error("unable to parse %s header", sha1_to_hex(sha1));
 -      else if (oi->sizep)
 -              *oi->sizep = size;
        git_inflate_end(&stream);
        munmap(map, mapsize);
 -      if (oi->typep)
 +      if (status && oi->typep)
                *oi->typep = status;
 +      strbuf_release(&hdrbuf);
        return 0;
  }
  
@@@ -2642,7 -2549,6 +2642,7 @@@ int sha1_object_info_extended(const uns
        struct cached_object *co;
        struct pack_entry e;
        int rtype;
 +      enum object_type real_type;
        const unsigned char *real = lookup_replace_object_extended(sha1, flags);
  
        co = find_cached_object(real);
                        *(oi->disk_sizep) = 0;
                if (oi->delta_base_sha1)
                        hashclr(oi->delta_base_sha1);
 +              if (oi->typename)
 +                      strbuf_addstr(oi->typename, typename(co->type));
                oi->whence = OI_CACHED;
                return 0;
        }
  
        if (!find_pack_entry(real, &e)) {
                /* Most likely it's a loose object. */
 -              if (!sha1_loose_object_info(real, oi)) {
 +              if (!sha1_loose_object_info(real, oi, flags)) {
                        oi->whence = OI_LOOSE;
                        return 0;
                }
                        return -1;
        }
  
 +      /*
 +       * packed_object_info() does not follow the delta chain to
 +       * find out the real type, unless it is given oi->typep.
 +       */
 +      if (oi->typename && !oi->typep)
 +              oi->typep = &real_type;
 +
        rtype = packed_object_info(e.p, e.offset, oi);
        if (rtype < 0) {
                mark_bad_packed_object(e.p, real);
 +              if (oi->typep == &real_type)
 +                      oi->typep = NULL;
                return sha1_object_info_extended(real, oi, 0);
        } else if (in_delta_base_cache(e.p, e.offset)) {
                oi->whence = OI_DBCACHED;
                oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
                                         rtype == OBJ_OFS_DELTA);
        }
 +      if (oi->typename)
 +              strbuf_addstr(oi->typename, typename(*oi->typep));
 +      if (oi->typep == &real_type)
 +              oi->typep = NULL;
  
        return 0;
  }
@@@ -3025,6 -2916,7 +3025,6 @@@ static int write_loose_object(const uns
        }
  
        /* Set it up */
 -      memset(&stream, 0, sizeof(stream));
        git_deflate_init(&stream, zlib_compression_level);
        stream.next_out = compressed;
        stream.avail_out = sizeof(compressed);
        return move_temp_to_file(tmp_file, filename);
  }
  
 -int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 +static int freshen_loose_object(const unsigned char *sha1)
 +{
 +      return check_and_freshen(sha1, 1);
 +}
 +
 +static int freshen_packed_object(const unsigned char *sha1)
 +{
 +      struct pack_entry e;
 +      if (!find_pack_entry(sha1, &e))
 +              return 0;
 +      if (e.p->freshened)
 +              return 1;
 +      if (!freshen_file(e.p->pack_name))
 +              return 0;
 +      e.p->freshened = 1;
 +      return 1;
 +}
 +
 +int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
  {
 -      unsigned char sha1[20];
        char hdr[32];
        int hdrlen;
  
         * it out into .git/objects/??/?{38} file.
         */
        write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
 -      if (returnsha1)
 -              hashcpy(returnsha1, sha1);
 -      if (has_sha1_file(sha1))
 +      if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
                return 0;
        return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
  }
  
 +int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
 +                           unsigned char *sha1, unsigned flags)
 +{
 +      char *header;
 +      int hdrlen, status = 0;
 +
 +      /* type string, SP, %lu of the length plus NUL must fit this */
 +      header = xmalloc(strlen(type) + 32);
 +      write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);
 +
 +      if (!(flags & HASH_WRITE_OBJECT))
 +              goto cleanup;
 +      if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
 +              goto cleanup;
 +      status = write_loose_object(sha1, header, hdrlen, buf, len, 0);
 +
 +cleanup:
 +      free(header);
 +      return status;
 +}
 +
  int force_object_loose(const unsigned char *sha1, time_t mtime)
  {
        void *buf;
@@@ -3240,29 -3096,6 +3240,29 @@@ static int index_mem(unsigned char *sha
        return ret;
  }
  
 +static int index_stream_convert_blob(unsigned char *sha1, int fd,
 +                                   const char *path, unsigned flags)
 +{
 +      int ret;
 +      const int write_object = flags & HASH_WRITE_OBJECT;
 +      struct strbuf sbuf = STRBUF_INIT;
 +
 +      assert(path);
 +      assert(would_convert_to_git_filter_fd(path));
 +
 +      convert_to_git_filter_fd(path, fd, &sbuf,
 +                               write_object ? safe_crlf : SAFE_CRLF_FALSE);
 +
 +      if (write_object)
 +              ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
 +                                    sha1);
 +      else
 +              ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
 +                                   sha1);
 +      strbuf_release(&sbuf);
 +      return ret;
 +}
 +
  static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
                      const char *path, unsigned flags)
  {
@@@ -3286,7 -3119,7 +3286,7 @@@ static int index_core(unsigned char *sh
        int ret;
  
        if (!size) {
-               ret = index_mem(sha1, NULL, size, type, path, flags);
+               ret = index_mem(sha1, "", size, type, path, flags);
        } else if (size <= SMALL_FILE_SIZE) {
                char *buf = xmalloc(size);
                if (size == read_in_full(fd, buf, size))
@@@ -3328,22 -3161,15 +3328,22 @@@ int index_fd(unsigned char *sha1, int f
             enum object_type type, const char *path, unsigned flags)
  {
        int ret;
 -      size_t size = xsize_t(st->st_size);
  
 -      if (!S_ISREG(st->st_mode))
 +      /*
 +       * Call xsize_t() only when needed to avoid potentially unnecessary
 +       * die() for large files.
 +       */
 +      if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
 +              ret = index_stream_convert_blob(sha1, fd, path, flags);
 +      else if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
 -      else if (size <= big_file_threshold || type != OBJ_BLOB ||
 -               (path && would_convert_to_git(path, NULL, 0, 0)))
 -              ret = index_core(sha1, fd, size, type, path, flags);
 +      else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
 +               (path && would_convert_to_git(path)))
 +              ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
 +                               flags);
        else
 -              ret = index_stream(sha1, fd, size, type, path, flags);
 +              ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
 +                                 flags);
        close(fd);
        return ret;
  }
@@@ -3408,172 -3234,3 +3408,172 @@@ void assert_sha1_type(const unsigned ch
                die("%s is not a valid '%s' object", sha1_to_hex(sha1),
                    typename(expect));
  }
 +
 +static int for_each_file_in_obj_subdir(int subdir_nr,
 +                                     struct strbuf *path,
 +                                     each_loose_object_fn obj_cb,
 +                                     each_loose_cruft_fn cruft_cb,
 +                                     each_loose_subdir_fn subdir_cb,
 +                                     void *data)
 +{
 +      size_t baselen = path->len;
 +      DIR *dir = opendir(path->buf);
 +      struct dirent *de;
 +      int r = 0;
 +
 +      if (!dir) {
 +              if (errno == ENOENT)
 +                      return 0;
 +              return error("unable to open %s: %s", path->buf, strerror(errno));
 +      }
 +
 +      while ((de = readdir(dir))) {
 +              if (is_dot_or_dotdot(de->d_name))
 +                      continue;
 +
 +              strbuf_setlen(path, baselen);
 +              strbuf_addf(path, "/%s", de->d_name);
 +
 +              if (strlen(de->d_name) == 38)  {
 +                      char hex[41];
 +                      unsigned char sha1[20];
 +
 +                      snprintf(hex, sizeof(hex), "%02x%s",
 +                               subdir_nr, de->d_name);
 +                      if (!get_sha1_hex(hex, sha1)) {
 +                              if (obj_cb) {
 +                                      r = obj_cb(sha1, path->buf, data);
 +                                      if (r)
 +                                              break;
 +                              }
 +                              continue;
 +                      }
 +              }
 +
 +              if (cruft_cb) {
 +                      r = cruft_cb(de->d_name, path->buf, data);
 +                      if (r)
 +                              break;
 +              }
 +      }
 +      strbuf_setlen(path, baselen);
 +
 +      if (!r && subdir_cb)
 +              r = subdir_cb(subdir_nr, path->buf, data);
 +
 +      closedir(dir);
 +      return r;
 +}
 +
 +int for_each_loose_file_in_objdir_buf(struct strbuf *path,
 +                          each_loose_object_fn obj_cb,
 +                          each_loose_cruft_fn cruft_cb,
 +                          each_loose_subdir_fn subdir_cb,
 +                          void *data)
 +{
 +      size_t baselen = path->len;
 +      int r = 0;
 +      int i;
 +
 +      for (i = 0; i < 256; i++) {
 +              strbuf_addf(path, "/%02x", i);
 +              r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
 +                                              subdir_cb, data);
 +              strbuf_setlen(path, baselen);
 +              if (r)
 +                      break;
 +      }
 +
 +      return r;
 +}
 +
 +int for_each_loose_file_in_objdir(const char *path,
 +                                each_loose_object_fn obj_cb,
 +                                each_loose_cruft_fn cruft_cb,
 +                                each_loose_subdir_fn subdir_cb,
 +                                void *data)
 +{
 +      struct strbuf buf = STRBUF_INIT;
 +      int r;
 +
 +      strbuf_addstr(&buf, path);
 +      r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
 +                                            subdir_cb, data);
 +      strbuf_release(&buf);
 +
 +      return r;
 +}
 +
 +struct loose_alt_odb_data {
 +      each_loose_object_fn *cb;
 +      void *data;
 +};
 +
 +static int loose_from_alt_odb(struct alternate_object_database *alt,
 +                            void *vdata)
 +{
 +      struct loose_alt_odb_data *data = vdata;
 +      struct strbuf buf = STRBUF_INIT;
 +      int r;
 +
 +      /* copy base not including trailing '/' */
 +      strbuf_add(&buf, alt->base, alt->name - alt->base - 1);
 +      r = for_each_loose_file_in_objdir_buf(&buf,
 +                                            data->cb, NULL, NULL,
 +                                            data->data);
 +      strbuf_release(&buf);
 +      return r;
 +}
 +
 +int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
 +{
 +      struct loose_alt_odb_data alt;
 +      int r;
 +
 +      r = for_each_loose_file_in_objdir(get_object_directory(),
 +                                        cb, NULL, NULL, data);
 +      if (r)
 +              return r;
 +
 +      if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
 +              return 0;
 +
 +      alt.cb = cb;
 +      alt.data = data;
 +      return foreach_alt_odb(loose_from_alt_odb, &alt);
 +}
 +
 +static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
 +{
 +      uint32_t i;
 +      int r = 0;
 +
 +      for (i = 0; i < p->num_objects; i++) {
 +              const unsigned char *sha1 = nth_packed_object_sha1(p, i);
 +
 +              if (!sha1)
 +                      return error("unable to get sha1 of object %u in %s",
 +                                   i, p->pack_name);
 +
 +              r = cb(sha1, p, i, data);
 +              if (r)
 +                      break;
 +      }
 +      return r;
 +}
 +
 +int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
 +{
 +      struct packed_git *p;
 +      int r = 0;
 +
 +      prepare_packed_git();
 +      for (p = packed_git; p; p = p->next) {
 +              if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
 +                      continue;
 +              r = for_each_object_in_pack(p, cb, data);
 +              if (r)
 +                      break;
 +      }
 +      return r;
 +}
diff --combined t/t0021-conversion.sh
index e0200b9f338f0478fd55029eab9c32c6b7956476,b778faf1c85ac5cbaf02a982bef90e2dae19b7d8..718efa04d34df1f867a37d7b3073f1463457455b
@@@ -153,23 -153,17 +153,23 @@@ test_expect_success 'filter shell-escap
        :
  '
  
 -test_expect_success 'required filter success' '
 -      git config filter.required.smudge cat &&
 -      git config filter.required.clean cat &&
 +test_expect_success 'required filter should filter data' '
 +      git config filter.required.smudge ./rot13.sh &&
 +      git config filter.required.clean ./rot13.sh &&
        git config filter.required.required true &&
  
        echo "*.r filter=required" >.gitattributes &&
  
 -      echo test >test.r &&
 +      cat test.o >test.r &&
        git add test.r &&
 +
        rm -f test.r &&
 -      git checkout -- test.r
 +      git checkout -- test.r &&
 +      cmp test.o test.r &&
 +
 +      ./rot13.sh <test.o >expected &&
 +      git cat-file blob :test.r >actual &&
 +      cmp expected actual
  '
  
  test_expect_success 'required filter smudge failure' '
@@@ -196,23 -190,7 +196,23 @@@ test_expect_success 'required filter cl
        test_must_fail git add test.fc
  '
  
 -test -n "$GIT_TEST_LONG" && test_set_prereq EXPENSIVE
 +test_expect_success 'filtering large input to small output should use little memory' '
 +      git config filter.devnull.clean "cat >/dev/null" &&
 +      git config filter.devnull.required true &&
 +      for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
 +      echo "30MB filter=devnull" >.gitattributes &&
 +      GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
 +'
 +
 +test_expect_success 'filter that does not read is fine' '
 +      test-genrandom foo $((128 * 1024 + 1)) >big &&
 +      echo "big filter=epipe" >.gitattributes &&
 +      git config filter.epipe.clean "echo xyzzy" &&
 +      git add big &&
 +      git cat-file blob :big >actual &&
 +      echo xyzzy >expect &&
 +      test_cmp expect actual
 +'
  
  test_expect_success EXPENSIVE 'filter large file' '
        git config filter.largefile.smudge cat &&
        ! test -s err
  '
  
+ test_expect_success "filter: clean empty file" '
+       git config filter.in-repo-header.clean  "echo cleaned && cat" &&
+       git config filter.in-repo-header.smudge "sed 1d" &&
+       echo "empty-in-worktree    filter=in-repo-header" >>.gitattributes &&
+       >empty-in-worktree &&
+       echo cleaned >expected &&
+       git add empty-in-worktree &&
+       git show :empty-in-worktree >actual &&
+       test_cmp expected actual
+ '
+ test_expect_success "filter: smudge empty file" '
+       git config filter.empty-in-repo.clean "cat >/dev/null" &&
+       git config filter.empty-in-repo.smudge "echo smudged && cat" &&
+       echo "empty-in-repo filter=empty-in-repo" >>.gitattributes &&
+       echo dead data walking >empty-in-repo &&
+       git add empty-in-repo &&
+       echo smudged >expected &&
+       git checkout-index --prefix=filtered- empty-in-repo &&
+       test_cmp expected filtered-empty-in-repo
+ '
  test_done