Merge branch 'nd/pack-deltify-regression-fix'
author Junio C Hamano <gitster@pobox.com>
Wed, 22 Aug 2018 18:17:05 +0000 (11:17 -0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 22 Aug 2018 18:17:05 +0000 (11:17 -0700)
In a recent update in the 2.18 era, "git pack-objects" started
producing larger-than-necessary packfiles by missing
opportunities to use large deltas.

* nd/pack-deltify-regression-fix:
pack-objects: fix performance issues on packing large deltas

1  2 
builtin/pack-objects.c
pack-objects.c
pack-objects.h
diff --combined builtin/pack-objects.c
index 0d80dee2ba1ad82fd21622f73dac86b6010da4b6,297d44fc7370e600c74bd6129d6a8112f1930460..d1144a8f7ef79f7efa5bf64141a9133cfeee66d1
@@@ -30,7 -30,6 +30,7 @@@
  #include "list.h"
  #include "packfile.h"
  #include "object-store.h"
 +#include "dir.h"
  
  #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
  #define SIZE(obj) oe_size(&to_pack, obj)
@@@ -58,7 -57,7 +58,7 @@@ static const char *pack_usage[] = 
  static struct packing_data to_pack;
  
  static struct pack_idx_entry **written_list;
 -static uint32_t nr_result, nr_written;
 +static uint32_t nr_result, nr_written, nr_seen;
  
  static int non_empty;
  static int reuse_delta = 1, reuse_object = 1;
@@@ -68,8 -67,7 +68,8 @@@ static int pack_loose_unreachable
  static int local;
  static int have_non_local_packs;
  static int incremental;
 -static int ignore_packed_keep;
 +static int ignore_packed_keep_on_disk;
 +static int ignore_packed_keep_in_core;
  static int allow_ofs_delta;
  static struct pack_idx_option pack_idx_opts;
  static const char *base_name;
@@@ -94,7 -92,7 +94,7 @@@ static uint16_t write_bitmap_options
  static int exclude_promisor_objects;
  
  static unsigned long delta_cache_size = 0;
 -static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
 +static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
  static unsigned long cache_max_small_delta_size = 1000;
  
  static unsigned long window_memory_limit = 0;
@@@ -140,7 -138,7 +140,7 @@@ static void *get_delta(struct object_en
  
        buf = read_object_file(&entry->idx.oid, &type, &size);
        if (!buf)
 -              die("unable to read %s", oid_to_hex(&entry->idx.oid));
 +              die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
        base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
                                    &base_size);
        if (!base_buf)
                    oid_to_hex(&DELTA(entry)->idx.oid));
        delta_buf = diff_delta(base_buf, base_size,
                               buf, size, &delta_size, 0);
 +      /*
 +       * We successfully computed this delta once but dropped it for
 +       * memory reasons. Something is very wrong if this time we
 +       * recompute and create a different delta.
 +       */
        if (!delta_buf || delta_size != DELTA_SIZE(entry))
 -              die("delta size changed");
 +              BUG("delta size changed");
        free(buf);
        free(base_buf);
        return delta_buf;
@@@ -284,7 -277,6 +284,7 @@@ static unsigned long write_no_reuse_obj
        enum object_type type;
        void *buf;
        struct git_istream *st = NULL;
 +      const unsigned hashsz = the_hash_algo->rawsz;
  
        if (!usable_delta) {
                if (oe_type(entry) == OBJ_BLOB &&
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
 -              if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
 +              if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
        } else if (type == OBJ_REF_DELTA) {
                /*
                 * Deltas with a base reference contain
 -               * an additional 20 bytes for the base sha1.
 +               * additional bytes for the base object ID.
                 */
 -              if (limit && hdrlen + 20 + datalen + 20 >= limit) {
 +              if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
 -              hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
 -              hdrlen += 20;
 +              hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 +              hdrlen += hashsz;
        } else {
 -              if (limit && hdrlen + datalen + 20 >= limit) {
 +              if (limit && hdrlen + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
@@@ -397,7 -389,6 +397,7 @@@ static off_t write_reuse_object(struct 
        unsigned char header[MAX_PACK_OBJECT_HEADER],
                      dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
 +      const unsigned hashsz = the_hash_algo->rawsz;
        unsigned long entry_size = SIZE(entry);
  
        if (DELTA(entry))
        datalen = revidx[1].offset - offset;
        if (!pack_to_stdout && p->index_version > 1 &&
            check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
 -              error("bad packed object CRC for %s",
 +              error(_("bad packed object CRC for %s"),
                      oid_to_hex(&entry->idx.oid));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
  
        if (!pack_to_stdout && p->index_version == 1 &&
            check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 -              error("corrupt packed object for %s",
 +              error(_("corrupt packed object for %s"),
                      oid_to_hex(&entry->idx.oid));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
 -              if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
 +              if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hdrlen += sizeof(dheader) - pos;
                reused_delta++;
        } else if (type == OBJ_REF_DELTA) {
 -              if (limit && hdrlen + 20 + datalen + 20 >= limit) {
 +              if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
 -              hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
 -              hdrlen += 20;
 +              hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 +              hdrlen += hashsz;
                reused_delta++;
        } else {
 -              if (limit && hdrlen + datalen + 20 >= limit) {
 +              if (limit && hdrlen + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
@@@ -553,7 -544,7 +553,7 @@@ static enum write_one_status write_one(
         */
        recursing = (e->idx.offset == 1);
        if (recursing) {
 -              warning("recursive delta detected for object %s",
 +              warning(_("recursive delta detected for object %s"),
                        oid_to_hex(&e->idx.oid));
                return WRITE_ONE_RECURSIVE;
        } else if (e->idx.offset || e->preferred_base) {
  
        /* make sure off_t is sufficiently large not to wrap */
        if (signed_add_overflows(*offset, size))
 -              die("pack too large for current definition of off_t");
 +              die(_("pack too large for current definition of off_t"));
        *offset += size;
        return WRITE_ONE_WRITTEN;
  }
@@@ -753,8 -744,7 +753,8 @@@ static struct object_entry **compute_wr
        }
  
        if (wo_end != to_pack.nr_objects)
 -              die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
 +              die(_("ordered %u objects, expected %"PRIu32),
 +                  wo_end, to_pack.nr_objects);
  
        return wo;
  }
@@@ -766,18 -756,18 +766,18 @@@ static off_t write_reused_pack(struct h
        int fd;
  
        if (!is_pack_valid(reuse_packfile))
 -              die("packfile is invalid: %s", reuse_packfile->pack_name);
 +              die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
  
        fd = git_open(reuse_packfile->pack_name);
        if (fd < 0)
 -              die_errno("unable to open packfile for reuse: %s",
 +              die_errno(_("unable to open packfile for reuse: %s"),
                          reuse_packfile->pack_name);
  
        if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
 -              die_errno("unable to seek in reused packfile");
 +              die_errno(_("unable to seek in reused packfile"));
  
        if (reuse_packfile_offset < 0)
 -              reuse_packfile_offset = reuse_packfile->pack_size - 20;
 +              reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
  
        total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
  
                int read_pack = xread(fd, buffer, sizeof(buffer));
  
                if (read_pack <= 0)
 -                      die_errno("unable to read from reused packfile");
 +                      die_errno(_("unable to read from reused packfile"));
  
                if (read_pack > to_write)
                        read_pack = to_write;
@@@ -862,11 -852,11 +862,11 @@@ static void write_pack_file(void
                 * If so, rewrite it like in fast-import
                 */
                if (pack_to_stdout) {
 -                      hashclose(f, oid.hash, CSUM_CLOSE);
 +                      finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
                } else if (nr_written == nr_remaining) {
 -                      hashclose(f, oid.hash, CSUM_FSYNC);
 +                      finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
                } else {
 -                      int fd = hashclose(f, oid.hash, 0);
 +                      int fd = finalize_hashfile(f, oid.hash, 0);
                        fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
                                                 nr_written, oid.hash, offset);
                        close(fd);
                         * to preserve this property.
                         */
                        if (stat(pack_tmp_name, &st) < 0) {
 -                              warning_errno("failed to stat %s", pack_tmp_name);
 +                              warning_errno(_("failed to stat %s"), pack_tmp_name);
                        } else if (!last_mtime) {
                                last_mtime = st.st_mtime;
                        } else {
                                utb.actime = st.st_atime;
                                utb.modtime = --last_mtime;
                                if (utime(pack_tmp_name, &utb) < 0)
 -                                      warning_errno("failed utime() on %s", pack_tmp_name);
 +                                      warning_errno(_("failed utime() on %s"), pack_tmp_name);
                        }
  
                        strbuf_addf(&tmpname, "%s-", base_name);
        free(write_order);
        stop_progress(&progress_state);
        if (written != nr_result)
 -              die("wrote %"PRIu32" objects while expecting %"PRIu32,
 -                      written, nr_result);
 +              die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
 +                  written, nr_result);
  }
  
  static int no_try_delta(const char *path)
  
        if (!check)
                check = attr_check_initl("delta", NULL);
 -      if (git_check_attr(path, check))
 +      if (git_check_attr(&the_index, path, check))
                return 0;
        if (ATTR_FALSE(check->items[0].value))
                return 1;
@@@ -1008,16 -998,13 +1008,16 @@@ static int want_found_object(int exclud
         * Otherwise, we signal "-1" at the end to tell the caller that we do
         * not know either way, and it needs to check more packs.
         */
 -      if (!ignore_packed_keep &&
 +      if (!ignore_packed_keep_on_disk &&
 +          !ignore_packed_keep_in_core &&
            (!local || !have_non_local_packs))
                return 1;
  
        if (local && !p->pack_local)
                return 0;
 -      if (ignore_packed_keep && p->pack_local && p->pack_keep)
 +      if (p->pack_local &&
 +          ((ignore_packed_keep_on_disk && p->pack_keep) ||
 +           (ignore_packed_keep_in_core && p->pack_keep_in_core)))
                return 0;
  
        /* we don't know yet; keep looking for more packs */
@@@ -1041,7 -1028,7 +1041,7 @@@ static int want_object_in_pack(const st
        int want;
        struct list_head *pos;
  
 -      if (!exclude && local && has_loose_object_nonlocal(oid->hash))
 +      if (!exclude && local && has_loose_object_nonlocal(oid))
                return 0;
  
        /*
@@@ -1119,8 -1106,6 +1119,8 @@@ static int add_object_entry(const struc
        off_t found_offset = 0;
        uint32_t index_pos;
  
 +      display_progress(progress_state, ++nr_seen);
 +
        if (have_duplicate_entry(oid, exclude, &index_pos))
                return 0;
  
        create_object_entry(oid, type, pack_name_hash(name),
                            exclude, name && no_try_delta(name),
                            index_pos, found_pack, found_offset);
 -
 -      display_progress(progress_state, nr_result);
        return 1;
  }
  
@@@ -1146,8 -1133,6 +1146,8 @@@ static int add_object_entry_from_bitmap
  {
        uint32_t index_pos;
  
 +      display_progress(progress_state, ++nr_seen);
 +
        if (have_duplicate_entry(oid, 0, &index_pos))
                return 0;
  
                return 0;
  
        create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset);
 -
 -      display_progress(progress_state, nr_result);
        return 1;
  }
  
@@@ -1475,7 -1462,7 +1475,7 @@@ static void check_object(struct object_
                        if (reuse_delta && !entry->preferred_base)
                                base_ref = use_pack(p, &w_curs,
                                                entry->in_pack_offset + used, NULL);
 -                      entry->in_pack_header_size = used + 20;
 +                      entry->in_pack_header_size = used + the_hash_algo->rawsz;
                        break;
                case OBJ_OFS_DELTA:
                        buf = use_pack(p, &w_curs,
                        while (c & 128) {
                                ofs += 1;
                                if (!ofs || MSB(ofs, 7)) {
 -                                      error("delta base offset overflow in pack for %s",
 +                                      error(_("delta base offset overflow in pack for %s"),
                                              oid_to_hex(&entry->idx.oid));
                                        goto give_up;
                                }
                        }
                        ofs = entry->in_pack_offset - ofs;
                        if (ofs <= 0 || ofs >= entry->in_pack_offset) {
 -                              error("delta base offset out of bound for %s",
 +                              error(_("delta base offset out of bound for %s"),
                                      oid_to_hex(&entry->idx.oid));
                                goto give_up;
                        }
                unuse_pack(&w_curs);
        }
  
 -      oe_set_type(entry, oid_object_info(&entry->idx.oid, &canonical_size));
 +      oe_set_type(entry,
 +                  oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
        if (entry->type_valid) {
                SET_SIZE(entry, canonical_size);
        } else {
@@@ -1623,15 -1609,14 +1623,15 @@@ static void drop_reused_delta(struct ob
  
        oi.sizep = &size;
        oi.typep = &type;
 -      if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
 +      if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
                /*
                 * We failed to get the info from this pack for some reason;
                 * fall back to sha1_object_info, which may find another copy.
                 * And if that fails, the error will be recorded in oe_type(entry)
                 * and dealt with in prepare_pack().
                 */
 -              oe_set_type(entry, oid_object_info(&entry->idx.oid, &size));
 +              oe_set_type(entry,
 +                          oid_object_info(the_repository, &entry->idx.oid, &size));
        } else {
                oe_set_type(entry, type);
        }
@@@ -1676,7 -1661,7 +1676,7 @@@ static void break_delta_chains(struct o
                 * is a bug.
                 */
                if (cur->dfs_state != DFS_NONE)
 -                      die("BUG: confusing delta dfs state in first pass: %d",
 +                      BUG("confusing delta dfs state in first pass: %d",
                            cur->dfs_state);
  
                /*
                if (cur->dfs_state == DFS_DONE)
                        break;
                else if (cur->dfs_state != DFS_ACTIVE)
 -                      die("BUG: confusing delta dfs state in second pass: %d",
 +                      BUG("confusing delta dfs state in second pass: %d",
                            cur->dfs_state);
  
                /*
@@@ -1767,10 -1752,6 +1767,10 @@@ static void get_object_details(void
        uint32_t i;
        struct object_entry **sorted_by_offset;
  
 +      if (progress)
 +              progress_state = start_progress(_("Counting objects"),
 +                                              to_pack.nr_objects);
 +
        sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
        for (i = 0; i < to_pack.nr_objects; i++)
                sorted_by_offset[i] = to_pack.objects + i;
                if (entry->type_valid &&
                    oe_size_greater_than(&to_pack, entry, big_file_threshold))
                        entry->no_try_delta = 1;
 +              display_progress(progress_state, i + 1);
        }
 +      stop_progress(&progress_state);
  
        /*
         * This must happen in a second pass, since we rely on the delta
@@@ -1858,30 -1837,18 +1858,30 @@@ static int delta_cacheable(unsigned lon
  
  #ifndef NO_PTHREADS
  
 +/* Protect access to object database */
  static pthread_mutex_t read_mutex;
  #define read_lock()           pthread_mutex_lock(&read_mutex)
  #define read_unlock()         pthread_mutex_unlock(&read_mutex)
  
 +/* Protect delta_cache_size */
  static pthread_mutex_t cache_mutex;
  #define cache_lock()          pthread_mutex_lock(&cache_mutex)
  #define cache_unlock()                pthread_mutex_unlock(&cache_mutex)
  
 +/*
 + * Protect object list partitioning (e.g. struct thread_param) and
 + * progress_state
 + */
  static pthread_mutex_t progress_mutex;
  #define progress_lock()               pthread_mutex_lock(&progress_mutex)
  #define progress_unlock()     pthread_mutex_unlock(&progress_mutex)
  
 +/*
 + * Access to struct object_entry is unprotected since each thread owns
 + * a portion of the main object list. Just don't access object entries
 + * ahead in the list because they can be stolen and would need
 + * progress_mutex for protection.
 + */
  #else
  
  #define read_lock()           (void)0
@@@ -1909,7 -1876,7 +1909,7 @@@ unsigned long oe_get_size_slow(struct p
  
        if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
                read_lock();
 -              if (oid_object_info(&e->idx.oid, &size) < 0)
 +              if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
                        die(_("unable to get size of %s"),
                            oid_to_hex(&e->idx.oid));
                read_unlock();
@@@ -1969,7 -1936,7 +1969,7 @@@ static int try_delta(struct unpacked *t
        /* Now some size filtering heuristics. */
        trg_size = SIZE(trg_entry);
        if (!DELTA(trg_entry)) {
 -              max_size = trg_size/2 - 20;
 +              max_size = trg_size/2 - the_hash_algo->rawsz;
                ref_depth = 1;
        } else {
                max_size = DELTA_SIZE(trg_entry);
                trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
                read_unlock();
                if (!trg->data)
 -                      die("object %s cannot be read",
 +                      die(_("object %s cannot be read"),
                            oid_to_hex(&trg_entry->idx.oid));
                if (sz != trg_size)
 -                      die("object %s inconsistent object length (%lu vs %lu)",
 +                      die(_("object %s inconsistent object length (%lu vs %lu)"),
                            oid_to_hex(&trg_entry->idx.oid), sz,
                            trg_size);
                *mem_usage += sz;
                        if (src_entry->preferred_base) {
                                static int warned = 0;
                                if (!warned++)
 -                                      warning("object %s cannot be read",
 +                                      warning(_("object %s cannot be read"),
                                                oid_to_hex(&src_entry->idx.oid));
                                /*
                                 * Those objects are not included in the
                                 */
                                return 0;
                        }
 -                      die("object %s cannot be read",
 +                      die(_("object %s cannot be read"),
                            oid_to_hex(&src_entry->idx.oid));
                }
                if (sz != src_size)
 -                      die("object %s inconsistent object length (%lu vs %lu)",
 +                      die(_("object %s inconsistent object length (%lu vs %lu)"),
                            oid_to_hex(&src_entry->idx.oid), sz,
                            src_size);
                *mem_usage += sz;
                if (!src->index) {
                        static int warned = 0;
                        if (!warned++)
 -                              warning("suboptimal pack - out of memory");
 +                              warning(_("suboptimal pack - out of memory"));
                        return 0;
                }
                *mem_usage += sizeof_delta_index(src->index);
        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
        if (!delta_buf)
                return 0;
-       if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
-               free(delta_buf);
-               return 0;
-       }
  
        if (DELTA(trg_entry)) {
                /* Prefer only shallower same-sized deltas. */
@@@ -2263,19 -2226,12 +2259,19 @@@ static void try_to_free_from_threads(si
  static try_to_free_t old_try_to_free_routine;
  
  /*
 + * The main object list is split into smaller lists, each is handed to
 + * one worker.
 + *
   * The main thread waits on the condition that (at least) one of the workers
   * has stopped working (which is indicated in the .working member of
   * struct thread_params).
 + *
   * When a work thread has completed its work, it sets .working to 0 and
   * signals the main thread and waits on the condition that .data_ready
   * becomes 1.
 + *
 + * The main thread steals half of the work from the worker that has
 + * most work left to hand it to the idle worker.
   */
  
  struct thread_params {
@@@ -2303,6 -2259,7 +2299,7 @@@ static void init_threaded_search(void
        pthread_mutex_init(&cache_mutex, NULL);
        pthread_mutex_init(&progress_mutex, NULL);
        pthread_cond_init(&progress_cond, NULL);
+       pthread_mutex_init(&to_pack.lock, NULL);
        old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
  }
  
@@@ -2366,8 -2323,8 +2363,8 @@@ static void ll_find_deltas(struct objec
                return;
        }
        if (progress > pack_to_stdout)
 -              fprintf(stderr, "Delta compression using up to %d threads.\n",
 -                              delta_search_threads);
 +              fprintf_ln(stderr, _("Delta compression using up to %d threads"),
 +                         delta_search_threads);
        p = xcalloc(delta_search_threads, sizeof(*p));
  
        /* Partition the work amongst work threads. */
                ret = pthread_create(&p[i].thread, NULL,
                                     threaded_find_deltas, &p[i]);
                if (ret)
 -                      die("unable to create thread: %s", strerror(ret));
 +                      die(_("unable to create thread: %s"), strerror(ret));
                active_threads++;
        }
  
@@@ -2499,10 -2456,10 +2496,10 @@@ static void add_tag_chain(const struct 
        if (packlist_find(&to_pack, oid->hash, NULL))
                return;
  
 -      tag = lookup_tag(oid);
 +      tag = lookup_tag(the_repository, oid);
        while (1) {
                if (!tag || parse_tag(tag) || !tag->tagged)
 -                      die("unable to pack objects reachable from tag %s",
 +                      die(_("unable to pack objects reachable from tag %s"),
                            oid_to_hex(oid));
  
                add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
@@@ -2568,7 -2525,7 +2565,7 @@@ static void prepare_pack(int window, in
                if (!entry->preferred_base) {
                        nr_deltas++;
                        if (oe_type(entry) < 0)
 -                              die("unable to get type of object %s",
 +                              die(_("unable to get type of object %s"),
                                    oid_to_hex(&entry->idx.oid));
                } else {
                        if (oe_type(entry) < 0) {
                ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
                stop_progress(&progress_state);
                if (nr_done != nr_deltas)
 -                      die("inconsistency with delta count");
 +                      die(_("inconsistency with delta count"));
        }
        free(delta_list);
  }
@@@ -2632,11 -2589,11 +2629,11 @@@ static int git_pack_config(const char *
        if (!strcmp(k, "pack.threads")) {
                delta_search_threads = git_config_int(k, v);
                if (delta_search_threads < 0)
 -                      die("invalid number of threads specified (%d)",
 +                      die(_("invalid number of threads specified (%d)"),
                            delta_search_threads);
  #ifdef NO_PTHREADS
                if (delta_search_threads != 1) {
 -                      warning("no threads support, ignoring %s", k);
 +                      warning(_("no threads support, ignoring %s"), k);
                        delta_search_threads = 0;
                }
  #endif
        if (!strcmp(k, "pack.indexversion")) {
                pack_idx_opts.version = git_config_int(k, v);
                if (pack_idx_opts.version > 2)
 -                      die("bad pack.indexversion=%"PRIu32,
 +                      die(_("bad pack.indexversion=%"PRIu32),
                            pack_idx_opts.version);
                return 0;
        }
@@@ -2663,7 -2620,7 +2660,7 @@@ static void read_object_list_from_stdin
                        if (feof(stdin))
                                break;
                        if (!ferror(stdin))
 -                              die("fgets returned NULL, not EOF, not error!");
 +                              die("BUG: fgets returned NULL, not EOF, not error!");
                        if (errno != EINTR)
                                die_errno("fgets");
                        clearerr(stdin);
                }
                if (line[0] == '-') {
                        if (get_oid_hex(line+1, &oid))
 -                              die("expected edge object ID, got garbage:\n %s",
 +                              die(_("expected edge object ID, got garbage:\n %s"),
                                    line);
                        add_preferred_base(&oid);
                        continue;
                }
                if (parse_oid_hex(line, &oid, &p))
 -                      die("expected object ID, got garbage:\n %s", line);
 +                      die(_("expected object ID, got garbage:\n %s"), line);
  
                add_preferred_base_object(p + 1);
                add_object_entry(&oid, OBJ_NONE, p + 1, 0);
@@@ -2813,10 -2770,10 +2810,10 @@@ static void add_objects_in_unpacked_pac
                struct object_id oid;
                struct object *o;
  
 -              if (!p->pack_local || p->pack_keep)
 +              if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
                        continue;
                if (open_pack_index(p))
 -                      die("cannot open pack index");
 +                      die(_("cannot open pack index"));
  
                ALLOC_GROW(in_pack.array,
                           in_pack.nr + p->num_objects,
  static int add_loose_object(const struct object_id *oid, const char *path,
                            void *data)
  {
 -      enum object_type type = oid_object_info(oid, NULL);
 +      enum object_type type = oid_object_info(the_repository, oid, NULL);
  
        if (type < 0) {
 -              warning("loose object at %s could not be examined", path);
 +              warning(_("loose object at %s could not be examined"), path);
                return 0;
        }
  
@@@ -2876,8 -2833,7 +2873,8 @@@ static int has_sha1_pack_kept_or_nonloc
                                        get_packed_git(the_repository);
  
        while (p) {
 -              if ((!p->pack_local || p->pack_keep) &&
 +              if ((!p->pack_local || p->pack_keep ||
 +                              p->pack_keep_in_core) &&
                        find_pack_entry_one(oid->hash, p)) {
                        last_found = p;
                        return 1;
@@@ -2920,11 -2876,11 +2917,11 @@@ static void loosen_unused_packed_object
        struct object_id oid;
  
        for (p = get_packed_git(the_repository); p; p = p->next) {
 -              if (!p->pack_local || p->pack_keep)
 +              if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
                        continue;
  
                if (open_pack_index(p))
 -                      die("cannot open pack index");
 +                      die(_("cannot open pack index"));
  
                for (i = 0; i < p->num_objects; i++) {
                        nth_packed_object_oid(&oid, p, i);
                            !has_sha1_pack_kept_or_nonlocal(&oid) &&
                            !loosened_object_can_be_discarded(&oid, p->mtime))
                                if (force_object_loose(&oid, p->mtime))
 -                                      die("unable to force loose object");
 +                                      die(_("unable to force loose object"));
                }
        }
  }
@@@ -2946,21 -2902,18 +2943,21 @@@ static int pack_options_allow_reuse(voi
  {
        return pack_to_stdout &&
               allow_ofs_delta &&
 -             !ignore_packed_keep &&
 +             !ignore_packed_keep_on_disk &&
 +             !ignore_packed_keep_in_core &&
               (!local || !have_non_local_packs) &&
               !incremental;
  }
  
  static int get_object_list_from_bitmap(struct rev_info *revs)
  {
 -      if (prepare_bitmap_walk(revs) < 0)
 +      struct bitmap_index *bitmap_git;
 +      if (!(bitmap_git = prepare_bitmap_walk(revs)))
                return -1;
  
        if (pack_options_allow_reuse() &&
            !reuse_partial_packfile_from_bitmap(
 +                      bitmap_git,
                        &reuse_packfile,
                        &reuse_packfile_objects,
                        &reuse_packfile_offset)) {
                display_progress(progress_state, nr_result);
        }
  
 -      traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
 +      traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
 +      free_bitmap_index(bitmap_git);
        return 0;
  }
  
@@@ -2997,7 -2949,7 +2994,7 @@@ static void get_object_list(int ac, con
        setup_revisions(ac, av, &revs, NULL);
  
        /* make sure shallows are read */
 -      is_repository_shallow();
 +      is_repository_shallow(the_repository);
  
        while (fgets(line, sizeof(line), stdin) != NULL) {
                int len = strlen(line);
                                struct object_id oid;
                                if (get_oid_hex(line + 10, &oid))
                                        die("not an SHA-1 '%s'", line + 10);
 -                              register_shallow(&oid);
 +                              register_shallow(the_repository, &oid);
                                use_bitmap_index = 0;
                                continue;
                        }
 -                      die("not a rev '%s'", line);
 +                      die(_("not a rev '%s'"), line);
                }
                if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
 -                      die("bad revision '%s'", line);
 +                      die(_("bad revision '%s'"), line);
        }
  
        if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
                return;
  
        if (prepare_revision_walk(&revs))
 -              die("revision walk setup failed");
 +              die(_("revision walk setup failed"));
        mark_edges_uninteresting(&revs, show_edge);
  
        if (!fn_show_object)
                revs.ignore_missing_links = 1;
                if (add_unseen_recent_objects_to_traversal(&revs,
                                unpack_unreachable_expiration))
 -                      die("unable to add recent objects");
 +                      die(_("unable to add recent objects"));
                if (prepare_revision_walk(&revs))
 -                      die("revision walk setup failed");
 +                      die(_("revision walk setup failed"));
                traverse_commit_list(&revs, record_recent_commit,
                                     record_recent_object, NULL);
        }
        oid_array_clear(&recent_objects);
  }
  
 +static void add_extra_kept_packs(const struct string_list *names)
 +{
 +      struct packed_git *p;
 +
 +      if (!names->nr)
 +              return;
 +
 +      for (p = get_packed_git(the_repository); p; p = p->next) {
 +              const char *name = basename(p->pack_name);
 +              int i;
 +
 +              if (!p->pack_local)
 +                      continue;
 +
 +              for (i = 0; i < names->nr; i++)
 +                      if (!fspathcmp(name, names->items[i].string))
 +                              break;
 +
 +              if (i < names->nr) {
 +                      p->pack_keep_in_core = 1;
 +                      ignore_packed_keep_in_core = 1;
 +                      continue;
 +              }
 +      }
 +}
 +
  static int option_parse_index_version(const struct option *opt,
                                      const char *arg, int unset)
  {
@@@ -3124,7 -3050,6 +3121,7 @@@ int cmd_pack_objects(int argc, const ch
        struct argv_array rp = ARGV_ARRAY_INIT;
        int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
        int rev_list_index = 0;
 +      struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
        struct option pack_objects_options[] = {
                OPT_SET_INT('q', "quiet", &progress,
                            N_("do not show progress meter"), 0),
                OPT_BOOL(0, "all-progress-implied",
                         &all_progress_implied,
                         N_("similar to --all-progress when progress meter is shown")),
 -              { OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
 +              { OPTION_CALLBACK, 0, "index-version", NULL, N_("<version>[,<offset>]"),
                  N_("write the pack index file in the specified idx format version"),
                  0, option_parse_index_version },
                OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
                         N_("do not create an empty pack output")),
                OPT_BOOL(0, "revs", &use_internal_rev_list,
                         N_("read revision arguments from standard input")),
 -              { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
 -                N_("limit the objects to those that are not yet packed"),
 -                PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
 -              { OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
 -                N_("include objects reachable from any reference"),
 -                PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
 -              { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
 -                N_("include objects referred by reflog entries"),
 -                PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
 -              { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
 -                N_("include objects referred to by the index"),
 -                PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
 +              OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
 +                            N_("limit the objects to those that are not yet packed"),
 +                            1, PARSE_OPT_NONEG),
 +              OPT_SET_INT_F(0, "all", &rev_list_all,
 +                            N_("include objects reachable from any reference"),
 +                            1, PARSE_OPT_NONEG),
 +              OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
 +                            N_("include objects referred by reflog entries"),
 +                            1, PARSE_OPT_NONEG),
 +              OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
 +                            N_("include objects referred to by the index"),
 +                            1, PARSE_OPT_NONEG),
                OPT_BOOL(0, "stdout", &pack_to_stdout,
                         N_("output pack to stdout")),
                OPT_BOOL(0, "include-tag", &include_tag,
                         N_("create thin packs")),
                OPT_BOOL(0, "shallow", &shallow,
                         N_("create packs suitable for shallow fetches")),
 -              OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,
 +              OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
                         N_("ignore packs that have companion .keep file")),
 +              OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
 +                              N_("ignore this pack")),
                OPT_INTEGER(0, "compression", &pack_compression_level,
                            N_("pack compression level")),
                OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
        if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
                BUG("too many dfs states, increase OE_DFS_STATE_BITS");
  
 -      check_replace_refs = 0;
 +      read_replace_refs = 0;
  
        reset_pack_idx_option(&pack_idx_opts);
        git_config(git_pack_config, NULL);
                fetch_if_missing = 0;
                argv_array_push(&rp, "--exclude-promisor-objects");
        }
 +      if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
 +              use_internal_rev_list = 1;
  
        if (!reuse_object)
                reuse_delta = 0;
        if (pack_compression_level == -1)
                pack_compression_level = Z_DEFAULT_COMPRESSION;
        else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
 -              die("bad pack compression level %d", pack_compression_level);
 +              die(_("bad pack compression level %d"), pack_compression_level);
  
        if (!delta_search_threads)      /* --threads=0 means autodetect */
                delta_search_threads = online_cpus();
  
  #ifdef NO_PTHREADS
        if (delta_search_threads != 1)
 -              warning("no threads support, ignoring --threads");
 +              warning(_("no threads support, ignoring --threads"));
  #endif
        if (!pack_to_stdout && !pack_size_limit)
                pack_size_limit = pack_size_limit_cfg;
        if (pack_to_stdout && pack_size_limit)
 -              die("--max-pack-size cannot be used to build a pack for transfer.");
 +              die(_("--max-pack-size cannot be used to build a pack for transfer"));
        if (pack_size_limit && pack_size_limit < 1024*1024) {
 -              warning("minimum pack size limit is 1 MiB");
 +              warning(_("minimum pack size limit is 1 MiB"));
                pack_size_limit = 1024*1024;
        }
  
        if (!pack_to_stdout && thin)
 -              die("--thin cannot be used to build an indexable pack.");
 +              die(_("--thin cannot be used to build an indexable pack"));
  
        if (keep_unreachable && unpack_unreachable)
 -              die("--keep-unreachable and --unpack-unreachable are incompatible.");
 +              die(_("--keep-unreachable and --unpack-unreachable are incompatible"));
        if (!rev_list_all || !rev_list_reflog || !rev_list_index)
                unpack_unreachable_expiration = 0;
  
        if (filter_options.choice) {
                if (!pack_to_stdout)
 -                      die("cannot use --filter without --stdout.");
 +                      die(_("cannot use --filter without --stdout"));
                use_bitmap_index = 0;
        }
  
                use_bitmap_index = use_bitmap_index_default;
  
        /* "hard" reasons not to use bitmaps; these just won't work at all */
 -      if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow())
 +      if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
                use_bitmap_index = 0;
  
        if (pack_to_stdout || !rev_list_all)
        if (progress && all_progress_implied)
                progress = 2;
  
 -      if (ignore_packed_keep) {
 +      add_extra_kept_packs(&keep_pack_list);
 +      if (ignore_packed_keep_on_disk) {
                struct packed_git *p;
                for (p = get_packed_git(the_repository); p; p = p->next)
                        if (p->pack_local && p->pack_keep)
                                break;
                if (!p) /* no keep-able packs found */
 -                      ignore_packed_keep = 0;
 +                      ignore_packed_keep_on_disk = 0;
        }
        if (local) {
                /*
 -               * unlike ignore_packed_keep above, we do not want to
 -               * unset "local" based on looking at packs, as it
 -               * also covers non-local objects
 +               * unlike ignore_packed_keep_on_disk above, we do not
 +               * want to unset "local" based on looking at packs, as
 +               * it also covers non-local objects
                 */
                struct packed_git *p;
                for (p = get_packed_git(the_repository); p; p = p->next) {
        prepare_packing_data(&to_pack);
  
        if (progress)
 -              progress_state = start_progress(_("Counting objects"), 0);
 +              progress_state = start_progress(_("Enumerating objects"), 0);
        if (!use_internal_rev_list)
                read_object_list_from_stdin();
        else {
                prepare_pack(window, depth);
        write_pack_file();
        if (progress)
 -              fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
 -                      " reused %"PRIu32" (delta %"PRIu32")\n",
 -                      written, written_delta, reused, reused_delta);
 +              fprintf_ln(stderr,
 +                         _("Total %"PRIu32" (delta %"PRIu32"),"
 +                           " reused %"PRIu32" (delta %"PRIu32")"),
 +                         written, written_delta, reused, reused_delta);
        return 0;
  }
diff --combined pack-objects.c
index 92708522e76b4565882177f899612d72a3f6d75a,f00a025738d49cf7eef8f3561efe75de3abadbd8..6ef87e5683aacdf738c86679712078988c0899fd
@@@ -60,7 -60,7 +60,7 @@@ static void rehash_objects(struct packi
                                                       &found);
  
                if (found)
 -                      die("BUG: Duplicate object in hash");
 +                      BUG("Duplicate object in hash");
  
                pdata->index[ix] = i + 1;
                entry++;
@@@ -146,6 -146,8 +146,8 @@@ void prepare_packing_data(struct packin
  
        pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE",
                                             1U << OE_SIZE_BITS);
+       pdata->oe_delta_size_limit = git_env_ulong("GIT_TEST_OE_DELTA_SIZE",
+                                                  1UL << OE_DELTA_SIZE_BITS);
  }
  
  struct object_entry *packlist_alloc(struct packing_data *pdata,
  
                if (!pdata->in_pack_by_idx)
                        REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
+               if (pdata->delta_size)
+                       REALLOC_ARRAY(pdata->delta_size, pdata->nr_alloc);
        }
  
        new_entry = pdata->objects + pdata->nr_objects++;
diff --combined pack-objects.h
index 08c6b57d49791556f323ebf8654e98689a19593c,6ffbdcf79f3f52bd7a479e1fa6e1f02c9e54e205..62806ccc39ea31b425089f4f38121d81a02fe5dd
@@@ -2,9 -2,7 +2,10 @@@
  #define PACK_OBJECTS_H
  
  #include "object-store.h"
+ #include "thread-utils.h"
 +#include "pack.h"
 +
 +#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
  
  #define OE_DFS_STATE_BITS     2
  #define OE_DEPTH_BITS         12
@@@ -15,7 -13,7 +16,7 @@@
   * above this limit. Don't lower it too much.
   */
  #define OE_SIZE_BITS          31
- #define OE_DELTA_SIZE_BITS    20
+ #define OE_DELTA_SIZE_BITS    23
  
  /*
   * State flags for depth-first search used for analyzing delta cycles.
@@@ -95,11 -93,12 +96,12 @@@ struct object_entry 
                                     */
        unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
        unsigned delta_size_valid:1;
+       unsigned char in_pack_header_size;
        unsigned in_pack_idx:OE_IN_PACK_BITS;   /* already in pack */
        unsigned z_delta_size:OE_Z_DELTA_BITS;
        unsigned type_valid:1;
-       unsigned type_:TYPE_BITS;
        unsigned no_try_delta:1;
+       unsigned type_:TYPE_BITS;
        unsigned in_pack_type:TYPE_BITS; /* could be delta */
        unsigned preferred_base:1; /*
                                    * we do not pack this, but is available
        unsigned tagged:1; /* near the very tip of refs */
        unsigned filled:1; /* assigned write-order */
        unsigned dfs_state:OE_DFS_STATE_BITS;
-       unsigned char in_pack_header_size;
        unsigned depth:OE_DEPTH_BITS;
  
        /*
         * pahole results on 64-bit linux (gcc and clang)
         *
-        *   size: 80, bit_padding: 20 bits, holes: 8 bits
+        *   size: 80, bit_padding: 9 bits
         *
         * and on 32-bit (gcc)
         *
-        *   size: 76, bit_padding: 20 bits, holes: 8 bits
+        *   size: 76, bit_padding: 9 bits
         */
  };
  
@@@ -131,6 -129,7 +132,7 @@@ struct packing_data 
        uint32_t index_size;
  
        unsigned int *in_pack_pos;
+       unsigned long *delta_size;
  
        /*
         * Only one of these can be non-NULL and they have different
        struct packed_git **in_pack_by_idx;
        struct packed_git **in_pack;
  
+ #ifndef NO_PTHREADS
+       pthread_mutex_t lock;
+ #endif
        uintmax_t oe_size_limit;
+       uintmax_t oe_delta_size_limit;
  };
  
  void prepare_packing_data(struct packing_data *pdata);
+ static inline void packing_data_lock(struct packing_data *pdata)
+ {
+ #ifndef NO_PTHREADS
+       pthread_mutex_lock(&pdata->lock);
+ #endif
+ }
+ static inline void packing_data_unlock(struct packing_data *pdata)
+ {
+ #ifndef NO_PTHREADS
+       pthread_mutex_unlock(&pdata->lock);
+ #endif
+ }
  struct object_entry *packlist_alloc(struct packing_data *pdata,
                                    const unsigned char *sha1,
                                    uint32_t index_pos);
@@@ -333,18 -351,34 +354,34 @@@ static inline unsigned long oe_delta_si
  {
        if (e->delta_size_valid)
                return e->delta_size_;
-       return oe_size(pack, e);
+       /*
+        * pack->delta_size[] can't be NULL because oe_set_delta_size()
+        * must have been called when a new delta is saved with
+        * oe_set_delta().
+        * If oe_delta() returns NULL (i.e. default state, which means
+        * delta_size_valid is also false), then the caller must never
+        * call oe_delta_size().
+        */
+       return pack->delta_size[e - pack->objects];
  }
  
  static inline void oe_set_delta_size(struct packing_data *pack,
                                     struct object_entry *e,
                                     unsigned long size)
  {
-       e->delta_size_ = size;
-       e->delta_size_valid = e->delta_size_ == size;
-       if (!e->delta_size_valid && size != oe_size(pack, e))
-               BUG("this can only happen in check_object() "
-                   "where delta size is the same as entry size");
+       if (size < pack->oe_delta_size_limit) {
+               e->delta_size_ = size;
+               e->delta_size_valid = 1;
+       } else {
+               packing_data_lock(pack);
+               if (!pack->delta_size)
+                       ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
+               packing_data_unlock(pack);
+               pack->delta_size[e - pack->objects] = size;
+               e->delta_size_valid = 0;
+       }
  }
  
  #endif