Merge branch 'jk/pack-delta-reuse-with-bitmap'
authorJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
When creating a thin pack, which allows objects to be made into a
delta against another object that is not in the resulting pack but
is known to be present on the receiving end, the code learned to
take advantage of the reachability bitmap; this allows the server
to send a delta against a base beyond the "boundary" commit.

* jk/pack-delta-reuse-with-bitmap:
pack-objects: reuse on-disk deltas for thin "have" objects
pack-bitmap: save "have" bitmap from walk
t/perf: add perf tests for fetches from a bitmapped server
t/perf: add infrastructure for measuring sizes
t/perf: factor out percent calculations
t/perf: factor boilerplate out of test_perf

1  2 
builtin/pack-objects.c
pack-bitmap.c
pack-objects.c
pack-objects.h
diff --combined builtin/pack-objects.c
index caa4cd0211c40e4ac0acb4178a4df260db651b88,1bd43432f7789a60e39d5b45f5370f001279b050..f434e1fb0c1ff29e801a2a3b3fc2e67d0f86d217
@@@ -31,7 -31,6 +31,7 @@@
  #include "packfile.h"
  #include "object-store.h"
  #include "dir.h"
 +#include "midx.h"
  
  #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
  #define SIZE(obj) oe_size(&to_pack, obj)
@@@ -41,6 -40,7 +41,7 @@@
  #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
  #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
  #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+ #define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
  #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
  #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
  #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
@@@ -60,6 -60,7 +61,7 @@@ static struct packing_data to_pack
  
  static struct pack_idx_entry **written_list;
  static uint32_t nr_result, nr_written, nr_seen;
+ static struct bitmap_index *bitmap_git;
  
  static int non_empty;
  static int reuse_delta = 1, reuse_object = 1;
@@@ -80,6 -81,7 +82,7 @@@ static unsigned long pack_size_limit
  static int depth = 50;
  static int delta_search_threads;
  static int pack_to_stdout;
+ static int thin;
  static int num_preferred_base;
  static struct progress *progress_state;
  
@@@ -1041,7 -1043,6 +1044,7 @@@ static int want_object_in_pack(const st
  {
        int want;
        struct list_head *pos;
 +      struct multi_pack_index *m;
  
        if (!exclude && local && has_loose_object_nonlocal(oid))
                return 0;
                if (want != -1)
                        return want;
        }
 +
 +      for (m = get_multi_pack_index(the_repository); m; m = m->next) {
 +              struct pack_entry e;
 +              if (fill_midx_entry(oid, &e, m)) {
 +                      struct packed_git *p = e.p;
 +                      off_t offset;
 +
 +                      if (p == *found_pack)
 +                              offset = *found_offset;
 +                      else
 +                              offset = find_pack_entry_one(oid->hash, p);
 +
 +                      if (offset) {
 +                              if (!*found_pack) {
 +                                      if (!is_pack_valid(p))
 +                                              continue;
 +                                      *found_offset = offset;
 +                                      *found_pack = p;
 +                              }
 +                              want = want_found_object(exclude, p);
 +                              if (want != -1)
 +                                      return want;
 +                      }
 +              }
 +      }
 +
        list_for_each(pos, get_packed_git_mru(the_repository)) {
                struct packed_git *p = list_entry(pos, struct packed_git, mru);
                off_t offset;
@@@ -1538,11 -1513,15 +1541,15 @@@ static void check_object(struct object_
                        break;
                }
  
-               if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
+               if (base_ref && (
+                   (base_entry = packlist_find(&to_pack, base_ref, NULL)) ||
+                   (thin &&
+                    bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) {
                        /*
                         * If base_ref was set above that means we wish to
-                        * reuse delta data, and we even found that base
-                        * in the list of objects we want to pack. Goodie!
+                        * reuse delta data, and either we found that object in
+                        * the list of objects we want to pack, or it's one we
+                        * know the receiver has.
                         *
                         * Depth value does not matter - find_deltas() will
                         * never consider reused delta as the base object to
                         */
                        oe_set_type(entry, entry->in_pack_type);
                        SET_SIZE(entry, in_pack_size); /* delta size */
-                       SET_DELTA(entry, base_entry);
                        SET_DELTA_SIZE(entry, in_pack_size);
-                       entry->delta_sibling_idx = base_entry->delta_child_idx;
-                       SET_DELTA_CHILD(base_entry, entry);
+                       if (base_entry) {
+                               SET_DELTA(entry, base_entry);
+                               entry->delta_sibling_idx = base_entry->delta_child_idx;
+                               SET_DELTA_CHILD(base_entry, entry);
+                       } else {
+                               SET_DELTA_EXT(entry, base_ref);
+                       }
                        unuse_pack(&w_curs);
                        return;
                }
@@@ -2069,6 -2054,10 +2082,6 @@@ static int try_delta(struct unpacked *t
        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
        if (!delta_buf)
                return 0;
 -      if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
 -              free(delta_buf);
 -              return 0;
 -      }
  
        if (DELTA(trg_entry)) {
                /* Prefer only shallower same-sized deltas. */
@@@ -2327,7 -2316,6 +2340,7 @@@ static void init_threaded_search(void
        pthread_mutex_init(&cache_mutex, NULL);
        pthread_mutex_init(&progress_mutex, NULL);
        pthread_cond_init(&progress_cond, NULL);
 +      pthread_mutex_init(&to_pack.lock, NULL);
        old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
  }
  
@@@ -2834,7 -2822,7 +2847,7 @@@ static void add_objects_in_unpacked_pac
  
        memset(&in_pack, 0, sizeof(in_pack));
  
 -      for (p = get_packed_git(the_repository); p; p = p->next) {
 +      for (p = get_all_packs(the_repository); p; p = p->next) {
                struct object_id oid;
                struct object *o;
  
@@@ -2898,7 -2886,7 +2911,7 @@@ static int has_sha1_pack_kept_or_nonloc
        struct packed_git *p;
  
        p = (last_found != (void *)1) ? last_found :
 -                                      get_packed_git(the_repository);
 +                                      get_all_packs(the_repository);
  
        while (p) {
                if ((!p->pack_local || p->pack_keep ||
                        return 1;
                }
                if (p == last_found)
 -                      p = get_packed_git(the_repository);
 +                      p = get_all_packs(the_repository);
                else
                        p = p->next;
                if (p == last_found)
@@@ -2944,7 -2932,7 +2957,7 @@@ static void loosen_unused_packed_object
        uint32_t i;
        struct object_id oid;
  
 -      for (p = get_packed_git(the_repository); p; p = p->next) {
 +      for (p = get_all_packs(the_repository); p; p = p->next) {
                if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
                        continue;
  
@@@ -2979,7 -2967,6 +2992,6 @@@ static int pack_options_allow_reuse(voi
  
  static int get_object_list_from_bitmap(struct rev_info *revs)
  {
-       struct bitmap_index *bitmap_git;
        if (!(bitmap_git = prepare_bitmap_walk(revs)))
                return -1;
  
        }
  
        traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
-       free_bitmap_index(bitmap_git);
        return 0;
  }
  
@@@ -3091,7 -3077,7 +3102,7 @@@ static void add_extra_kept_packs(const 
        if (!names->nr)
                return;
  
 -      for (p = get_packed_git(the_repository); p; p = p->next) {
 +      for (p = get_all_packs(the_repository); p; p = p->next) {
                const char *name = basename(p->pack_name);
                int i;
  
@@@ -3143,7 -3129,6 +3154,6 @@@ static int option_parse_unpack_unreacha
  int cmd_pack_objects(int argc, const char **argv, const char *prefix)
  {
        int use_internal_rev_list = 0;
-       int thin = 0;
        int shallow = 0;
        int all_progress_implied = 0;
        struct argv_array rp = ARGV_ARRAY_INIT;
        add_extra_kept_packs(&keep_pack_list);
        if (ignore_packed_keep_on_disk) {
                struct packed_git *p;
 -              for (p = get_packed_git(the_repository); p; p = p->next)
 +              for (p = get_all_packs(the_repository); p; p = p->next)
                        if (p->pack_local && p->pack_keep)
                                break;
                if (!p) /* no keep-able packs found */
                 * it also covers non-local objects
                 */
                struct packed_git *p;
 -              for (p = get_packed_git(the_repository); p; p = p->next) {
 +              for (p = get_all_packs(the_repository); p; p = p->next) {
                        if (!p->pack_local) {
                                have_non_local_packs = 1;
                                break;
diff --combined pack-bitmap.c
index 4e50ab391fa0df9fc2d28a5bfdc31996635699b6,c3231ef9ef2d57ce21ebe8db64401af95a0bb6e6..c7d593c91c7a76497407a703d9f0c50925e2475a
@@@ -86,6 -86,9 +86,9 @@@ struct bitmap_index 
        /* Bitmap result of the last performed walk */
        struct bitmap *result;
  
+       /* "have" bitmap from the last performed walk */
+       struct bitmap *haves;
        /* Version of the bitmap index */
        unsigned int version;
  
@@@ -335,7 -338,7 +338,7 @@@ static int open_pack_bitmap(struct bitm
  
        assert(!bitmap_git->map && !bitmap_git->loaded);
  
 -      for (p = get_packed_git(the_repository); p; p = p->next) {
 +      for (p = get_all_packs(the_repository); p; p = p->next) {
                if (open_pack_bitmap_1(bitmap_git, p) == 0)
                        ret = 0;
        }
@@@ -759,8 -762,8 +762,8 @@@ struct bitmap_index *prepare_bitmap_wal
                bitmap_and_not(wants_bitmap, haves_bitmap);
  
        bitmap_git->result = wants_bitmap;
+       bitmap_git->haves = haves_bitmap;
  
-       bitmap_free(haves_bitmap);
        return bitmap_git;
  
  cleanup:
@@@ -1114,5 -1117,25 +1117,25 @@@ void free_bitmap_index(struct bitmap_in
        free(b->ext_index.objects);
        free(b->ext_index.hashes);
        bitmap_free(b->result);
+       bitmap_free(b->haves);
        free(b);
  }
+ int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git,
+                                    const unsigned char *sha1)
+ {
+       int pos;
+       if (!bitmap_git)
+               return 0; /* no bitmap loaded */
+       if (!bitmap_git->result)
+               BUG("failed to perform bitmap walk before querying");
+       if (!bitmap_git->haves)
+               return 0; /* walk had no "haves" */
+       pos = bitmap_position_packfile(bitmap_git, sha1);
+       if (pos < 0)
+               return 0;
+       return bitmap_get(bitmap_git->haves, pos);
+ }
diff --combined pack-objects.c
index d04cfa8e9f173b3e3b31e7b634c13adf735cb479,9ae0cecd817640d7c4a82d0e5cdaa2eed71a2e2c..34628587725ae879729c09d1847bad2269b7b97f
@@@ -99,7 -99,7 +99,7 @@@ static void prepare_in_pack_by_idx(stru
         * (i.e. in_pack_idx also zero) should return NULL.
         */
        mapping[cnt++] = NULL;
 -      for (p = get_packed_git(the_repository); p; p = p->next, cnt++) {
 +      for (p = get_all_packs(the_repository); p; p = p->next, cnt++) {
                if (cnt == nr) {
                        free(mapping);
                        return;
@@@ -146,8 -146,6 +146,8 @@@ void prepare_packing_data(struct packin
  
        pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE",
                                             1U << OE_SIZE_BITS);
 +      pdata->oe_delta_size_limit = git_env_ulong("GIT_TEST_OE_DELTA_SIZE",
 +                                                 1UL << OE_DELTA_SIZE_BITS);
  }
  
  struct object_entry *packlist_alloc(struct packing_data *pdata,
  
                if (!pdata->in_pack_by_idx)
                        REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
 +              if (pdata->delta_size)
 +                      REALLOC_ARRAY(pdata->delta_size, pdata->nr_alloc);
        }
  
        new_entry = pdata->objects + pdata->nr_objects++;
  
        return new_entry;
  }
+ void oe_set_delta_ext(struct packing_data *pdata,
+                     struct object_entry *delta,
+                     const unsigned char *sha1)
+ {
+       struct object_entry *base;
+       ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext);
+       base = &pdata->ext_bases[pdata->nr_ext++];
+       memset(base, 0, sizeof(*base));
+       hashcpy(base->idx.oid.hash, sha1);
+       /* These flags mark that we are not part of the actual pack output. */
+       base->preferred_base = 1;
+       base->filled = 1;
+       delta->ext_base = 1;
+       delta->delta_idx = base - pdata->ext_bases + 1;
+ }
diff --combined pack-objects.h
index 62806ccc39ea31b425089f4f38121d81a02fe5dd,3fd10b15c95c6c04c12671674f9f609b341c8cc5..b0c1f137c675519cd527be2bc4a058e71024562c
@@@ -2,7 -2,6 +2,7 @@@
  #define PACK_OBJECTS_H
  
  #include "object-store.h"
 +#include "thread-utils.h"
  #include "pack.h"
  
  #define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
@@@ -16,7 -15,7 +16,7 @@@
   * above this limit. Don't lower it too much.
   */
  #define OE_SIZE_BITS          31
 -#define OE_DELTA_SIZE_BITS    20
 +#define OE_DELTA_SIZE_BITS    23
  
  /*
   * State flags for depth-first search used for analyzing delta cycles.
@@@ -96,12 -95,11 +96,12 @@@ struct object_entry 
                                     */
        unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
        unsigned delta_size_valid:1;
 +      unsigned char in_pack_header_size;
        unsigned in_pack_idx:OE_IN_PACK_BITS;   /* already in pack */
        unsigned z_delta_size:OE_Z_DELTA_BITS;
        unsigned type_valid:1;
 -      unsigned type_:TYPE_BITS;
        unsigned no_try_delta:1;
 +      unsigned type_:TYPE_BITS;
        unsigned in_pack_type:TYPE_BITS; /* could be delta */
        unsigned preferred_base:1; /*
                                    * we do not pack this, but is available
        unsigned tagged:1; /* near the very tip of refs */
        unsigned filled:1; /* assigned write-order */
        unsigned dfs_state:OE_DFS_STATE_BITS;
 -      unsigned char in_pack_header_size;
        unsigned depth:OE_DEPTH_BITS;
+       unsigned ext_base:1; /* delta_idx points outside packlist */
  
        /*
         * pahole results on 64-bit linux (gcc and clang)
         *
 -       *   size: 80, bit_padding: 20 bits, holes: 8 bits
 +       *   size: 80, bit_padding: 9 bits
         *
         * and on 32-bit (gcc)
         *
 -       *   size: 76, bit_padding: 20 bits, holes: 8 bits
 +       *   size: 76, bit_padding: 9 bits
         */
  };
  
@@@ -132,7 -132,6 +133,7 @@@ struct packing_data 
        uint32_t index_size;
  
        unsigned int *in_pack_pos;
 +      unsigned long *delta_size;
  
        /*
         * Only one of these can be non-NULL and they have different
        struct packed_git **in_pack_by_idx;
        struct packed_git **in_pack;
  
 +#ifndef NO_PTHREADS
 +      pthread_mutex_t lock;
 +#endif
 +
+       /*
+        * This list contains entries for bases which we know the other side
+        * has (e.g., via reachability bitmaps), but which aren't in our
+        * "objects" list.
+        */
+       struct object_entry *ext_bases;
+       uint32_t nr_ext, alloc_ext;
        uintmax_t oe_size_limit;
 +      uintmax_t oe_delta_size_limit;
  };
  
  void prepare_packing_data(struct packing_data *pdata);
 +
 +static inline void packing_data_lock(struct packing_data *pdata)
 +{
 +#ifndef NO_PTHREADS
 +      pthread_mutex_lock(&pdata->lock);
 +#endif
 +}
 +static inline void packing_data_unlock(struct packing_data *pdata)
 +{
 +#ifndef NO_PTHREADS
 +      pthread_mutex_unlock(&pdata->lock);
 +#endif
 +}
 +
  struct object_entry *packlist_alloc(struct packing_data *pdata,
                                    const unsigned char *sha1,
                                    uint32_t index_pos);
@@@ -249,9 -237,12 +258,12 @@@ static inline struct object_entry *oe_d
                const struct packing_data *pack,
                const struct object_entry *e)
  {
-       if (e->delta_idx)
+       if (!e->delta_idx)
+               return NULL;
+       if (e->ext_base)
+               return &pack->ext_bases[e->delta_idx - 1];
+       else
                return &pack->objects[e->delta_idx - 1];
-       return NULL;
  }
  
  static inline void oe_set_delta(struct packing_data *pack,
                e->delta_idx = 0;
  }
  
+ void oe_set_delta_ext(struct packing_data *pack,
+                     struct object_entry *e,
+                     const unsigned char *sha1);
  static inline struct object_entry *oe_delta_child(
                const struct packing_data *pack,
                const struct object_entry *e)
@@@ -354,34 -349,18 +370,34 @@@ static inline unsigned long oe_delta_si
  {
        if (e->delta_size_valid)
                return e->delta_size_;
 -      return oe_size(pack, e);
 +
 +      /*
 +       * pack->detla_size[] can't be NULL because oe_set_delta_size()
 +       * must have been called when a new delta is saved with
 +       * oe_set_delta().
 +       * If oe_delta() returns NULL (i.e. default state, which means
 +       * delta_size_valid is also false), then the caller must never
 +       * call oe_delta_size().
 +       */
 +      return pack->delta_size[e - pack->objects];
  }
  
  static inline void oe_set_delta_size(struct packing_data *pack,
                                     struct object_entry *e,
                                     unsigned long size)
  {
 -      e->delta_size_ = size;
 -      e->delta_size_valid = e->delta_size_ == size;
 -      if (!e->delta_size_valid && size != oe_size(pack, e))
 -              BUG("this can only happen in check_object() "
 -                  "where delta size is the same as entry size");
 +      if (size < pack->oe_delta_size_limit) {
 +              e->delta_size_ = size;
 +              e->delta_size_valid = 1;
 +      } else {
 +              packing_data_lock(pack);
 +              if (!pack->delta_size)
 +                      ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
 +              packing_data_unlock(pack);
 +
 +              pack->delta_size[e - pack->objects] = size;
 +              e->delta_size_valid = 0;
 +      }
  }
  
  #endif