Merge branch 'cc/delta-islands'
authorJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:55 +0000 (13:53 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:55 +0000 (13:53 -0700)
Lift code from GitHub to restrict delta computation so that an
object that exists in one fork is not made into a delta against
another object that does not appear in the same forked repository.

* cc/delta-islands:
pack-objects: move 'layer' into 'struct packing_data'
pack-objects: move tree_depth into 'struct packing_data'
t5320: tests for delta islands
repack: add delta-islands support
pack-objects: add delta-islands support
pack-objects: refactor code into compute_layer_order()
Add delta-islands.{c,h}

1  2 
Documentation/config.txt
Documentation/git-repack.txt
Makefile
builtin/pack-objects.c
builtin/repack.c
pack-objects.c
pack-objects.h
Simple merge
Simple merge
diff --cc Makefile
Simple merge
index f434e1fb0c1ff29e801a2a3b3fc2e67d0f86d217,d5d91eeed5c894ade986e489b9d7f41e6c718a0b..425bdc8ac5f7b341b18e387eb4df922ebea085ba
@@@ -61,7 -60,7 +62,8 @@@ static struct packing_data to_pack
  
  static struct pack_idx_entry **written_list;
  static uint32_t nr_result, nr_written, nr_seen;
 +static struct bitmap_index *bitmap_git;
+ static uint32_t write_layer;
  
  static int non_empty;
  static int reuse_delta = 1, reuse_object = 1;
@@@ -752,13 -714,56 +723,57 @@@ static void compute_layer_order(struct 
         * Finally all the rest in really tight order
         */
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
-               if (!objects[i].filled)
-                       add_family_to_write_order(wo, &wo_end, &objects[i]);
+               if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
+                       add_family_to_write_order(wo, wo_end, &objects[i]);
+       }
+ }
+ static struct object_entry **compute_write_order(void)
+ {
+       uint32_t max_layers = 1;
+       unsigned int i, wo_end;
+       struct object_entry **wo;
+       struct object_entry *objects = to_pack.objects;
+       for (i = 0; i < to_pack.nr_objects; i++) {
+               objects[i].tagged = 0;
+               objects[i].filled = 0;
+               SET_DELTA_CHILD(&objects[i], NULL);
+               SET_DELTA_SIBLING(&objects[i], NULL);
+       }
+       /*
+        * Fully connect delta_child/delta_sibling network.
+        * Make sure delta_sibling is sorted in the original
+        * recency order.
+        */
+       for (i = to_pack.nr_objects; i > 0;) {
+               struct object_entry *e = &objects[--i];
+               if (!DELTA(e))
+                       continue;
+               /* Mark me as the first child */
+               e->delta_sibling_idx = DELTA(e)->delta_child_idx;
+               SET_DELTA_CHILD(DELTA(e), e);
        }
  
+       /*
+        * Mark objects that are at the tip of tags.
+        */
+       for_each_tag_ref(mark_tagged, NULL);
+       if (use_delta_islands)
+               max_layers = compute_pack_layers(&to_pack);
+       ALLOC_ARRAY(wo, to_pack.nr_objects);
+       wo_end = 0;
+       for (; write_layer < max_layers; ++write_layer)
+               compute_layer_order(wo, &wo_end);
        if (wo_end != to_pack.nr_objects)
 -              die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
 +              die(_("ordered %u objects, expected %"PRIu32),
 +                  wo_end, to_pack.nr_objects);
  
        return wo;
  }
@@@ -1541,15 -1519,12 +1556,16 @@@ static void check_object(struct object_
                        break;
                }
  
 -              if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL)) &&
 +              if (base_ref && (
 +                  (base_entry = packlist_find(&to_pack, base_ref, NULL)) ||
 +                  (thin &&
-                    bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) {
++                   bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref))) &&
+                   in_same_island(&entry->idx.oid, &base_entry->idx.oid)) {
                        /*
                         * If base_ref was set above that means we wish to
 -                       * reuse delta data, and we even found that base
 -                       * in the list of objects we want to pack. Goodie!
 +                       * reuse delta data, and either we found that object in
 +                       * the list of objects we want to pack, or it's one we
 +                       * know the receiver has.
                         *
                         * Depth value does not matter - find_deltas() will
                         * never consider reused delta as the base object to
@@@ -3064,8 -3046,11 +3107,11 @@@ static void get_object_list(int ac, con
        if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
                return;
  
+       if (use_delta_islands)
+               load_delta_islands();
        if (prepare_revision_walk(&revs))
 -              die("revision walk setup failed");
 +              die(_("revision walk setup failed"));
        mark_edges_uninteresting(&revs, show_edge);
  
        if (!fn_show_object)
index 42be88e86ce6fd5541ad067c17f5b29a4d492feb,5ab9ee69e4c9bd9559f12aad86d156573d4ba9f5..c6a7943d5cb108dbccdfd502d799ab71a9e7e146
@@@ -361,8 -258,24 +368,10 @@@ int cmd_repack(int argc, const char **a
        argv_array_push(&cmd.args, "--indexed-objects");
        if (repository_format_partial_clone)
                argv_array_push(&cmd.args, "--exclude-promisor-objects");
 -      if (window)
 -              argv_array_pushf(&cmd.args, "--window=%s", window);
 -      if (window_memory)
 -              argv_array_pushf(&cmd.args, "--window-memory=%s", window_memory);
 -      if (depth)
 -              argv_array_pushf(&cmd.args, "--depth=%s", depth);
 -      if (threads)
 -              argv_array_pushf(&cmd.args, "--threads=%s", threads);
 -      if (max_pack_size)
 -              argv_array_pushf(&cmd.args, "--max-pack-size=%s", max_pack_size);
 -      if (no_reuse_delta)
 -              argv_array_pushf(&cmd.args, "--no-reuse-delta");
 -      if (no_reuse_object)
 -              argv_array_pushf(&cmd.args, "--no-reuse-object");
        if (write_bitmaps)
                argv_array_push(&cmd.args, "--write-bitmap-index");
+       if (use_delta_islands)
+               argv_array_push(&cmd.args, "--delta-islands");
  
        if (pack_everything & ALL_INTO_ONE) {
                get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
diff --cc pack-objects.c
index 34628587725ae879729c09d1847bad2269b7b97f,98389460c2dc49c198313d1b961adffcece9120d..7f7b7dddf6955b2f791e2d97cd90381e5edfd339
@@@ -162,8 -160,12 +162,14 @@@ struct object_entry *packlist_alloc(str
  
                if (!pdata->in_pack_by_idx)
                        REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
 +              if (pdata->delta_size)
 +                      REALLOC_ARRAY(pdata->delta_size, pdata->nr_alloc);
+               if (pdata->tree_depth)
+                       REALLOC_ARRAY(pdata->tree_depth, pdata->nr_alloc);
+               if (pdata->layer)
+                       REALLOC_ARRAY(pdata->layer, pdata->nr_alloc);
        }
  
        new_entry = pdata->objects + pdata->nr_objects++;
        if (pdata->in_pack)
                pdata->in_pack[pdata->nr_objects - 1] = NULL;
  
+       if (pdata->tree_depth)
+               pdata->tree_depth[pdata->nr_objects - 1] = 0;
+       if (pdata->layer)
+               pdata->layer[pdata->nr_objects - 1] = 0;
        return new_entry;
  }
 +
 +void oe_set_delta_ext(struct packing_data *pdata,
 +                    struct object_entry *delta,
 +                    const unsigned char *sha1)
 +{
 +      struct object_entry *base;
 +
 +      ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext);
 +      base = &pdata->ext_bases[pdata->nr_ext++];
 +      memset(base, 0, sizeof(*base));
 +      hashcpy(base->idx.oid.hash, sha1);
 +
 +      /* These flags mark that we are not part of the actual pack output. */
 +      base->preferred_base = 1;
 +      base->filled = 1;
 +
 +      delta->ext_base = 1;
 +      delta->delta_idx = base - pdata->ext_bases + 1;
 +}
diff --cc pack-objects.h
index b0c1f137c675519cd527be2bc4a058e71024562c,ad3c2087641e8f017c5bfd202ac29cd7f1c57518..2ca39cfcfe26aea164fc4173c49f0ffd05d4ef04
@@@ -100,9 -97,10 +100,10 @@@ struct object_entry 
        unsigned in_pack_idx:OE_IN_PACK_BITS;   /* already in pack */
        unsigned z_delta_size:OE_Z_DELTA_BITS;
        unsigned type_valid:1;
 -      unsigned type_:TYPE_BITS;
        unsigned no_try_delta:1;
 +      unsigned type_:TYPE_BITS;
        unsigned in_pack_type:TYPE_BITS; /* could be delta */
        unsigned preferred_base:1; /*
                                    * we do not pack this, but is available
                                    * to be used as the base object to delta
@@@ -144,20 -141,11 +145,24 @@@ struct packing_data 
        struct packed_git **in_pack_by_idx;
        struct packed_git **in_pack;
  
 +#ifndef NO_PTHREADS
 +      pthread_mutex_t lock;
 +#endif
 +
 +      /*
 +       * This list contains entries for bases which we know the other side
 +       * has (e.g., via reachability bitmaps), but which aren't in our
 +       * "objects" list.
 +       */
 +      struct object_entry *ext_bases;
 +      uint32_t nr_ext, alloc_ext;
 +
        uintmax_t oe_size_limit;
 +      uintmax_t oe_delta_size_limit;
+       /* delta islands */
+       unsigned int *tree_depth;
+       unsigned char *layer;
  };
  
  void prepare_packing_data(struct packing_data *pdata);
@@@ -386,18 -344,45 +391,52 @@@ static inline void oe_set_delta_size(st
                                     struct object_entry *e,
                                     unsigned long size)
  {
 -      e->delta_size_ = size;
 -      e->delta_size_valid = e->delta_size_ == size;
 -      if (!e->delta_size_valid && size != oe_size(pack, e))
 -              BUG("this can only happen in check_object() "
 -                  "where delta size is the same as entry size");
 +      if (size < pack->oe_delta_size_limit) {
 +              e->delta_size_ = size;
 +              e->delta_size_valid = 1;
 +      } else {
 +              packing_data_lock(pack);
 +              if (!pack->delta_size)
 +                      ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
 +              packing_data_unlock(pack);
 +
 +              pack->delta_size[e - pack->objects] = size;
 +              e->delta_size_valid = 0;
 +      }
  }
  
+ static inline unsigned int oe_tree_depth(struct packing_data *pack,
+                                        struct object_entry *e)
+ {
+       if (!pack->tree_depth)
+               return 0;
+       return pack->tree_depth[e - pack->objects];
+ }
+ static inline void oe_set_tree_depth(struct packing_data *pack,
+                                    struct object_entry *e,
+                                    unsigned int tree_depth)
+ {
+       if (!pack->tree_depth)
+               ALLOC_ARRAY(pack->tree_depth, pack->nr_objects);
+       pack->tree_depth[e - pack->objects] = tree_depth;
+ }
+ static inline unsigned char oe_layer(struct packing_data *pack,
+                                    struct object_entry *e)
+ {
+       if (!pack->layer)
+               return 0;
+       return pack->layer[e - pack->objects];
+ }
+ static inline void oe_set_layer(struct packing_data *pack,
+                               struct object_entry *e,
+                               unsigned char layer)
+ {
+       if (!pack->layer)
+               ALLOC_ARRAY(pack->layer, pack->nr_objects);
+       pack->layer[e - pack->objects] = layer;
+ }
  #endif