pack-objects: handle island check for "external" delta base
[gitweb.git] / builtin / pack-objects.c
index 0d80dee2ba1ad82fd21622f73dac86b6010da4b6..fb97bc5ae18ab283dcd8c05a18838307f1d09052 100644 (file)
@@ -24,6 +24,7 @@
 #include "streaming.h"
 #include "thread-utils.h"
 #include "pack-bitmap.h"
+#include "delta-islands.h"
 #include "reachable.h"
 #include "sha1-array.h"
 #include "argv-array.h"
@@ -31,6 +32,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "dir.h"
+#include "midx.h"
 
 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 #define SIZE(obj) oe_size(&to_pack, obj)
@@ -40,6 +42,7 @@
 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
 #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
@@ -59,6 +62,8 @@ static struct packing_data to_pack;
 
 static struct pack_idx_entry **written_list;
 static uint32_t nr_result, nr_written, nr_seen;
+static struct bitmap_index *bitmap_git;
+static uint32_t write_layer;
 
 static int non_empty;
 static int reuse_delta = 1, reuse_object = 1;
@@ -79,6 +84,7 @@ static unsigned long pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
+static int thin;
 static int num_preferred_base;
 static struct progress *progress_state;
 
@@ -93,6 +99,8 @@ static uint16_t write_bitmap_options;
 
 static int exclude_promisor_objects;
 
+static int use_delta_islands;
+
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
 static unsigned long cache_max_small_delta_size = 1000;
@@ -612,7 +620,7 @@ static inline void add_to_write_order(struct object_entry **wo,
                               unsigned int *endp,
                               struct object_entry *e)
 {
-       if (e->filled)
+       if (e->filled || oe_layer(&to_pack, e) != write_layer)
                return;
        wo[(*endp)++] = e;
        e->filled = 1;
@@ -672,48 +680,15 @@ static void add_family_to_write_order(struct object_entry **wo,
        add_descendants_to_write_order(wo, endp, root);
 }
 
-static struct object_entry **compute_write_order(void)
+static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
 {
-       unsigned int i, wo_end, last_untagged;
-
-       struct object_entry **wo;
+       unsigned int i, last_untagged;
        struct object_entry *objects = to_pack.objects;
 
        for (i = 0; i < to_pack.nr_objects; i++) {
-               objects[i].tagged = 0;
-               objects[i].filled = 0;
-               SET_DELTA_CHILD(&objects[i], NULL);
-               SET_DELTA_SIBLING(&objects[i], NULL);
-       }
-
-       /*
-        * Fully connect delta_child/delta_sibling network.
-        * Make sure delta_sibling is sorted in the original
-        * recency order.
-        */
-       for (i = to_pack.nr_objects; i > 0;) {
-               struct object_entry *e = &objects[--i];
-               if (!DELTA(e))
-                       continue;
-               /* Mark me as the first child */
-               e->delta_sibling_idx = DELTA(e)->delta_child_idx;
-               SET_DELTA_CHILD(DELTA(e), e);
-       }
-
-       /*
-        * Mark objects that are at the tip of tags.
-        */
-       for_each_tag_ref(mark_tagged, NULL);
-
-       /*
-        * Give the objects in the original recency order until
-        * we see a tagged tip.
-        */
-       ALLOC_ARRAY(wo, to_pack.nr_objects);
-       for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
                if (objects[i].tagged)
                        break;
-               add_to_write_order(wo, &wo_end, &objects[i]);
+               add_to_write_order(wo, wo_end, &objects[i]);
        }
        last_untagged = i;
 
@@ -722,7 +697,7 @@ static struct object_entry **compute_write_order(void)
         */
        for (; i < to_pack.nr_objects; i++) {
                if (objects[i].tagged)
-                       add_to_write_order(wo, &wo_end, &objects[i]);
+                       add_to_write_order(wo, wo_end, &objects[i]);
        }
 
        /*
@@ -732,7 +707,7 @@ static struct object_entry **compute_write_order(void)
                if (oe_type(&objects[i]) != OBJ_COMMIT &&
                    oe_type(&objects[i]) != OBJ_TAG)
                        continue;
-               add_to_write_order(wo, &wo_end, &objects[i]);
+               add_to_write_order(wo, wo_end, &objects[i]);
        }
 
        /*
@@ -741,17 +716,61 @@ static struct object_entry **compute_write_order(void)
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
                if (oe_type(&objects[i]) != OBJ_TREE)
                        continue;
-               add_to_write_order(wo, &wo_end, &objects[i]);
+               add_to_write_order(wo, wo_end, &objects[i]);
        }
 
        /*
         * Finally all the rest in really tight order
         */
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
-               if (!objects[i].filled)
-                       add_family_to_write_order(wo, &wo_end, &objects[i]);
+               if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
+                       add_family_to_write_order(wo, wo_end, &objects[i]);
+       }
+}
+
+static struct object_entry **compute_write_order(void)
+{
+       uint32_t max_layers = 1;
+       unsigned int i, wo_end;
+
+       struct object_entry **wo;
+       struct object_entry *objects = to_pack.objects;
+
+       for (i = 0; i < to_pack.nr_objects; i++) {
+               objects[i].tagged = 0;
+               objects[i].filled = 0;
+               SET_DELTA_CHILD(&objects[i], NULL);
+               SET_DELTA_SIBLING(&objects[i], NULL);
+       }
+
+       /*
+        * Fully connect delta_child/delta_sibling network.
+        * Make sure delta_sibling is sorted in the original
+        * recency order.
+        */
+       for (i = to_pack.nr_objects; i > 0;) {
+               struct object_entry *e = &objects[--i];
+               if (!DELTA(e))
+                       continue;
+               /* Mark me as the first child */
+               e->delta_sibling_idx = DELTA(e)->delta_child_idx;
+               SET_DELTA_CHILD(DELTA(e), e);
        }
 
+       /*
+        * Mark objects that are at the tip of tags.
+        */
+       for_each_tag_ref(mark_tagged, NULL);
+
+       if (use_delta_islands)
+               max_layers = compute_pack_layers(&to_pack);
+
+       ALLOC_ARRAY(wo, to_pack.nr_objects);
+       wo_end = 0;
+
+       for (; write_layer < max_layers; ++write_layer)
+               compute_layer_order(wo, &wo_end);
+
        if (wo_end != to_pack.nr_objects)
                die(_("ordered %u objects, expected %"PRIu32),
                    wo_end, to_pack.nr_objects);
@@ -1040,6 +1059,7 @@ static int want_object_in_pack(const struct object_id *oid,
 {
        int want;
        struct list_head *pos;
+       struct multi_pack_index *m;
 
        if (!exclude && local && has_loose_object_nonlocal(oid))
                return 0;
@@ -1054,6 +1074,32 @@ static int want_object_in_pack(const struct object_id *oid,
                if (want != -1)
                        return want;
        }
+
+       for (m = get_multi_pack_index(the_repository); m; m = m->next) {
+               struct pack_entry e;
+               if (fill_midx_entry(oid, &e, m)) {
+                       struct packed_git *p = e.p;
+                       off_t offset;
+
+                       if (p == *found_pack)
+                               offset = *found_offset;
+                       else
+                               offset = find_pack_entry_one(oid->hash, p);
+
+                       if (offset) {
+                               if (!*found_pack) {
+                                       if (!is_pack_valid(p))
+                                               continue;
+                                       *found_offset = offset;
+                                       *found_pack = p;
+                               }
+                               want = want_found_object(exclude, p);
+                               if (want != -1)
+                                       return want;
+                       }
+               }
+       }
+
        list_for_each(pos, get_packed_git_mru(the_repository)) {
                struct packed_git *p = list_entry(pos, struct packed_git, mru);
                off_t offset;
@@ -1424,6 +1470,57 @@ static void cleanup_preferred_base(void)
        done_pbase_paths_num = done_pbase_paths_alloc = 0;
 }
 
+/*
+ * Return 1 iff the object specified by "delta" can be sent
+ * literally as a delta against the base in "base_sha1". If
+ * so, then *base_out will point to the entry in our packing
+ * list, or NULL if we must use the external-base list.
+ *
+ * Depth value does not matter - find_deltas() will
+ * never consider reused delta as the base object to
+ * deltify other objects against, in order to avoid
+ * circular deltas.
+ */
+static int can_reuse_delta(const unsigned char *base_sha1,
+                          struct object_entry *delta,
+                          struct object_entry **base_out)
+{
+       struct object_entry *base;
+
+       if (!base_sha1)
+               return 0;
+
+       /*
+        * First see if we're already sending the base (or it's explicitly in
+        * our "excluded" list).
+        */
+       base = packlist_find(&to_pack, base_sha1, NULL);
+       if (base) {
+               if (!in_same_island(&delta->idx.oid, &base->idx.oid))
+                       return 0;
+               *base_out = base;
+               return 1;
+       }
+
+       /*
+        * Otherwise, reachability bitmaps may tell us if the receiver has it,
+        * even if it was buried too deep in history to make it into the
+        * packing list.
+        */
+       if (thin && bitmap_has_sha1_in_uninteresting(bitmap_git, base_sha1)) {
+               if (use_delta_islands) {
+                       struct object_id base_oid;
+                       hashcpy(base_oid.hash, base_sha1);
+                       if (!in_same_island(&delta->idx.oid, &base_oid))
+                               return 0;
+               }
+               *base_out = NULL;
+               return 1;
+       }
+
+       return 0;
+}
+
 static void check_object(struct object_entry *entry)
 {
        unsigned long canonical_size;
@@ -1510,23 +1607,19 @@ static void check_object(struct object_entry *entry)
                        break;
                }
 
-               if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
-                       /*
-                        * If base_ref was set above that means we wish to
-                        * reuse delta data, and we even found that base
-                        * in the list of objects we want to pack. Goodie!
-                        *
-                        * Depth value does not matter - find_deltas() will
-                        * never consider reused delta as the base object to
-                        * deltify other objects against, in order to avoid
-                        * circular deltas.
-                        */
+               if (can_reuse_delta(base_ref, entry, &base_entry)) {
                        oe_set_type(entry, entry->in_pack_type);
                        SET_SIZE(entry, in_pack_size); /* delta size */
-                       SET_DELTA(entry, base_entry);
                        SET_DELTA_SIZE(entry, in_pack_size);
-                       entry->delta_sibling_idx = base_entry->delta_child_idx;
-                       SET_DELTA_CHILD(base_entry, entry);
+
+                       if (base_entry) {
+                               SET_DELTA(entry, base_entry);
+                               entry->delta_sibling_idx = base_entry->delta_child_idx;
+                               SET_DELTA_CHILD(base_entry, entry);
+                       } else {
+                               SET_DELTA_EXT(entry, base_ref);
+                       }
+
                        unuse_pack(&w_curs);
                        return;
                }
@@ -1826,6 +1919,11 @@ static int type_size_sort(const void *_a, const void *_b)
                return -1;
        if (a->preferred_base < b->preferred_base)
                return 1;
+       if (use_delta_islands) {
+               int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
+               if (island_cmp)
+                       return island_cmp;
+       }
        if (a_size > b_size)
                return -1;
        if (a_size < b_size)
@@ -1986,6 +2084,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
        if (trg_size < src_size / 32)
                return 0;
 
+       if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
+               return 0;
+
        /* Load data if not already done */
        if (!trg->data) {
                read_lock();
@@ -2041,10 +2142,6 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
        if (!delta_buf)
                return 0;
-       if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
-               free(delta_buf);
-               return 0;
-       }
 
        if (DELTA(trg_entry)) {
                /* Prefer only shallower same-sized deltas. */
@@ -2303,6 +2400,7 @@ static void init_threaded_search(void)
        pthread_mutex_init(&cache_mutex, NULL);
        pthread_mutex_init(&progress_mutex, NULL);
        pthread_cond_init(&progress_cond, NULL);
+       pthread_mutex_init(&to_pack.lock, NULL);
        old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
 }
 
@@ -2531,6 +2629,9 @@ static void prepare_pack(int window, int depth)
        uint32_t i, nr_deltas;
        unsigned n;
 
+       if (use_delta_islands)
+               resolve_tree_islands(progress, &to_pack);
+
        get_object_details();
 
        /*
@@ -2694,6 +2795,9 @@ static void show_commit(struct commit *commit, void *data)
 
        if (write_bitmap_index)
                index_commit_for_bitmap(commit);
+
+       if (use_delta_islands)
+               propagate_island_marks(commit);
 }
 
 static void show_object(struct object *obj, const char *name, void *data)
@@ -2701,6 +2805,19 @@ static void show_object(struct object *obj, const char *name, void *data)
        add_preferred_base_object(name);
        add_object_entry(&obj->oid, obj->type, name, 0);
        obj->flags |= OBJECT_ADDED;
+
+       if (use_delta_islands) {
+               const char *p;
+               unsigned depth = 0;
+               struct object_entry *ent;
+
+               for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
+                       depth++;
+
+               ent = packlist_find(&to_pack, obj->oid.hash, NULL);
+               if (ent && depth > oe_tree_depth(&to_pack, ent))
+                       oe_set_tree_depth(&to_pack, ent, depth);
+       }
 }
 
 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
@@ -2809,7 +2926,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs)
 
        memset(&in_pack, 0, sizeof(in_pack));
 
-       for (p = get_packed_git(the_repository); p; p = p->next) {
+       for (p = get_all_packs(the_repository); p; p = p->next) {
                struct object_id oid;
                struct object *o;
 
@@ -2873,7 +2990,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
        struct packed_git *p;
 
        p = (last_found != (void *)1) ? last_found :
-                                       get_packed_git(the_repository);
+                                       get_all_packs(the_repository);
 
        while (p) {
                if ((!p->pack_local || p->pack_keep ||
@@ -2883,7 +3000,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
                        return 1;
                }
                if (p == last_found)
-                       p = get_packed_git(the_repository);
+                       p = get_all_packs(the_repository);
                else
                        p = p->next;
                if (p == last_found)
@@ -2919,7 +3036,7 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
        uint32_t i;
        struct object_id oid;
 
-       for (p = get_packed_git(the_repository); p; p = p->next) {
+       for (p = get_all_packs(the_repository); p; p = p->next) {
                if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
                        continue;
 
@@ -2954,7 +3071,6 @@ static int pack_options_allow_reuse(void)
 
 static int get_object_list_from_bitmap(struct rev_info *revs)
 {
-       struct bitmap_index *bitmap_git;
        if (!(bitmap_git = prepare_bitmap_walk(revs)))
                return -1;
 
@@ -2970,7 +3086,6 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
        }
 
        traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
-       free_bitmap_index(bitmap_git);
        return 0;
 }
 
@@ -3028,6 +3143,9 @@ static void get_object_list(int ac, const char **av)
        if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
                return;
 
+       if (use_delta_islands)
+               load_delta_islands();
+
        if (prepare_revision_walk(&revs))
                die(_("revision walk setup failed"));
        mark_edges_uninteresting(&revs, show_edge);
@@ -3066,7 +3184,7 @@ static void add_extra_kept_packs(const struct string_list *names)
        if (!names->nr)
                return;
 
-       for (p = get_packed_git(the_repository); p; p = p->next) {
+       for (p = get_all_packs(the_repository); p; p = p->next) {
                const char *name = basename(p->pack_name);
                int i;
 
@@ -3118,7 +3236,6 @@ static int option_parse_unpack_unreachable(const struct option *opt,
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
        int use_internal_rev_list = 0;
-       int thin = 0;
        int shallow = 0;
        int all_progress_implied = 0;
        struct argv_array rp = ARGV_ARRAY_INIT;
@@ -3207,6 +3324,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                  option_parse_missing_action },
                OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
                         N_("do not pack objects in promisor packfiles")),
+               OPT_BOOL(0, "delta-islands", &use_delta_islands,
+                        N_("respect islands during delta compression")),
                OPT_END(),
        };
 
@@ -3333,13 +3452,16 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (pack_to_stdout || !rev_list_all)
                write_bitmap_index = 0;
 
+       if (use_delta_islands)
+               argv_array_push(&rp, "--topo-order");
+
        if (progress && all_progress_implied)
                progress = 2;
 
        add_extra_kept_packs(&keep_pack_list);
        if (ignore_packed_keep_on_disk) {
                struct packed_git *p;
-               for (p = get_packed_git(the_repository); p; p = p->next)
+               for (p = get_all_packs(the_repository); p; p = p->next)
                        if (p->pack_local && p->pack_keep)
                                break;
                if (!p) /* no keep-able packs found */
@@ -3352,7 +3474,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                 * it also covers non-local objects
                 */
                struct packed_git *p;
-               for (p = get_packed_git(the_repository); p; p = p->next) {
+               for (p = get_all_packs(the_repository); p; p = p->next) {
                        if (!p->pack_local) {
                                have_non_local_packs = 1;
                                break;