Merge branch 'jk/clone-copy-alternates-fix'
[gitweb.git] / builtin / pack-objects.c
index 19668d3517e42df173b38b3950ec54586a1162f5..1e7c2a98a5617b8b42422c22051c5f61b2510751 100644 (file)
@@ -23,6 +23,7 @@
 #include "reachable.h"
 #include "sha1-array.h"
 #include "argv-array.h"
+#include "mru.h"
 
 static const char *pack_usage[] = {
        N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@@ -67,7 +68,8 @@ static struct packed_git *reuse_packfile;
 static uint32_t reuse_packfile_objects;
 static off_t reuse_packfile_offset;
 
-static int use_bitmap_index = 1;
+static int use_bitmap_index_default = 1;
+static int use_bitmap_index = -1;
 static int write_bitmap_index;
 static uint16_t write_bitmap_options;
 
@@ -343,15 +345,15 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
 }
 
 /* Return 0 if we will bust the pack-size limit */
-static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
-                                       unsigned long limit, int usable_delta)
+static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
+                               unsigned long limit, int usable_delta)
 {
        struct packed_git *p = entry->in_pack;
        struct pack_window *w_curs = NULL;
        struct revindex_entry *revidx;
        off_t offset;
        enum object_type type = entry->type;
-       unsigned long datalen;
+       off_t datalen;
        unsigned char header[10], dheader[10];
        unsigned hdrlen;
 
@@ -417,11 +419,12 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry
 }
 
 /* Return 0 if we will bust the pack-size limit */
-static unsigned long write_object(struct sha1file *f,
-                                 struct object_entry *entry,
-                                 off_t write_offset)
+static off_t write_object(struct sha1file *f,
+                         struct object_entry *entry,
+                         off_t write_offset)
 {
-       unsigned long limit, len;
+       unsigned long limit;
+       off_t len;
        int usable_delta, to_reuse;
 
        if (!pack_to_stdout)
@@ -493,7 +496,7 @@ static enum write_one_status write_one(struct sha1file *f,
                                       struct object_entry *e,
                                       off_t *offset)
 {
-       unsigned long size;
+       off_t size;
        int recursing;
 
        /*
@@ -992,7 +995,7 @@ static int want_object_in_pack(const unsigned char *sha1,
                               struct packed_git **found_pack,
                               off_t *found_offset)
 {
-       struct packed_git *p;
+       struct mru_entry *entry;
        int want;
 
        if (!exclude && local && has_loose_object_nonlocal(sha1))
@@ -1009,7 +1012,8 @@ static int want_object_in_pack(const unsigned char *sha1,
                        return want;
        }
 
-       for (p = packed_git; p; p = p->next) {
+       for (entry = packed_git_mru->head; entry; entry = entry->next) {
+               struct packed_git *p = entry->item;
                off_t offset;
 
                if (p == *found_pack)
@@ -1025,6 +1029,8 @@ static int want_object_in_pack(const unsigned char *sha1,
                                *found_pack = p;
                        }
                        want = want_found_object(exclude, p);
+                       if (!exclude && want > 0)
+                               mru_mark(packed_git_mru, entry);
                        if (want != -1)
                                return want;
                }
@@ -1525,6 +1531,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
                        (a->in_pack_offset > b->in_pack_offset);
 }
 
+/*
+ * Drop an on-disk delta we were planning to reuse. Naively, this would
+ * just involve blanking out the "delta" field, but we have to deal
+ * with some extra book-keeping:
+ *
+ *   1. Removing ourselves from our base's delta_child/delta_sibling list.
+ *
+ *   2. Updating our size/type to the non-delta representation. These were
+ *      either not recorded initially (size) or overwritten with the delta type
+ *      (type) when check_object() decided to reuse the delta.
+ */
+static void drop_reused_delta(struct object_entry *entry)
+{
+       struct object_entry **p = &entry->delta->delta_child; /* entry->delta must be non-NULL */
+       struct object_info oi = OBJECT_INFO_INIT;
+
+       while (*p) { /* walk the links rather than the nodes, so the head needs no special case */
+               if (*p == entry)
+                       *p = (*p)->delta_sibling; /* unlink; p stays put and the scan continues */
+               else
+                       p = &(*p)->delta_sibling;
+       }
+       entry->delta = NULL;
+
+       oi.sizep = &entry->size; /* packed_object_info() is expected to fill these in place */
+       oi.typep = &entry->type;
+       if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
+               /*
+                * We failed to get the info from this pack for some reason;
+                * fall back to sha1_object_info, which may find another copy.
+                * And if that fails, the error will be recorded in entry->type
+                * and dealt with in prepare_pack().
+                */
+               entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
+       }
+}
+
+/*
+ * Follow the chain of deltas from this entry onward, throwing away any links
+ * that cause us to hit a cycle (as determined by the DFS state flags in
+ * the entries).
+ */
+static void break_delta_chains(struct object_entry *entry)
+{
+       /* If it's not a delta, it can't be part of a cycle. */
+       if (!entry->delta) {
+               entry->dfs_state = DFS_DONE;
+               return;
+       }
+
+       switch (entry->dfs_state) {
+       case DFS_NONE:
+               /*
+                * This is the first time we've seen the object. We mark it as
+                * part of the active potential cycle and recurse.
+                */
+               entry->dfs_state = DFS_ACTIVE;
+               break_delta_chains(entry->delta); /* one recursion level per link in the chain */
+               entry->dfs_state = DFS_DONE; /* everything reachable from here has been examined */
+               break;
+
+       case DFS_DONE:
+               /* object already examined, and not part of a cycle */
+               break;
+
+       case DFS_ACTIVE:
+               /*
+                * We found a cycle that needs to be broken: this entry is
+                * still on the current recursion path. Any link in the cycle
+                * would do, but it's convenient to break this one.
+                */
+               drop_reused_delta(entry);
+               entry->dfs_state = DFS_DONE;
+               break;
+       }
+}
+
 static void get_object_details(void)
 {
        uint32_t i;
@@ -1533,7 +1616,7 @@ static void get_object_details(void)
        sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
        for (i = 0; i < to_pack.nr_objects; i++)
                sorted_by_offset[i] = to_pack.objects + i;
-       qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
+       QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
 
        for (i = 0; i < to_pack.nr_objects; i++) {
                struct object_entry *entry = sorted_by_offset[i];
@@ -1542,6 +1625,13 @@ static void get_object_details(void)
                        entry->no_try_delta = 1;
        }
 
+       /*
+        * This must happen in a second pass, since we rely on the delta
+        * information for the whole list being completed.
+        */
+       for (i = 0; i < to_pack.nr_objects; i++)
+               break_delta_chains(&to_pack.objects[i]);
+
        free(sorted_by_offset);
 }
 
@@ -2153,6 +2243,35 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
 #define ll_find_deltas(l, s, w, d, p)  find_deltas(l, &s, w, d, p)
 #endif
 
+static void add_tag_chain(const struct object_id *oid)
+{
+       struct tag *tag;
+
+       /*
+        * We catch duplicates already in add_object_entry(), but we'd
+        * prefer to do this extra check to avoid having to parse the
+        * tag at all if we already know that it's being packed (e.g., if
+        * it was included via bitmaps, we would not have parsed it
+        * previously).
+        */
+       if (packlist_find(&to_pack, oid->hash, NULL))
+               return;
+
+       tag = lookup_tag(oid->hash);
+       while (1) { /* add this tag, then each tag it points at, until a non-tag is reached */
+               if (!tag || parse_tag(tag) || !tag->tagged)
+                       die("unable to pack objects reachable from tag %s",
+                           oid_to_hex(oid)); /* reports the starting oid, not the link that failed */
+
+               add_object_entry(tag->object.oid.hash, OBJ_TAG, NULL, 0);
+
+               if (tag->tagged->type != OBJ_TAG)
+                       return;
+
+               tag = (struct tag *)tag->tagged; /* cast is guarded by the OBJ_TAG check above */
+       }
+}
+
 static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
 {
        struct object_id peeled;
@@ -2160,7 +2279,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag,
        if (starts_with(path, "refs/tags/") && /* is a tag? */
            !peel_ref(path, peeled.hash)    && /* peelable? */
            packlist_find(&to_pack, peeled.hash, NULL))      /* object packed? */
-               add_object_entry(oid->hash, OBJ_TAG, NULL, 0);
+               add_tag_chain(oid);
        return 0;
 }
 
@@ -2226,7 +2345,7 @@ static void prepare_pack(int window, int depth)
                if (progress)
                        progress_state = start_progress(_("Compressing objects"),
                                                        nr_deltas);
-               qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
+               QSORT(delta_list, n, type_size_sort);
                ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
                stop_progress(&progress_state);
                if (nr_done != nr_deltas)
@@ -2274,7 +2393,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                        write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
        }
        if (!strcmp(k, "pack.usebitmaps")) {
-               use_bitmap_index = git_config_bool(k, v);
+               use_bitmap_index_default = git_config_bool(k, v);
                return 0;
        }
        if (!strcmp(k, "pack.threads")) {
@@ -2418,8 +2537,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs)
        }
 
        if (in_pack.nr) {
-               qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
-                     ofscmp);
+               QSORT(in_pack.array, in_pack.nr, ofscmp);
                for (i = 0; i < in_pack.nr; i++) {
                        struct object *o = in_pack.array[i].object;
                        add_object_entry(o->oid.hash, o->type, "", 0);
@@ -2523,13 +2641,13 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
 }
 
 /*
- * This tracks any options which a reader of the pack might
- * not understand, and which would therefore prevent blind reuse
- * of what we have on disk.
+ * This tracks any options which pack-reuse code expects to be on, or which a
+ * reader of the pack might not understand, and which would therefore prevent
+ * blind reuse of what we have on disk.
  */
 static int pack_options_allow_reuse(void)
 {
-       return allow_ofs_delta;
+       return pack_to_stdout && allow_ofs_delta;
 }
 
 static int get_object_list_from_bitmap(struct rev_info *revs)
@@ -2822,7 +2940,23 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (!rev_list_all || !rev_list_reflog || !rev_list_index)
                unpack_unreachable_expiration = 0;
 
-       if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
+       /*
+        * "soft" reasons not to use bitmaps - for on-disk repack by default we want
+        *
+        * - to produce a good pack (with a bitmap index, not-yet-packed objects
+        *   are packed in suboptimal order).
+        *
+        * - to use the more robust pack-generation codepath (avoiding possible
+        *   bugs in bitmap code and possible bitmap index corruption).
+        */
+       if (!pack_to_stdout)
+               use_bitmap_index_default = 0;
+
+       if (use_bitmap_index < 0)
+               use_bitmap_index = use_bitmap_index_default;
+
+       /* "hard" reasons not to use bitmaps; these just won't work at all */
+       if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow())
                use_bitmap_index = 0;
 
        if (pack_to_stdout || !rev_list_all)