Merge branch 'jh/midx-verify-too-many-packs'
authorJunio C Hamano <gitster@pobox.com>
Tue, 16 Apr 2019 10:28:11 +0000 (19:28 +0900)
committerJunio C Hamano <gitster@pobox.com>
Tue, 16 Apr 2019 10:28:11 +0000 (19:28 +0900)
"git multi-pack-index verify" did not scale well with the number of
packfiles, which is being improved.

* jh/midx-verify-too-many-packs:
midx: during verify group objects by packfile to speed verification
midx: add progress indicators in multi-pack-index verify
trace2:data: add trace2 data to midx
progress: add sparse mode to force 100% complete message

builtin/multi-pack-index.c
midx.c
packfile.c
packfile.h
progress.c
progress.h
index fca70f8e4fcca8432619c41fd68234732a92f07a..ae6e476ad567e57e5a1e308d7901b0a0c331640f 100644 (file)
@@ -3,6 +3,7 @@
 #include "config.h"
 #include "parse-options.h"
 #include "midx.h"
+#include "trace2.h"
 
 static char const * const builtin_multi_pack_index_usage[] = {
        N_("git multi-pack-index [--object-dir=<dir>] (write|verify)"),
@@ -40,6 +41,8 @@ int cmd_multi_pack_index(int argc, const char **argv,
                return 1;
        }
 
+       trace2_cmd_mode(argv[0]);
+
        if (!strcmp(argv[0], "write"))
                return write_midx_file(opts.object_dir);
        if (!strcmp(argv[0], "verify"))
diff --git a/midx.c b/midx.c
index 8a505fd423efccf9986079542b47967cc93df7dc..f1cd868f8cf997c85996d69e5945bae64b02c9f6 100644 (file)
--- a/midx.c
+++ b/midx.c
@@ -8,6 +8,7 @@
 #include "sha1-lookup.h"
 #include "midx.h"
 #include "progress.h"
+#include "trace2.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -164,6 +165,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
                              m->pack_names[i]);
        }
 
+       trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
+       trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
+
        return m;
 
 cleanup_fail:
@@ -958,8 +962,35 @@ static void midx_report(const char *fmt, ...)
        va_end(ap);
 }
 
+struct pair_pos_vs_id
+{
+       uint32_t pos;
+       uint32_t pack_int_id;
+};
+
+static int compare_pair_pos_vs_id(const void *_a, const void *_b)
+{
+       struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
+       struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
+
+       return b->pack_int_id - a->pack_int_id;
+}
+
+/*
+ * Limit calls to display_progress() for performance reasons.
+ * The interval here was arbitrarily chosen.
+ */
+#define SPARSE_PROGRESS_INTERVAL (1 << 12)
+#define midx_display_sparse_progress(progress, n) \
+       do { \
+               uint64_t _n = (n); \
+               if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
+                       display_progress(progress, _n); \
+       } while (0)
+
 int verify_midx_file(const char *object_dir)
 {
+       struct pair_pos_vs_id *pairs = NULL;
        uint32_t i;
        struct progress *progress;
        struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
@@ -968,10 +999,15 @@ int verify_midx_file(const char *object_dir)
        if (!m)
                return 0;
 
+       progress = start_progress(_("Looking for referenced packfiles"),
+                                 m->num_packs);
        for (i = 0; i < m->num_packs; i++) {
                if (prepare_midx_pack(m, i))
                        midx_report("failed to load pack in position %d", i);
+
+               display_progress(progress, i + 1);
        }
+       stop_progress(&progress);
 
        for (i = 0; i < 255; i++) {
                uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
@@ -982,6 +1018,8 @@ int verify_midx_file(const char *object_dir)
                                    i, oid_fanout1, oid_fanout2, i + 1);
        }
 
+       progress = start_sparse_progress(_("Verifying OID order in MIDX"),
+                                        m->num_objects - 1);
        for (i = 0; i < m->num_objects - 1; i++) {
                struct object_id oid1, oid2;
 
@@ -991,18 +1029,47 @@ int verify_midx_file(const char *object_dir)
                if (oidcmp(&oid1, &oid2) >= 0)
                        midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
                                    i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
+
+               midx_display_sparse_progress(progress, i + 1);
        }
+       stop_progress(&progress);
 
-       progress = start_progress(_("Verifying object offsets"), m->num_objects);
+       /*
+        * Create an array mapping each object to its packfile id.  Sort it
+        * to group the objects by packfile.  Use this permutation to visit
+        * each of the objects and only require 1 packfile to be open at a
+        * time.
+        */
+       ALLOC_ARRAY(pairs, m->num_objects);
+       for (i = 0; i < m->num_objects; i++) {
+               pairs[i].pos = i;
+               pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
+       }
+
+       progress = start_sparse_progress(_("Sorting objects by packfile"),
+                                        m->num_objects);
+       display_progress(progress, 0); /* TODO: Measure QSORT() progress */
+       QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
+       stop_progress(&progress);
+
+       progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
        for (i = 0; i < m->num_objects; i++) {
                struct object_id oid;
                struct pack_entry e;
                off_t m_offset, p_offset;
 
-               nth_midxed_object_oid(&oid, m, i);
+               if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
+                   m->packs[pairs[i-1].pack_int_id])
+               {
+                       close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
+                       close_pack_index(m->packs[pairs[i-1].pack_int_id]);
+               }
+
+               nth_midxed_object_oid(&oid, m, pairs[i].pos);
+
                if (!fill_midx_entry(&oid, &e, m)) {
                        midx_report(_("failed to load pack entry for oid[%d] = %s"),
-                                   i, oid_to_hex(&oid));
+                                   pairs[i].pos, oid_to_hex(&oid));
                        continue;
                }
 
@@ -1017,11 +1084,13 @@ int verify_midx_file(const char *object_dir)
 
                if (m_offset != p_offset)
                        midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
-                                   i, oid_to_hex(&oid), m_offset, p_offset);
+                                   pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
 
-               display_progress(progress, i + 1);
+               midx_display_sparse_progress(progress, i + 1);
        }
        stop_progress(&progress);
 
+       free(pairs);
+
        return verify_midx_error;
 }
index 16bcb75262d918fc7b88cc5141213e951be145cc..d2bcb2f860c50056b7014b8a7dbf804baad79be8 100644 (file)
@@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p)
        }
 }
 
-static int close_pack_fd(struct packed_git *p)
+int close_pack_fd(struct packed_git *p)
 {
        if (p->pack_fd < 0)
                return 0;
index d70c6d9afb94c77c285fe8ee3237f7a40867157a..b1c18504eb92cef76b7efca6790ba565a8f5e950 100644 (file)
@@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *);
  */
 extern void close_pack_index(struct packed_git *);
 
+int close_pack_fd(struct packed_git *p);
+
 extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
 
 extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
index 5a99c9fbf00bfb98d3a51cc7a7948b4e963b3517..212d00e524819aafe17b55f1b2f54a5bcf40dd7e 100644 (file)
@@ -34,6 +34,7 @@ struct progress {
        uint64_t total;
        unsigned last_percent;
        unsigned delay;
+       unsigned sparse;
        struct throughput *throughput;
        uint64_t start_ns;
 };
@@ -194,7 +195,7 @@ int display_progress(struct progress *progress, uint64_t n)
 }
 
 static struct progress *start_progress_delay(const char *title, uint64_t total,
-                                            unsigned delay)
+                                            unsigned delay, unsigned sparse)
 {
        struct progress *progress = malloc(sizeof(*progress));
        if (!progress) {
@@ -208,6 +209,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total,
        progress->last_value = -1;
        progress->last_percent = -1;
        progress->delay = delay;
+       progress->sparse = sparse;
        progress->throughput = NULL;
        progress->start_ns = getnanotime();
        set_progress_signal();
@@ -216,16 +218,46 @@ static struct progress *start_progress_delay(const char *title, uint64_t total,
 
 struct progress *start_delayed_progress(const char *title, uint64_t total)
 {
-       return start_progress_delay(title, total, 2);
+       return start_progress_delay(title, total, 2, 0);
 }
 
 struct progress *start_progress(const char *title, uint64_t total)
 {
-       return start_progress_delay(title, total, 0);
+       return start_progress_delay(title, total, 0, 0);
+}
+
+/*
+ * Here "sparse" means that the caller might use some sampling criteria to
+ * decide when to call display_progress() rather than calling it for every
+ * integer value in[0 .. total).  In particular, the caller might not call
+ * display_progress() for the last value in the range.
+ *
+ * When "sparse" is set, stop_progress() will automatically force the done
+ * message to show 100%.
+ */
+struct progress *start_sparse_progress(const char *title, uint64_t total)
+{
+       return start_progress_delay(title, total, 0, 1);
+}
+
+struct progress *start_delayed_sparse_progress(const char *title,
+                                              uint64_t total)
+{
+       return start_progress_delay(title, total, 2, 1);
+}
+
+static void finish_if_sparse(struct progress *progress)
+{
+       if (progress &&
+           progress->sparse &&
+           progress->last_value != progress->total)
+               display_progress(progress, progress->total);
 }
 
 void stop_progress(struct progress **p_progress)
 {
+       finish_if_sparse(*p_progress);
+
        stop_progress_msg(p_progress, _("done"));
 }
 
index 70a4d4a0d66ebda2423acf4b5dc84ba485a874b8..7b725acc8d7b6cbaa5e36743b42b29d8c22d868d 100644 (file)
@@ -6,7 +6,10 @@ struct progress;
 void display_throughput(struct progress *progress, uint64_t total);
 int display_progress(struct progress *progress, uint64_t n);
 struct progress *start_progress(const char *title, uint64_t total);
+struct progress *start_sparse_progress(const char *title, uint64_t total);
 struct progress *start_delayed_progress(const char *title, uint64_t total);
+struct progress *start_delayed_sparse_progress(const char *title,
+                                              uint64_t total);
 void stop_progress(struct progress **progress);
 void stop_progress_msg(struct progress **progress, const char *msg);