From: Junio C Hamano Date: Tue, 16 Apr 2019 10:28:11 +0000 (+0900) Subject: Merge branch 'jh/midx-verify-too-many-packs' X-Git-Tag: v2.22.0-rc0~95 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/ec08c4fd951b9ea5136aaecbaec834ffb665da80?hp=26623f8f12331046beb87681d23f324f5d3e4e49 Merge branch 'jh/midx-verify-too-many-packs' "git multi-pack-index verify" did not scale well with the number of packfiles, which is being improved. * jh/midx-verify-too-many-packs: midx: during verify group objects by packfile to speed verification midx: add progress indicators in multi-pack-index verify trace2:data: add trace2 data to midx progress: add sparse mode to force 100% complete message --- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index fca70f8e4f..ae6e476ad5 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -3,6 +3,7 @@ #include "config.h" #include "parse-options.h" #include "midx.h" +#include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { N_("git multi-pack-index [--object-dir=] (write|verify)"), @@ -40,6 +41,8 @@ int cmd_multi_pack_index(int argc, const char **argv, return 1; } + trace2_cmd_mode(argv[0]); + if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) diff --git a/midx.c b/midx.c index 8a505fd423..f1cd868f8c 100644 --- a/midx.c +++ b/midx.c @@ -8,6 +8,7 @@ #include "sha1-lookup.h" #include "midx.h" #include "progress.h" +#include "trace2.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -164,6 +165,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->pack_names[i]); } + trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); + trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); + return m; cleanup_fail: @@ -958,8 +962,35 @@ static void midx_report(const char *fmt, ...) va_end(ap); } +struct pair_pos_vs_id +{ + uint32_t pos; + uint32_t pack_int_id; +}; + +static int compare_pair_pos_vs_id(const void *_a, const void *_b) +{ + struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a; + struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b; + + return b->pack_int_id - a->pack_int_id; +} + +/* + * Limit calls to display_progress() for performance reasons. + * The interval here was arbitrarily chosen. + */ +#define SPARSE_PROGRESS_INTERVAL (1 << 12) +#define midx_display_sparse_progress(progress, n) \ + do { \ + uint64_t _n = (n); \ + if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \ + display_progress(progress, _n); \ + } while (0) + int verify_midx_file(const char *object_dir) { + struct pair_pos_vs_id *pairs = NULL; uint32_t i; struct progress *progress; struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); @@ -968,10 +999,15 @@ int verify_midx_file(const char *object_dir) if (!m) return 0; + progress = start_progress(_("Looking for referenced packfiles"), + m->num_packs); for (i = 0; i < m->num_packs; i++) { if (prepare_midx_pack(m, i)) midx_report("failed to load pack in position %d", i); + + display_progress(progress, i + 1); } + stop_progress(&progress); for (i = 0; i < 255; i++) { uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); @@ -982,6 +1018,8 @@ int verify_midx_file(const char *object_dir) i, oid_fanout1, oid_fanout2, i + 1); } + progress = start_sparse_progress(_("Verifying OID order in MIDX"), + m->num_objects - 1); for (i = 0; i < m->num_objects - 1; i++) { struct object_id oid1, oid2; @@ -991,18 +1029,47 @@ int verify_midx_file(const char *object_dir) if (oidcmp(&oid1, &oid2) >= 0) midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); + + midx_display_sparse_progress(progress, i + 1); } + stop_progress(&progress); - progress = start_progress(_("Verifying object offsets"), m->num_objects); + /* + * Create an array mapping each object to its packfile id. Sort it + * to group the objects by packfile. Use this permutation to visit + * each of the objects and only require 1 packfile to be open at a + * time. + */ + ALLOC_ARRAY(pairs, m->num_objects); + for (i = 0; i < m->num_objects; i++) { + pairs[i].pos = i; + pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); + } + + progress = start_sparse_progress(_("Sorting objects by packfile"), + m->num_objects); + display_progress(progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); + stop_progress(&progress); + + progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); for (i = 0; i < m->num_objects; i++) { struct object_id oid; struct pack_entry e; off_t m_offset, p_offset; - nth_midxed_object_oid(&oid, m, i); + if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && + m->packs[pairs[i-1].pack_int_id]) + { + close_pack_fd(m->packs[pairs[i-1].pack_int_id]); + close_pack_index(m->packs[pairs[i-1].pack_int_id]); + } + + nth_midxed_object_oid(&oid, m, pairs[i].pos); + if (!fill_midx_entry(&oid, &e, m)) { midx_report(_("failed to load pack entry for oid[%d] = %s"), - i, oid_to_hex(&oid)); + pairs[i].pos, oid_to_hex(&oid)); continue; } @@ -1017,11 +1084,13 @@ int verify_midx_file(const char *object_dir) if (m_offset != p_offset) midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), - i, oid_to_hex(&oid), m_offset, p_offset); + pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); - display_progress(progress, i + 1); + midx_display_sparse_progress(progress, i + 1); } stop_progress(&progress); + free(pairs); + return verify_midx_error; } diff --git a/packfile.c b/packfile.c index 16bcb75262..d2bcb2f860 100644 --- a/packfile.c +++ b/packfile.c @@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p) } } -static int close_pack_fd(struct packed_git *p) +int close_pack_fd(struct packed_git *p) { if (p->pack_fd < 0) return 0; diff --git a/packfile.h b/packfile.h index d70c6d9afb..b1c18504eb 100644 --- a/packfile.h +++ b/packfile.h @@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +int close_pack_fd(struct packed_git *p); + extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); diff --git a/progress.c b/progress.c index 5a99c9fbf0..212d00e524 100644 --- a/progress.c +++ b/progress.c @@ -34,6 +34,7 @@ struct progress { uint64_t total; unsigned last_percent; unsigned delay; + unsigned sparse; struct throughput *throughput; uint64_t start_ns; }; @@ -194,7 +195,7 @@ int display_progress(struct progress *progress, uint64_t n) } static struct progress *start_progress_delay(const char *title, uint64_t total, - unsigned delay) + unsigned delay, unsigned sparse) { struct progress *progress = malloc(sizeof(*progress)); if (!progress) { @@ -208,6 +209,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, progress->last_value = -1; progress->last_percent = -1; progress->delay = delay; + progress->sparse = sparse; progress->throughput = NULL; progress->start_ns = getnanotime(); set_progress_signal(); @@ -216,16 +218,46 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, struct progress *start_delayed_progress(const char *title, uint64_t total) { - return start_progress_delay(title, total, 2); + return start_progress_delay(title, total, 2, 0); } struct progress *start_progress(const char *title, uint64_t total) { - return start_progress_delay(title, total, 0); + return start_progress_delay(title, total, 0, 0); +} + +/* + * Here "sparse" means that the caller might use some sampling criteria to + * decide when to call display_progress() rather than calling it for every + * integer value in[0 .. total). In particular, the caller might not call + * display_progress() for the last value in the range. + * + * When "sparse" is set, stop_progress() will automatically force the done + * message to show 100%. + */ +struct progress *start_sparse_progress(const char *title, uint64_t total) +{ + return start_progress_delay(title, total, 0, 1); +} + +struct progress *start_delayed_sparse_progress(const char *title, + uint64_t total) +{ + return start_progress_delay(title, total, 2, 1); +} + +static void finish_if_sparse(struct progress *progress) +{ + if (progress && + progress->sparse && + progress->last_value != progress->total) + display_progress(progress, progress->total); } void stop_progress(struct progress **p_progress) { + finish_if_sparse(*p_progress); + stop_progress_msg(p_progress, _("done")); } diff --git a/progress.h b/progress.h index 70a4d4a0d6..7b725acc8d 100644 --- a/progress.h +++ b/progress.h @@ -6,7 +6,10 @@ struct progress; void display_throughput(struct progress *progress, uint64_t total); int display_progress(struct progress *progress, uint64_t n); struct progress *start_progress(const char *title, uint64_t total); +struct progress *start_sparse_progress(const char *title, uint64_t total); struct progress *start_delayed_progress(const char *title, uint64_t total); +struct progress *start_delayed_sparse_progress(const char *title, + uint64_t total); void stop_progress(struct progress **progress); void stop_progress_msg(struct progress **progress, const char *msg);