From: Junio C Hamano Date: Tue, 13 Nov 2018 13:37:19 +0000 (+0900) Subject: Merge branch 'ds/test-multi-pack-index' X-Git-Tag: v2.20.0-rc0~53 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/5fb9263295b425c7acfde66c8abe5d53fa55d2c3?hp=-c Merge branch 'ds/test-multi-pack-index' Tests for the recently introduced multi-pack index machinery. * ds/test-multi-pack-index: packfile: close multi-pack-index in close_all_packs multi-pack-index: define GIT_TEST_MULTI_PACK_INDEX midx: close multi-pack-index on repack midx: fix broken free() in close_midx() --- 5fb9263295b425c7acfde66c8abe5d53fa55d2c3 diff --combined builtin/repack.c index 82c19b7555,26dcccdafc..45583683ee --- a/builtin/repack.c +++ b/builtin/repack.c @@@ -235,8 -235,8 +235,8 @@@ static void repack_promisor_objects(con while (strbuf_getline_lf(&line, out) != EOF) { char *promisor_name; int fd; - if (line.len != 40) - die("repack: Expecting 40 character sha1 lines only from pack-objects."); + if (line.len != the_hash_algo->hexsz) + die("repack: Expecting full hex object ID lines only from pack-objects."); string_list_append(names, line.buf); /* @@@ -407,8 -407,8 +407,8 @@@ int cmd_repack(int argc, const char **a out = xfdopen(cmd.out, "r"); while (strbuf_getline_lf(&line, out) != EOF) { - if (line.len != 40) - die("repack: Expecting 40 character sha1 lines only from pack-objects."); + if (line.len != the_hash_algo->hexsz) + die("repack: Expecting full hex object ID lines only from pack-objects."); string_list_append(&names, line.buf); } fclose(out); @@@ -431,8 -431,7 +431,7 @@@ char *fname, *fname_old; if (!midx_cleared) { - /* if we move a packfile, it will invalidated the midx */ - clear_midx_file(get_object_directory()); + clear_midx_file(the_repository); midx_cleared = 1; } @@@ -535,32 -534,29 +534,36 @@@ reprepare_packed_git(the_repository); if (delete_redundant) { + const int hexsz = the_hash_algo->hexsz; int opts = 0; string_list_sort(&names); for_each_string_list_item(item, &existing_packs) { char *sha1; size_t len = strlen(item->string); - if (len < 40) + if (len < hexsz) continue; - sha1 = item->string + len - 40; + sha1 = item->string + len - hexsz; if (!string_list_has_string(&names, sha1)) remove_redundant_pack(packdir, item->string); } if (!po_args.quiet && isatty(2)) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); + + if (!keep_unreachable && + (!(pack_everything & LOOSEN_UNREACHABLE) || + unpack_unreachable) && + is_repository_shallow(the_repository)) + prune_shallow(PRUNE_QUICK); } if (!no_update_server_info) update_server_info(0); remove_temporary_files(); + + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) + write_midx_file(get_object_directory()); + string_list_clear(&names, 0); string_list_clear(&rollback, 0); string_list_clear(&existing_packs, 0); diff --combined midx.c index 4fac0cd08a,1a6fa96cda..a50b117b77 --- a/midx.c +++ b/midx.c @@@ -7,7 -7,6 +7,7 @@@ #include "object-store.h" #include "sha1-lookup.h" #include "midx.h" +#include "progress.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@@ -77,18 -76,24 +77,18 @@@ struct multi_pack_index *load_multi_pac m->local = local; m->signature = get_be32(m->data); - if (m->signature != MIDX_SIGNATURE) { - error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), + if (m->signature != MIDX_SIGNATURE) + die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), m->signature, MIDX_SIGNATURE); - goto cleanup_fail; - } m->version = m->data[MIDX_BYTE_FILE_VERSION]; - if (m->version != 
MIDX_VERSION) { - error(_("multi-pack-index version %d not recognized"), + if (m->version != MIDX_VERSION) + die(_("multi-pack-index version %d not recognized"), m->version); - goto cleanup_fail; - } hash_version = m->data[MIDX_BYTE_HASH_VERSION]; - if (hash_version != MIDX_HASH_VERSION) { - error(_("hash version %u does not match"), hash_version); - goto cleanup_fail; - } + if (hash_version != MIDX_HASH_VERSION) + die(_("hash version %u does not match"), hash_version); m->hash_len = MIDX_HASH_LEN; m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; @@@ -101,9 -106,6 +101,9 @@@ uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 + MIDX_CHUNKLOOKUP_WIDTH * i); + if (chunk_offset >= m->data_len) + die(_("invalid chunk offset (too large)")); + switch (chunk_id) { case MIDX_CHUNKID_PACKNAMES: m->chunk_pack_names = m->data + chunk_offset; @@@ -158,10 -160,12 +158,10 @@@ cur_pack_name += strlen(cur_pack_name) + 1; - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) { - error(_("multi-pack-index pack names out of order: '%s' before '%s'"), + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) + die(_("multi-pack-index pack names out of order: '%s' before '%s'"), m->pack_names[i - 1], m->pack_names[i]); - goto cleanup_fail; - } } return m; @@@ -176,9 -180,13 +176,13 @@@ cleanup_fail return NULL; } - static void close_midx(struct multi_pack_index *m) + void close_midx(struct multi_pack_index *m) { uint32_t i; + + if (!m) + return; + munmap((unsigned char *)m->data, m->data_len); close(m->fd); m->fd = -1; @@@ -186,7 -194,7 +190,7 @@@ for (i = 0; i < m->num_packs; i++) { if (m->packs[i]) { close_pack(m->packs[i]); - free(m->packs); + free(m->packs[i]); } } FREE_AND_NULL(m->packs); @@@ -198,8 -206,7 +202,8 @@@ int prepare_midx_pack(struct multi_pack struct strbuf pack_name = STRBUF_INIT; if (pack_int_id >= m->num_packs) - BUG("bad pack-int-id"); + die(_("bad pack-int-id: %u (%u total packs"), + pack_int_id, m->num_packs); if (m->packs[pack_int_id]) return 0; @@@ -238,7 -245,7 +242,7 @@@ static off_t nth_midxed_offset(struct m offset32 = get_be32(offset_data + sizeof(uint32_t)); if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { - if (sizeof(offset32) < sizeof(uint64_t)) + if (sizeof(off_t) < sizeof(uint64_t)) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; @@@ -282,8 -289,8 +286,8 @@@ static int nth_midxed_pack_entry(struc struct object_id oid; nth_midxed_object_oid(&oid, m, pos); for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(oid.hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) + if (hasheq(oid.hash, + p->bad_object_sha1 + the_hash_algo->rawsz * i)) return 0; } @@@ -331,9 -338,14 +335,14 @@@ int prepare_multi_pack_index_one(struc struct multi_pack_index *m; struct multi_pack_index *m_search; int config_value; + static int env_value = -1; + + if (env_value < 0) + env_value = git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0); - if (repo_config_get_bool(r, "core.multipackindex", &config_value) || - !config_value) + if (!env_value && + (repo_config_get_bool(r, "core.multipackindex", &config_value) || + !config_value)) return 0; for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) @@@ -580,8 -592,8 +589,8 @@@ static struct pack_midx_entry *get_sort * Take only the first duplicate. 
*/ for (cur_object = 0; cur_object < nr_fanout; cur_object++) { - if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid, - &entries_by_fanout[cur_object].oid)) + if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid, + &entries_by_fanout[cur_object].oid)) continue; ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); @@@ -914,9 -926,14 +923,14 @@@ cleanup return 0; } - void clear_midx_file(const char *object_dir) + void clear_midx_file(struct repository *r) { - char *midx = get_midx_filename(object_dir); + char *midx = get_midx_filename(r->objects->objectdir); + + if (r->objects && r->objects->multi_pack_index) { + close_midx(r->objects->multi_pack_index); + r->objects->multi_pack_index = NULL; + } if (remove_path(midx)) { UNLEAK(midx); @@@ -925,83 -942,3 +939,83 @@@ free(midx); } + +static int verify_midx_error; + +static void midx_report(const char *fmt, ...) +{ + va_list ap; + verify_midx_error = 1; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + +int verify_midx_file(const char *object_dir) +{ + uint32_t i; + struct progress *progress; + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + verify_midx_error = 0; + + if (!m) + return 0; + + for (i = 0; i < m->num_packs; i++) { + if (prepare_midx_pack(m, i)) + midx_report("failed to load pack in position %d", i); + } + + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]); + + if (oid_fanout1 > oid_fanout2) + midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), + i, oid_fanout1, oid_fanout2, i + 1); + } + + for (i = 0; i < m->num_objects - 1; i++) { + struct object_id oid1, oid2; + + nth_midxed_object_oid(&oid1, m, i); + nth_midxed_object_oid(&oid2, m, i + 1); + + if (oidcmp(&oid1, &oid2) >= 0) + midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), + i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); + } + + progress = start_progress(_("Verifying object offsets"), m->num_objects); + for (i = 0; i < m->num_objects; i++) { + struct object_id oid; + struct pack_entry e; + off_t m_offset, p_offset; + + nth_midxed_object_oid(&oid, m, i); + if (!fill_midx_entry(&oid, &e, m)) { + midx_report(_("failed to load pack entry for oid[%d] = %s"), + i, oid_to_hex(&oid)); + continue; + } + + if (open_pack_index(e.p)) { + midx_report(_("failed to load pack-index for packfile %s"), + e.p->pack_name); + break; + } + + m_offset = e.offset; + p_offset = find_pack_entry_one(oid.hash, e.p); + + if (m_offset != p_offset) + midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), + i, oid_to_hex(&oid), m_offset, p_offset); + + display_progress(progress, i + 1); + } + stop_progress(&progress); + + return verify_midx_error; +} diff --combined midx.h index 1d6c21afe3,228016088e..774f652530 --- a/midx.h +++ b/midx.h @@@ -1,11 -1,10 +1,13 @@@ -#ifndef __MIDX_H__ -#define __MIDX_H__ +#ifndef MIDX_H +#define MIDX_H #include "repository.h" +struct object_id; +struct pack_entry; + + #define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" + struct multi_pack_index { struct multi_pack_index *next; @@@ -45,7 -44,8 +47,9 @@@ int midx_contains_pack(struct multi_pac int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); int write_midx_file(const char *object_dir); - void clear_midx_file(const char *object_dir); + void clear_midx_file(struct repository *r); +int 
verify_midx_file(const char *object_dir); + void close_midx(struct multi_pack_index *m); + #endif diff --combined packfile.c index f2850a00b5,37fcd8f136..d1e6683ffe --- a/packfile.c +++ b/packfile.c @@@ -80,8 -80,10 +80,8 @@@ void pack_report(void static int check_packed_git_idx(const char *path, struct packed_git *p) { void *idx_map; - struct pack_idx_header *hdr; size_t idx_size; - uint32_t version, nr, i, *index; - int fd = git_open(path); + int fd = git_open(path), ret; struct stat st; const unsigned int hashsz = the_hash_algo->rawsz; @@@ -99,32 -101,16 +99,32 @@@ idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - hdr = idx_map; + ret = load_idx(path, hashsz, idx_map, idx_size, p); + + if (ret) + munmap(idx_map, idx_size); + + return ret; +} + +int load_idx(const char *path, const unsigned int hashsz, void *idx_map, + size_t idx_size, struct packed_git *p) +{ + struct pack_idx_header *hdr = idx_map; + uint32_t version, nr, i, *index; + + if (idx_size < 4 * 256 + hashsz + hashsz) + return error("index file %s is too small", path); + if (idx_map == NULL) + return error("empty data"); + if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { version = ntohl(hdr->idx_version); - if (version < 2 || version > 2) { - munmap(idx_map, idx_size); + if (version < 2 || version > 2) return error("index file %s is version %"PRIu32 " and is not supported by this binary" " (try upgrading GIT to a newer version)", path, version); - } } else version = 1; @@@ -134,8 -120,10 +134,8 @@@ index += 2; /* skip index header */ for (i = 0; i < 256; i++) { uint32_t n = ntohl(index[i]); - if (n < nr) { - munmap(idx_map, idx_size); + if (n < nr) return error("non-monotonic index %s", path); - } nr = n; } @@@ -147,8 -135,10 +147,8 @@@ * - hash of the packfile * - file checksum */ - if (idx_size != 4*256 + nr * (hashsz + 4) + hashsz + hashsz) { - munmap(idx_map, idx_size); + if (idx_size != 4 * 256 + nr * (hashsz + 4) + hashsz + hashsz) return error("wrong index v1 file size in %s", path); - } } else if (version == 2) { /* * Minimum size: @@@ -167,16 -157,20 +167,16 @@@ unsigned long max_size = min_size; if (nr) max_size += (nr - 1)*8; - if (idx_size < min_size || idx_size > max_size) { - munmap(idx_map, idx_size); + if (idx_size < min_size || idx_size > max_size) return error("wrong index v2 file size in %s", path); - } if (idx_size != min_size && /* * make sure we can deal with large pack offsets. * 31-bit signed offset won't be enough, neither * 32-bit unsigned one will be. 
*/ - (sizeof(off_t) <= 4)) { - munmap(idx_map, idx_size); + (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); - } } p->index_version = version; @@@ -345,6 -339,11 +345,11 @@@ void close_all_packs(struct raw_object_ BUG("want to close pack marked 'do-not-close'"); else close_pack(p); + + if (o->multi_pack_index) { + close_midx(o->multi_pack_index); + o->multi_pack_index = NULL; + } } /* @@@ -1127,14 -1126,13 +1132,14 @@@ int unpack_object_header(struct packed_ void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) { unsigned i; + const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i)) + if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); p->num_bad_objects++; } diff --combined t/README index 2e9bef2852,9d0277c338..242497455f --- a/t/README +++ b/t/README @@@ -154,7 -154,6 +154,7 @@@ appropriately before running "make" As the names depend on the tests' file names, it is safe to run the tests with this option in parallel. +-V:: --verbose-log:: Write verbose output to the same logfile as `--tee`, but do _not_ write it to stdout. Unlike `--tee --verbose`, this option @@@ -316,7 -315,7 +316,7 @@@ packs on demand. This normally only hap over 2GB. This variable forces the code path on any object larger than bytes. -GIT_TEST_OE_DELTA_SIZE= exercises the uncomon pack-objects code +GIT_TEST_OE_DELTA_SIZE= exercises the uncommon pack-objects code path where deltas larger than this limit require extra memory allocation for bookkeeping. @@@ -328,22 -327,10 +328,26 @@@ GIT_TEST_COMMIT_GRAPH=, when t be written after every 'git commit' command, and overrides the 'core.commitGraph' setting to true. +GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor +code path for utilizing a file system monitor to speed up detecting +new or changed files. + +GIT_TEST_INDEX_VERSION= exercises the index read/write code path +for the index version specified. Can be set to any valid version +(currently 2, 3, or 4). + +GIT_TEST_PRELOAD_INDEX= exercises the preload-index code path +by overriding the minimum number of cache entries required per thread. + +GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading +of the index for the whole test suite by bypassing the default number of +cache entries and thread minimums. Setting this to 1 will make the +index loading single threaded. + + GIT_TEST_MULTI_PACK_INDEX=, when true, forces the multi-pack- + index to be written after every 'git repack' command, and overrides the + 'core.multiPackIndex' setting to true. + Naming Tests ------------ @@@ -418,13 -405,13 +422,13 @@@ This test harness library does the foll consistently when command line arguments --verbose (or -v), --debug (or -d), and --immediate (or -i) is given. -Do's, don'ts & things to keep in mind -------------------------------------- +Do's & don'ts +------------- Here are a few examples of things you probably should and shouldn't do when writing tests. -Do: +Here are the "do's:" - Put all code inside test_expect_success and other assertions. @@@ -469,21 -456,16 +473,21 @@@ Windows, where the shell (MSYS bash) mangles absolute path names. 
For details, see the commit message of 4114156ae9. -Don't: + - Remember that inside the
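The t/README hunk above documents the new GIT_TEST_MULTI_PACK_INDEX knob, which the builtin/repack.c hunk wires up via git_env_bool() and write_midx_file(), and which prepare_multi_pack_index_one() in midx.c consults before falling back to the core.multiPackIndex config setting. A minimal usage sketch in the style of the other GIT_TEST_* examples in t/README — illustrative only, not part of the patch, and the specific test script name below is an assumption rather than something named in this series:

    # Force a multi-pack-index to be written after every 'git repack',
    # either for the whole suite or for a single midx-focused script:
    $ GIT_TEST_MULTI_PACK_INDEX=1 make test
    $ GIT_TEST_MULTI_PACK_INDEX=1 sh ./t5319-multi-pack-index.sh -v

Outside the test suite, the same read path is reached by setting core.multiPackIndex to true in a repository's configuration, which prepare_multi_pack_index_one() checks when the environment knob is unset.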