From: Junio C Hamano Date: Wed, 22 Aug 2018 18:17:05 +0000 (-0700) Subject: Merge branch 'nd/pack-deltify-regression-fix' X-Git-Tag: v2.19.0-rc1~22 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/29d9e3e2c47dd4b5053b0a98c891878d398463e3?ds=sidebyside;hp=-c Merge branch 'nd/pack-deltify-regression-fix' In a recent update in 2.18 era, "git pack-objects" started producing a larger than necessary packfiles by missing opportunities to use large deltas. * nd/pack-deltify-regression-fix: pack-objects: fix performance issues on packing large deltas --- 29d9e3e2c47dd4b5053b0a98c891878d398463e3 diff --combined builtin/pack-objects.c index 0d80dee2ba,297d44fc73..d1144a8f7e --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -30,7 -30,6 +30,7 @@@ #include "list.h" #include "packfile.h" #include "object-store.h" +#include "dir.h" #define IN_PACK(obj) oe_in_pack(&to_pack, obj) #define SIZE(obj) oe_size(&to_pack, obj) @@@ -58,7 -57,7 +58,7 @@@ static const char *pack_usage[] = static struct packing_data to_pack; static struct pack_idx_entry **written_list; -static uint32_t nr_result, nr_written; +static uint32_t nr_result, nr_written, nr_seen; static int non_empty; static int reuse_delta = 1, reuse_object = 1; @@@ -68,8 -67,7 +68,8 @@@ static int pack_loose_unreachable static int local; static int have_non_local_packs; static int incremental; -static int ignore_packed_keep; +static int ignore_packed_keep_on_disk; +static int ignore_packed_keep_in_core; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; static const char *base_name; @@@ -94,7 -92,7 +94,7 @@@ static uint16_t write_bitmap_options static int exclude_promisor_objects; static unsigned long delta_cache_size = 0; -static unsigned long max_delta_cache_size = 256 * 1024 * 1024; +static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE; static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; @@@ -140,7 -138,7 +140,7 @@@ static void *get_delta(struct object_en buf = read_object_file(&entry->idx.oid, &type, &size); if (!buf) - die("unable to read %s", oid_to_hex(&entry->idx.oid)); + die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); base_buf = read_object_file(&DELTA(entry)->idx.oid, &type, &base_size); if (!base_buf) @@@ -148,13 -146,8 +148,13 @@@ oid_to_hex(&DELTA(entry)->idx.oid)); delta_buf = diff_delta(base_buf, base_size, buf, size, &delta_size, 0); + /* + * We succesfully computed this delta once but dropped it for + * memory reasons. Something is very wrong if this time we + * recompute and create a different delta. + */ if (!delta_buf || delta_size != DELTA_SIZE(entry)) - die("delta size changed"); + BUG("delta size changed"); free(buf); free(base_buf); return delta_buf; @@@ -284,7 -277,6 +284,7 @@@ static unsigned long write_no_reuse_obj enum object_type type; void *buf; struct git_istream *st = NULL; + const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && @@@ -341,7 -333,7 +341,7 @@@ dheader[pos] = ofs & 127; while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); - if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) close_istream(st); free(buf); @@@ -353,19 -345,19 +353,19 @@@ } else if (type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain - * an additional 20 bytes for the base sha1. + * additional bytes for the base object ID. */ - if (limit && hdrlen + 20 + datalen + 20 >= limit) { + if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) close_istream(st); free(buf); return 0; } hashwrite(f, header, hdrlen); - hashwrite(f, DELTA(entry)->idx.oid.hash, 20); - hdrlen += 20; + hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz); + hdrlen += hashsz; } else { - if (limit && hdrlen + datalen + 20 >= limit) { + if (limit && hdrlen + datalen + hashsz >= limit) { if (st) close_istream(st); free(buf); @@@ -397,7 -389,6 +397,7 @@@ static off_t write_reuse_object(struct unsigned char header[MAX_PACK_OBJECT_HEADER], dheader[MAX_PACK_OBJECT_HEADER]; unsigned hdrlen; + const unsigned hashsz = the_hash_algo->rawsz; unsigned long entry_size = SIZE(entry); if (DELTA(entry)) @@@ -411,7 -402,7 +411,7 @@@ datalen = revidx[1].offset - offset; if (!pack_to_stdout && p->index_version > 1 && check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { - error("bad packed object CRC for %s", + error(_("bad packed object CRC for %s"), oid_to_hex(&entry->idx.oid)); unuse_pack(&w_curs); return write_no_reuse_object(f, entry, limit, usable_delta); @@@ -422,7 -413,7 +422,7 @@@ if (!pack_to_stdout && p->index_version == 1 && check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) { - error("corrupt packed object for %s", + error(_("corrupt packed object for %s"), oid_to_hex(&entry->idx.oid)); unuse_pack(&w_curs); return write_no_reuse_object(f, entry, limit, usable_delta); @@@ -434,7 -425,7 +434,7 @@@ dheader[pos] = ofs & 127; while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); - if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { unuse_pack(&w_curs); return 0; } @@@ -443,16 -434,16 +443,16 @@@ hdrlen += sizeof(dheader) - pos; reused_delta++; } else if (type == OBJ_REF_DELTA) { - if (limit && hdrlen + 20 + datalen + 20 >= limit) { + if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { unuse_pack(&w_curs); return 0; } hashwrite(f, header, hdrlen); - hashwrite(f, DELTA(entry)->idx.oid.hash, 20); - hdrlen += 20; + hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz); + hdrlen += hashsz; reused_delta++; } else { - if (limit && hdrlen + datalen + 20 >= limit) { + if (limit && hdrlen + datalen + hashsz >= limit) { unuse_pack(&w_curs); return 0; } @@@ -553,7 -544,7 +553,7 @@@ static enum write_one_status write_one( */ recursing = (e->idx.offset == 1); if (recursing) { - warning("recursive delta detected for object %s", + warning(_("recursive delta detected for object %s"), oid_to_hex(&e->idx.oid)); return WRITE_ONE_RECURSIVE; } else if (e->idx.offset || e->preferred_base) { @@@ -587,7 -578,7 +587,7 @@@ /* make sure off_t is sufficiently large not to wrap */ if (signed_add_overflows(*offset, size)) - die("pack too large for current definition of off_t"); + die(_("pack too large for current definition of off_t")); *offset += size; return WRITE_ONE_WRITTEN; } @@@ -753,8 -744,7 +753,8 @@@ static struct object_entry **compute_wr } if (wo_end != to_pack.nr_objects) - die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects); + die(_("ordered %u objects, expected %"PRIu32), + wo_end, to_pack.nr_objects); return wo; } @@@ -766,18 -756,18 +766,18 @@@ static off_t write_reused_pack(struct h int fd; if (!is_pack_valid(reuse_packfile)) - die("packfile is invalid: %s", reuse_packfile->pack_name); + die(_("packfile is invalid: %s"), reuse_packfile->pack_name); fd = git_open(reuse_packfile->pack_name); if (fd < 0) - die_errno("unable to open packfile for reuse: %s", + die_errno(_("unable to open packfile for reuse: %s"), reuse_packfile->pack_name); if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1) - die_errno("unable to seek in reused packfile"); + die_errno(_("unable to seek in reused packfile")); if (reuse_packfile_offset < 0) - reuse_packfile_offset = reuse_packfile->pack_size - 20; + reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz; total = to_write = reuse_packfile_offset - sizeof(struct pack_header); @@@ -785,7 -775,7 +785,7 @@@ int read_pack = xread(fd, buffer, sizeof(buffer)); if (read_pack <= 0) - die_errno("unable to read from reused packfile"); + die_errno(_("unable to read from reused packfile")); if (read_pack > to_write) read_pack = to_write; @@@ -862,11 -852,11 +862,11 @@@ static void write_pack_file(void * If so, rewrite it like in fast-import */ if (pack_to_stdout) { - hashclose(f, oid.hash, CSUM_CLOSE); + finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE); } else if (nr_written == nr_remaining) { - hashclose(f, oid.hash, CSUM_FSYNC); + finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { - int fd = hashclose(f, oid.hash, 0); + int fd = finalize_hashfile(f, oid.hash, 0); fixup_pack_header_footer(fd, oid.hash, pack_tmp_name, nr_written, oid.hash, offset); close(fd); @@@ -888,7 -878,7 +888,7 @@@ * to preserve this property. */ if (stat(pack_tmp_name, &st) < 0) { - warning_errno("failed to stat %s", pack_tmp_name); + warning_errno(_("failed to stat %s"), pack_tmp_name); } else if (!last_mtime) { last_mtime = st.st_mtime; } else { @@@ -896,7 -886,7 +896,7 @@@ utb.actime = st.st_atime; utb.modtime = --last_mtime; if (utime(pack_tmp_name, &utb) < 0) - warning_errno("failed utime() on %s", pack_tmp_name); + warning_errno(_("failed utime() on %s"), pack_tmp_name); } strbuf_addf(&tmpname, "%s-", base_name); @@@ -941,8 -931,8 +941,8 @@@ free(write_order); stop_progress(&progress_state); if (written != nr_result) - die("wrote %"PRIu32" objects while expecting %"PRIu32, - written, nr_result); + die(_("wrote %"PRIu32" objects while expecting %"PRIu32), + written, nr_result); } static int no_try_delta(const char *path) @@@ -951,7 -941,7 +951,7 @@@ if (!check) check = attr_check_initl("delta", NULL); - if (git_check_attr(path, check)) + if (git_check_attr(&the_index, path, check)) return 0; if (ATTR_FALSE(check->items[0].value)) return 1; @@@ -1008,16 -998,13 +1008,16 @@@ static int want_found_object(int exclud * Otherwise, we signal "-1" at the end to tell the caller that we do * not know either way, and it needs to check more packs. */ - if (!ignore_packed_keep && + if (!ignore_packed_keep_on_disk && + !ignore_packed_keep_in_core && (!local || !have_non_local_packs)) return 1; if (local && !p->pack_local) return 0; - if (ignore_packed_keep && p->pack_local && p->pack_keep) + if (p->pack_local && + ((ignore_packed_keep_on_disk && p->pack_keep) || + (ignore_packed_keep_in_core && p->pack_keep_in_core))) return 0; /* we don't know yet; keep looking for more packs */ @@@ -1041,7 -1028,7 +1041,7 @@@ static int want_object_in_pack(const st int want; struct list_head *pos; - if (!exclude && local && has_loose_object_nonlocal(oid->hash)) + if (!exclude && local && has_loose_object_nonlocal(oid)) return 0; /* @@@ -1119,8 -1106,6 +1119,8 @@@ static int add_object_entry(const struc off_t found_offset = 0; uint32_t index_pos; + display_progress(progress_state, ++nr_seen); + if (have_duplicate_entry(oid, exclude, &index_pos)) return 0; @@@ -1136,6 -1121,8 +1136,6 @@@ create_object_entry(oid, type, pack_name_hash(name), exclude, name && no_try_delta(name), index_pos, found_pack, found_offset); - - display_progress(progress_state, nr_result); return 1; } @@@ -1146,8 -1133,6 +1146,8 @@@ static int add_object_entry_from_bitmap { uint32_t index_pos; + display_progress(progress_state, ++nr_seen); + if (have_duplicate_entry(oid, 0, &index_pos)) return 0; @@@ -1155,6 -1140,8 +1155,6 @@@ return 0; create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset); - - display_progress(progress_state, nr_result); return 1; } @@@ -1475,7 -1462,7 +1475,7 @@@ static void check_object(struct object_ if (reuse_delta && !entry->preferred_base) base_ref = use_pack(p, &w_curs, entry->in_pack_offset + used, NULL); - entry->in_pack_header_size = used + 20; + entry->in_pack_header_size = used + the_hash_algo->rawsz; break; case OBJ_OFS_DELTA: buf = use_pack(p, &w_curs, @@@ -1486,7 -1473,7 +1486,7 @@@ while (c & 128) { ofs += 1; if (!ofs || MSB(ofs, 7)) { - error("delta base offset overflow in pack for %s", + error(_("delta base offset overflow in pack for %s"), oid_to_hex(&entry->idx.oid)); goto give_up; } @@@ -1495,7 -1482,7 +1495,7 @@@ } ofs = entry->in_pack_offset - ofs; if (ofs <= 0 || ofs >= entry->in_pack_offset) { - error("delta base offset out of bound for %s", + error(_("delta base offset out of bound for %s"), oid_to_hex(&entry->idx.oid)); goto give_up; } @@@ -1557,8 -1544,7 +1557,8 @@@ unuse_pack(&w_curs); } - oe_set_type(entry, oid_object_info(&entry->idx.oid, &canonical_size)); + oe_set_type(entry, + oid_object_info(the_repository, &entry->idx.oid, &canonical_size)); if (entry->type_valid) { SET_SIZE(entry, canonical_size); } else { @@@ -1623,15 -1609,14 +1623,15 @@@ static void drop_reused_delta(struct ob oi.sizep = &size; oi.typep = &type; - if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { + if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { /* * We failed to get the info from this pack for some reason; * fall back to sha1_object_info, which may find another copy. * And if that fails, the error will be recorded in oe_type(entry) * and dealt with in prepare_pack(). */ - oe_set_type(entry, oid_object_info(&entry->idx.oid, &size)); + oe_set_type(entry, + oid_object_info(the_repository, &entry->idx.oid, &size)); } else { oe_set_type(entry, type); } @@@ -1676,7 -1661,7 +1676,7 @@@ static void break_delta_chains(struct o * is a bug. */ if (cur->dfs_state != DFS_NONE) - die("BUG: confusing delta dfs state in first pass: %d", + BUG("confusing delta dfs state in first pass: %d", cur->dfs_state); /* @@@ -1733,7 -1718,7 +1733,7 @@@ if (cur->dfs_state == DFS_DONE) break; else if (cur->dfs_state != DFS_ACTIVE) - die("BUG: confusing delta dfs state in second pass: %d", + BUG("confusing delta dfs state in second pass: %d", cur->dfs_state); /* @@@ -1767,10 -1752,6 +1767,10 @@@ static void get_object_details(void uint32_t i; struct object_entry **sorted_by_offset; + if (progress) + progress_state = start_progress(_("Counting objects"), + to_pack.nr_objects); + sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *)); for (i = 0; i < to_pack.nr_objects; i++) sorted_by_offset[i] = to_pack.objects + i; @@@ -1782,9 -1763,7 +1782,9 @@@ if (entry->type_valid && oe_size_greater_than(&to_pack, entry, big_file_threshold)) entry->no_try_delta = 1; + display_progress(progress_state, i + 1); } + stop_progress(&progress_state); /* * This must happen in a second pass, since we rely on the delta @@@ -1858,30 -1837,18 +1858,30 @@@ static int delta_cacheable(unsigned lon #ifndef NO_PTHREADS +/* Protect access to object database */ static pthread_mutex_t read_mutex; #define read_lock() pthread_mutex_lock(&read_mutex) #define read_unlock() pthread_mutex_unlock(&read_mutex) +/* Protect delta_cache_size */ static pthread_mutex_t cache_mutex; #define cache_lock() pthread_mutex_lock(&cache_mutex) #define cache_unlock() pthread_mutex_unlock(&cache_mutex) +/* + * Protect object list partitioning (e.g. struct thread_param) and + * progress_state + */ static pthread_mutex_t progress_mutex; #define progress_lock() pthread_mutex_lock(&progress_mutex) #define progress_unlock() pthread_mutex_unlock(&progress_mutex) +/* + * Access to struct object_entry is unprotected since each thread owns + * a portion of the main object list. Just don't access object entries + * ahead in the list because they can be stolen and would need + * progress_mutex for protection. + */ #else #define read_lock() (void)0 @@@ -1909,7 -1876,7 +1909,7 @@@ unsigned long oe_get_size_slow(struct p if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { read_lock(); - if (oid_object_info(&e->idx.oid, &size) < 0) + if (oid_object_info(the_repository, &e->idx.oid, &size) < 0) die(_("unable to get size of %s"), oid_to_hex(&e->idx.oid)); read_unlock(); @@@ -1969,7 -1936,7 +1969,7 @@@ static int try_delta(struct unpacked *t /* Now some size filtering heuristics. */ trg_size = SIZE(trg_entry); if (!DELTA(trg_entry)) { - max_size = trg_size/2 - 20; + max_size = trg_size/2 - the_hash_algo->rawsz; ref_depth = 1; } else { max_size = DELTA_SIZE(trg_entry); @@@ -1992,10 -1959,10 +1992,10 @@@ trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz); read_unlock(); if (!trg->data) - die("object %s cannot be read", + die(_("object %s cannot be read"), oid_to_hex(&trg_entry->idx.oid)); if (sz != trg_size) - die("object %s inconsistent object length (%lu vs %lu)", + die(_("object %s inconsistent object length (%lu vs %lu)"), oid_to_hex(&trg_entry->idx.oid), sz, trg_size); *mem_usage += sz; @@@ -2008,7 -1975,7 +2008,7 @@@ if (src_entry->preferred_base) { static int warned = 0; if (!warned++) - warning("object %s cannot be read", + warning(_("object %s cannot be read"), oid_to_hex(&src_entry->idx.oid)); /* * Those objects are not included in the @@@ -2018,11 -1985,11 +2018,11 @@@ */ return 0; } - die("object %s cannot be read", + die(_("object %s cannot be read"), oid_to_hex(&src_entry->idx.oid)); } if (sz != src_size) - die("object %s inconsistent object length (%lu vs %lu)", + die(_("object %s inconsistent object length (%lu vs %lu)"), oid_to_hex(&src_entry->idx.oid), sz, src_size); *mem_usage += sz; @@@ -2032,7 -1999,7 +2032,7 @@@ if (!src->index) { static int warned = 0; if (!warned++) - warning("suboptimal pack - out of memory"); + warning(_("suboptimal pack - out of memory")); return 0; } *mem_usage += sizeof_delta_index(src->index); @@@ -2041,10 -2008,6 +2041,6 @@@ delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size); if (!delta_buf) return 0; - if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) { - free(delta_buf); - return 0; - } if (DELTA(trg_entry)) { /* Prefer only shallower same-sized deltas. */ @@@ -2263,19 -2226,12 +2259,19 @@@ static void try_to_free_from_threads(si static try_to_free_t old_try_to_free_routine; /* + * The main object list is split into smaller lists, each is handed to + * one worker. + * * The main thread waits on the condition that (at least) one of the workers * has stopped working (which is indicated in the .working member of * struct thread_params). + * * When a work thread has completed its work, it sets .working to 0 and * signals the main thread and waits on the condition that .data_ready * becomes 1. + * + * The main thread steals half of the work from the worker that has + * most work left to hand it to the idle worker. */ struct thread_params { @@@ -2303,6 -2259,7 +2299,7 @@@ static void init_threaded_search(void pthread_mutex_init(&cache_mutex, NULL); pthread_mutex_init(&progress_mutex, NULL); pthread_cond_init(&progress_cond, NULL); + pthread_mutex_init(&to_pack.lock, NULL); old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads); } @@@ -2366,8 -2323,8 +2363,8 @@@ static void ll_find_deltas(struct objec return; } if (progress > pack_to_stdout) - fprintf(stderr, "Delta compression using up to %d threads.\n", - delta_search_threads); + fprintf_ln(stderr, _("Delta compression using up to %d threads"), + delta_search_threads); p = xcalloc(delta_search_threads, sizeof(*p)); /* Partition the work amongst work threads. */ @@@ -2407,7 -2364,7 +2404,7 @@@ ret = pthread_create(&p[i].thread, NULL, threaded_find_deltas, &p[i]); if (ret) - die("unable to create thread: %s", strerror(ret)); + die(_("unable to create thread: %s"), strerror(ret)); active_threads++; } @@@ -2499,10 -2456,10 +2496,10 @@@ static void add_tag_chain(const struct if (packlist_find(&to_pack, oid->hash, NULL)) return; - tag = lookup_tag(oid); + tag = lookup_tag(the_repository, oid); while (1) { if (!tag || parse_tag(tag) || !tag->tagged) - die("unable to pack objects reachable from tag %s", + die(_("unable to pack objects reachable from tag %s"), oid_to_hex(oid)); add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0); @@@ -2568,7 -2525,7 +2565,7 @@@ static void prepare_pack(int window, in if (!entry->preferred_base) { nr_deltas++; if (oe_type(entry) < 0) - die("unable to get type of object %s", + die(_("unable to get type of object %s"), oid_to_hex(&entry->idx.oid)); } else { if (oe_type(entry) < 0) { @@@ -2592,7 -2549,7 +2589,7 @@@ ll_find_deltas(delta_list, n, window+1, depth, &nr_done); stop_progress(&progress_state); if (nr_done != nr_deltas) - die("inconsistency with delta count"); + die(_("inconsistency with delta count")); } free(delta_list); } @@@ -2632,11 -2589,11 +2629,11 @@@ static int git_pack_config(const char * if (!strcmp(k, "pack.threads")) { delta_search_threads = git_config_int(k, v); if (delta_search_threads < 0) - die("invalid number of threads specified (%d)", + die(_("invalid number of threads specified (%d)"), delta_search_threads); #ifdef NO_PTHREADS if (delta_search_threads != 1) { - warning("no threads support, ignoring %s", k); + warning(_("no threads support, ignoring %s"), k); delta_search_threads = 0; } #endif @@@ -2645,7 -2602,7 +2642,7 @@@ if (!strcmp(k, "pack.indexversion")) { pack_idx_opts.version = git_config_int(k, v); if (pack_idx_opts.version > 2) - die("bad pack.indexversion=%"PRIu32, + die(_("bad pack.indexversion=%"PRIu32), pack_idx_opts.version); return 0; } @@@ -2663,7 -2620,7 +2660,7 @@@ static void read_object_list_from_stdin if (feof(stdin)) break; if (!ferror(stdin)) - die("fgets returned NULL, not EOF, not error!"); + die("BUG: fgets returned NULL, not EOF, not error!"); if (errno != EINTR) die_errno("fgets"); clearerr(stdin); @@@ -2671,13 -2628,13 +2668,13 @@@ } if (line[0] == '-') { if (get_oid_hex(line+1, &oid)) - die("expected edge object ID, got garbage:\n %s", + die(_("expected edge object ID, got garbage:\n %s"), line); add_preferred_base(&oid); continue; } if (parse_oid_hex(line, &oid, &p)) - die("expected object ID, got garbage:\n %s", line); + die(_("expected object ID, got garbage:\n %s"), line); add_preferred_base_object(p + 1); add_object_entry(&oid, OBJ_NONE, p + 1, 0); @@@ -2813,10 -2770,10 +2810,10 @@@ static void add_objects_in_unpacked_pac struct object_id oid; struct object *o; - if (!p->pack_local || p->pack_keep) + if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; if (open_pack_index(p)) - die("cannot open pack index"); + die(_("cannot open pack index")); ALLOC_GROW(in_pack.array, in_pack.nr + p->num_objects, @@@ -2844,10 -2801,10 +2841,10 @@@ static int add_loose_object(const struct object_id *oid, const char *path, void *data) { - enum object_type type = oid_object_info(oid, NULL); + enum object_type type = oid_object_info(the_repository, oid, NULL); if (type < 0) { - warning("loose object at %s could not be examined", path); + warning(_("loose object at %s could not be examined"), path); return 0; } @@@ -2876,8 -2833,7 +2873,8 @@@ static int has_sha1_pack_kept_or_nonloc get_packed_git(the_repository); while (p) { - if ((!p->pack_local || p->pack_keep) && + if ((!p->pack_local || p->pack_keep || + p->pack_keep_in_core) && find_pack_entry_one(oid->hash, p)) { last_found = p; return 1; @@@ -2920,11 -2876,11 +2917,11 @@@ static void loosen_unused_packed_object struct object_id oid; for (p = get_packed_git(the_repository); p; p = p->next) { - if (!p->pack_local || p->pack_keep) + if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; if (open_pack_index(p)) - die("cannot open pack index"); + die(_("cannot open pack index")); for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); @@@ -2932,7 -2888,7 +2929,7 @@@ !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) if (force_object_loose(&oid, p->mtime)) - die("unable to force loose object"); + die(_("unable to force loose object")); } } } @@@ -2946,21 -2902,18 +2943,21 @@@ static int pack_options_allow_reuse(voi { return pack_to_stdout && allow_ofs_delta && - !ignore_packed_keep && + !ignore_packed_keep_on_disk && + !ignore_packed_keep_in_core && (!local || !have_non_local_packs) && !incremental; } static int get_object_list_from_bitmap(struct rev_info *revs) { - if (prepare_bitmap_walk(revs) < 0) + struct bitmap_index *bitmap_git; + if (!(bitmap_git = prepare_bitmap_walk(revs))) return -1; if (pack_options_allow_reuse() && !reuse_partial_packfile_from_bitmap( + bitmap_git, &reuse_packfile, &reuse_packfile_objects, &reuse_packfile_offset)) { @@@ -2969,8 -2922,7 +2966,8 @@@ display_progress(progress_state, nr_result); } - traverse_bitmap_commit_list(&add_object_entry_from_bitmap); + traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap); + free_bitmap_index(bitmap_git); return 0; } @@@ -2997,7 -2949,7 +2994,7 @@@ static void get_object_list(int ac, con setup_revisions(ac, av, &revs, NULL); /* make sure shallows are read */ - is_repository_shallow(); + is_repository_shallow(the_repository); while (fgets(line, sizeof(line), stdin) != NULL) { int len = strlen(line); @@@ -3015,21 -2967,21 +3012,21 @@@ struct object_id oid; if (get_oid_hex(line + 10, &oid)) die("not an SHA-1 '%s'", line + 10); - register_shallow(&oid); + register_shallow(the_repository, &oid); use_bitmap_index = 0; continue; } - die("not a rev '%s'", line); + die(_("not a rev '%s'"), line); } if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME)) - die("bad revision '%s'", line); + die(_("bad revision '%s'"), line); } if (use_bitmap_index && !get_object_list_from_bitmap(&revs)) return; if (prepare_revision_walk(&revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); mark_edges_uninteresting(&revs, show_edge); if (!fn_show_object) @@@ -3042,9 -2994,9 +3039,9 @@@ revs.ignore_missing_links = 1; if (add_unseen_recent_objects_to_traversal(&revs, unpack_unreachable_expiration)) - die("unable to add recent objects"); + die(_("unable to add recent objects")); if (prepare_revision_walk(&revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); traverse_commit_list(&revs, record_recent_commit, record_recent_object, NULL); } @@@ -3059,32 -3011,6 +3056,32 @@@ oid_array_clear(&recent_objects); } +static void add_extra_kept_packs(const struct string_list *names) +{ + struct packed_git *p; + + if (!names->nr) + return; + + for (p = get_packed_git(the_repository); p; p = p->next) { + const char *name = basename(p->pack_name); + int i; + + if (!p->pack_local) + continue; + + for (i = 0; i < names->nr; i++) + if (!fspathcmp(name, names->items[i].string)) + break; + + if (i < names->nr) { + p->pack_keep_in_core = 1; + ignore_packed_keep_in_core = 1; + continue; + } + } +} + static int option_parse_index_version(const struct option *opt, const char *arg, int unset) { @@@ -3124,7 -3050,6 +3121,7 @@@ int cmd_pack_objects(int argc, const ch struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; int rev_list_index = 0; + struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, N_("do not show progress meter"), 0), @@@ -3135,7 -3060,7 +3132,7 @@@ OPT_BOOL(0, "all-progress-implied", &all_progress_implied, N_("similar to --all-progress when progress meter is shown")), - { OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"), + { OPTION_CALLBACK, 0, "index-version", NULL, N_("[,]"), N_("write the pack index file in the specified idx format version"), 0, option_parse_index_version }, OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit, @@@ -3162,18 -3087,18 +3159,18 @@@ N_("do not create an empty pack output")), OPT_BOOL(0, "revs", &use_internal_rev_list, N_("read revision arguments from standard input")), - { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL, - N_("limit the objects to those that are not yet packed"), - PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, - { OPTION_SET_INT, 0, "all", &rev_list_all, NULL, - N_("include objects reachable from any reference"), - PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, - { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL, - N_("include objects referred by reflog entries"), - PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, - { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL, - N_("include objects referred to by the index"), - PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, + OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked, + N_("limit the objects to those that are not yet packed"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "all", &rev_list_all, + N_("include objects reachable from any reference"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "reflog", &rev_list_reflog, + N_("include objects referred by reflog entries"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, + N_("include objects referred to by the index"), + 1, PARSE_OPT_NONEG), OPT_BOOL(0, "stdout", &pack_to_stdout, N_("output pack to stdout")), OPT_BOOL(0, "include-tag", &include_tag, @@@ -3189,10 -3114,8 +3186,10 @@@ N_("create thin packs")), OPT_BOOL(0, "shallow", &shallow, N_("create packs suitable for shallow fetches")), - OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep, + OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk, N_("ignore packs that have companion .keep file")), + OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), + N_("ignore this pack")), OPT_INTEGER(0, "compression", &pack_compression_level, N_("pack compression level")), OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents, @@@ -3213,7 -3136,7 +3210,7 @@@ if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS)) BUG("too many dfs states, increase OE_DFS_STATE_BITS"); - check_replace_refs = 0; + read_replace_refs = 0; reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); @@@ -3271,43 -3194,41 +3268,43 @@@ fetch_if_missing = 0; argv_array_push(&rp, "--exclude-promisor-objects"); } + if (unpack_unreachable || keep_unreachable || pack_loose_unreachable) + use_internal_rev_list = 1; if (!reuse_object) reuse_delta = 0; if (pack_compression_level == -1) pack_compression_level = Z_DEFAULT_COMPRESSION; else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION) - die("bad pack compression level %d", pack_compression_level); + die(_("bad pack compression level %d"), pack_compression_level); if (!delta_search_threads) /* --threads=0 means autodetect */ delta_search_threads = online_cpus(); #ifdef NO_PTHREADS if (delta_search_threads != 1) - warning("no threads support, ignoring --threads"); + warning(_("no threads support, ignoring --threads")); #endif if (!pack_to_stdout && !pack_size_limit) pack_size_limit = pack_size_limit_cfg; if (pack_to_stdout && pack_size_limit) - die("--max-pack-size cannot be used to build a pack for transfer."); + die(_("--max-pack-size cannot be used to build a pack for transfer")); if (pack_size_limit && pack_size_limit < 1024*1024) { - warning("minimum pack size limit is 1 MiB"); + warning(_("minimum pack size limit is 1 MiB")); pack_size_limit = 1024*1024; } if (!pack_to_stdout && thin) - die("--thin cannot be used to build an indexable pack."); + die(_("--thin cannot be used to build an indexable pack")); if (keep_unreachable && unpack_unreachable) - die("--keep-unreachable and --unpack-unreachable are incompatible."); + die(_("--keep-unreachable and --unpack-unreachable are incompatible")); if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; if (filter_options.choice) { if (!pack_to_stdout) - die("cannot use --filter without --stdout."); + die(_("cannot use --filter without --stdout")); use_bitmap_index = 0; } @@@ -3327,7 -3248,7 +3324,7 @@@ use_bitmap_index = use_bitmap_index_default; /* "hard" reasons not to use bitmaps; these just won't work at all */ - if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow()) + if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository)) use_bitmap_index = 0; if (pack_to_stdout || !rev_list_all) @@@ -3336,20 -3257,19 +3333,20 @@@ if (progress && all_progress_implied) progress = 2; - if (ignore_packed_keep) { + add_extra_kept_packs(&keep_pack_list); + if (ignore_packed_keep_on_disk) { struct packed_git *p; for (p = get_packed_git(the_repository); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ - ignore_packed_keep = 0; + ignore_packed_keep_on_disk = 0; } if (local) { /* - * unlike ignore_packed_keep above, we do not want to - * unset "local" based on looking at packs, as it - * also covers non-local objects + * unlike ignore_packed_keep_on_disk above, we do not + * want to unset "local" based on looking at packs, as + * it also covers non-local objects */ struct packed_git *p; for (p = get_packed_git(the_repository); p; p = p->next) { @@@ -3363,7 -3283,7 +3360,7 @@@ prepare_packing_data(&to_pack); if (progress) - progress_state = start_progress(_("Counting objects"), 0); + progress_state = start_progress(_("Enumerating objects"), 0); if (!use_internal_rev_list) read_object_list_from_stdin(); else { @@@ -3381,9 -3301,8 +3378,9 @@@ prepare_pack(window, depth); write_pack_file(); if (progress) - fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32")," - " reused %"PRIu32" (delta %"PRIu32")\n", - written, written_delta, reused, reused_delta); + fprintf_ln(stderr, + _("Total %"PRIu32" (delta %"PRIu32")," + " reused %"PRIu32" (delta %"PRIu32")"), + written, written_delta, reused, reused_delta); return 0; } diff --combined pack-objects.c index 92708522e7,f00a025738..6ef87e5683 --- a/pack-objects.c +++ b/pack-objects.c @@@ -60,7 -60,7 +60,7 @@@ static void rehash_objects(struct packi &found); if (found) - die("BUG: Duplicate object in hash"); + BUG("Duplicate object in hash"); pdata->index[ix] = i + 1; entry++; @@@ -146,6 -146,8 +146,8 @@@ void prepare_packing_data(struct packin pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE", 1U << OE_SIZE_BITS); + pdata->oe_delta_size_limit = git_env_ulong("GIT_TEST_OE_DELTA_SIZE", + 1UL << OE_DELTA_SIZE_BITS); } struct object_entry *packlist_alloc(struct packing_data *pdata, @@@ -160,6 -162,8 +162,8 @@@ if (!pdata->in_pack_by_idx) REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc); + if (pdata->delta_size) + REALLOC_ARRAY(pdata->delta_size, pdata->nr_alloc); } new_entry = pdata->objects + pdata->nr_objects++; diff --combined pack-objects.h index 08c6b57d49,6ffbdcf79f..62806ccc39 --- a/pack-objects.h +++ b/pack-objects.h @@@ -2,9 -2,7 +2,10 @@@ #define PACK_OBJECTS_H #include "object-store.h" + #include "thread-utils.h" +#include "pack.h" + +#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024) #define OE_DFS_STATE_BITS 2 #define OE_DEPTH_BITS 12 @@@ -15,7 -13,7 +16,7 @@@ * above this limit. Don't lower it too much. */ #define OE_SIZE_BITS 31 - #define OE_DELTA_SIZE_BITS 20 + #define OE_DELTA_SIZE_BITS 23 /* * State flags for depth-first search used for analyzing delta cycles. @@@ -95,11 -93,12 +96,12 @@@ struct object_entry */ unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */ unsigned delta_size_valid:1; + unsigned char in_pack_header_size; unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ unsigned z_delta_size:OE_Z_DELTA_BITS; unsigned type_valid:1; - unsigned type_:TYPE_BITS; unsigned no_try_delta:1; + unsigned type_:TYPE_BITS; unsigned in_pack_type:TYPE_BITS; /* could be delta */ unsigned preferred_base:1; /* * we do not pack this, but is available @@@ -109,17 -108,16 +111,16 @@@ unsigned tagged:1; /* near the very tip of refs */ unsigned filled:1; /* assigned write-order */ unsigned dfs_state:OE_DFS_STATE_BITS; - unsigned char in_pack_header_size; unsigned depth:OE_DEPTH_BITS; /* * pahole results on 64-bit linux (gcc and clang) * - * size: 80, bit_padding: 20 bits, holes: 8 bits + * size: 80, bit_padding: 9 bits * * and on 32-bit (gcc) * - * size: 76, bit_padding: 20 bits, holes: 8 bits + * size: 76, bit_padding: 9 bits */ }; @@@ -131,6 -129,7 +132,7 @@@ struct packing_data uint32_t index_size; unsigned int *in_pack_pos; + unsigned long *delta_size; /* * Only one of these can be non-NULL and they have different @@@ -141,10 -140,29 +143,29 @@@ struct packed_git **in_pack_by_idx; struct packed_git **in_pack; + #ifndef NO_PTHREADS + pthread_mutex_t lock; + #endif + uintmax_t oe_size_limit; + uintmax_t oe_delta_size_limit; }; void prepare_packing_data(struct packing_data *pdata); + + static inline void packing_data_lock(struct packing_data *pdata) + { + #ifndef NO_PTHREADS + pthread_mutex_lock(&pdata->lock); + #endif + } + static inline void packing_data_unlock(struct packing_data *pdata) + { + #ifndef NO_PTHREADS + pthread_mutex_unlock(&pdata->lock); + #endif + } + struct object_entry *packlist_alloc(struct packing_data *pdata, const unsigned char *sha1, uint32_t index_pos); @@@ -333,18 -351,34 +354,34 @@@ static inline unsigned long oe_delta_si { if (e->delta_size_valid) return e->delta_size_; - return oe_size(pack, e); + + /* + * pack->detla_size[] can't be NULL because oe_set_delta_size() + * must have been called when a new delta is saved with + * oe_set_delta(). + * If oe_delta() returns NULL (i.e. default state, which means + * delta_size_valid is also false), then the caller must never + * call oe_delta_size(). + */ + return pack->delta_size[e - pack->objects]; } static inline void oe_set_delta_size(struct packing_data *pack, struct object_entry *e, unsigned long size) { - e->delta_size_ = size; - e->delta_size_valid = e->delta_size_ == size; - if (!e->delta_size_valid && size != oe_size(pack, e)) - BUG("this can only happen in check_object() " - "where delta size is the same as entry size"); + if (size < pack->oe_delta_size_limit) { + e->delta_size_ = size; + e->delta_size_valid = 1; + } else { + packing_data_lock(pack); + if (!pack->delta_size) + ALLOC_ARRAY(pack->delta_size, pack->nr_alloc); + packing_data_unlock(pack); + + pack->delta_size[e - pack->objects] = size; + e->delta_size_valid = 0; + } } #endif