From: Junio C Hamano Date: Wed, 21 Sep 2016 22:15:21 +0000 (-0700) Subject: Merge branch 'ks/pack-objects-bitmap' X-Git-Tag: v2.11.0-rc0~134 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/7f109ef54e96ac7ae7e036487b881432170a3817?ds=inline;hp=-c Merge branch 'ks/pack-objects-bitmap' Some codepaths in "git pack-objects" were not ready to use an existing pack bitmap; now they are and as the result they have become faster. * ks/pack-objects-bitmap: pack-objects: use reachability bitmap index when generating non-stdout pack pack-objects: respect --local/--honor-pack-keep/--incremental when bitmap is in use --- 7f109ef54e96ac7ae7e036487b881432170a3817 diff --combined builtin/pack-objects.c index 0954375be9,d48c290d28..166e52c700 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -67,7 -67,8 +67,8 @@@ static struct packed_git *reuse_packfil static uint32_t reuse_packfile_objects; static off_t reuse_packfile_offset; - static int use_bitmap_index = 1; + static int use_bitmap_index_default = 1; + static int use_bitmap_index = -1; static int write_bitmap_index; static uint16_t write_bitmap_options; @@@ -343,15 -344,15 +344,15 @@@ static unsigned long write_no_reuse_obj } /* Return 0 if we will bust the pack-size limit */ -static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry, - unsigned long limit, int usable_delta) +static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry, + unsigned long limit, int usable_delta) { struct packed_git *p = entry->in_pack; struct pack_window *w_curs = NULL; struct revindex_entry *revidx; off_t offset; enum object_type type = entry->type; - unsigned long datalen; + off_t datalen; unsigned char header[10], dheader[10]; unsigned hdrlen; @@@ -417,12 -418,11 +418,12 @@@ } /* Return 0 if we will bust the pack-size limit */ -static unsigned long write_object(struct sha1file *f, - struct object_entry *entry, - off_t write_offset) +static off_t write_object(struct sha1file *f, + struct object_entry *entry, + off_t write_offset) { - unsigned long limit, len; + unsigned long limit; + off_t len; int usable_delta, to_reuse; if (!pack_to_stdout) @@@ -494,7 -494,7 +495,7 @@@ static enum write_one_status write_one( struct object_entry *e, off_t *offset) { - unsigned long size; + off_t size; int recursing; /* @@@ -945,13 -945,48 +946,48 @@@ static int have_duplicate_entry(const u return 1; } + static int want_found_object(int exclude, struct packed_git *p) + { + if (exclude) + return 1; + if (incremental) + return 0; + + /* + * When asked to do --local (do not include an object that appears in a + * pack we borrow from elsewhere) or --honor-pack-keep (do not include + * an object that appears in a pack marked with .keep), finding a pack + * that matches the criteria is sufficient for us to decide to omit it. + * However, even if this pack does not satisfy the criteria, we need to + * make sure no copy of this object appears in _any_ pack that makes us + * to omit the object, so we need to check all the packs. + * + * We can however first check whether these options can possible matter; + * if they do not matter we know we want the object in generated pack. + * Otherwise, we signal "-1" at the end to tell the caller that we do + * not know either way, and it needs to check more packs. + */ + if (!ignore_packed_keep && + (!local || !have_non_local_packs)) + return 1; + + if (local && !p->pack_local) + return 0; + if (ignore_packed_keep && p->pack_local && p->pack_keep) + return 0; + + /* we don't know yet; keep looking for more packs */ + return -1; + } + /* * Check whether we want the object in the pack (e.g., we do not want * objects found in non-local stores if the "--local" option was used). * - * As a side effect of this check, we will find the packed version of this - * object, if any. We therefore pass out the pack information to avoid having - * to look it up again later. + * If the caller already knows an existing pack it wants to take the object + * from, that is passed in *found_pack and *found_offset; otherwise this + * function finds if there is any pack that has the object and returns the pack + * and its offset in these variables. */ static int want_object_in_pack(const unsigned char *sha1, int exclude, @@@ -959,15 -994,30 +995,30 @@@ off_t *found_offset) { struct packed_git *p; + int want; if (!exclude && local && has_loose_object_nonlocal(sha1)) return 0; - *found_pack = NULL; - *found_offset = 0; + /* + * If we already know the pack object lives in, start checks from that + * pack - in the usual case when neither --local was given nor .keep files + * are present we will determine the answer right now. + */ + if (*found_pack) { + want = want_found_object(exclude, *found_pack); + if (want != -1) + return want; + } for (p = packed_git; p; p = p->next) { - off_t offset = find_pack_entry_one(sha1, p); + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(sha1, p); + if (offset) { if (!*found_pack) { if (!is_pack_valid(p)) @@@ -975,31 -1025,9 +1026,9 @@@ *found_offset = offset; *found_pack = p; } - if (exclude) - return 1; - if (incremental) - return 0; - - /* - * When asked to do --local (do not include an - * object that appears in a pack we borrow - * from elsewhere) or --honor-pack-keep (do not - * include an object that appears in a pack marked - * with .keep), we need to make sure no copy of this - * object come from in _any_ pack that causes us to - * omit it, and need to complete this loop. When - * neither option is in effect, we know the object - * we just found is going to be packed, so break - * out of the loop to return 1 now. - */ - if (!ignore_packed_keep && - (!local || !have_non_local_packs)) - break; - - if (local && !p->pack_local) - return 0; - if (ignore_packed_keep && p->pack_local && p->pack_keep) - return 0; + want = want_found_object(exclude, p); + if (want != -1) + return want; } } @@@ -1040,8 -1068,8 +1069,8 @@@ static const char no_closure_warning[] static int add_object_entry(const unsigned char *sha1, enum object_type type, const char *name, int exclude) { - struct packed_git *found_pack; - off_t found_offset; + struct packed_git *found_pack = NULL; + off_t found_offset = 0; uint32_t index_pos; if (have_duplicate_entry(sha1, exclude, &index_pos)) @@@ -1074,6 -1102,9 +1103,9 @@@ static int add_object_entry_from_bitmap if (have_duplicate_entry(sha1, 0, &index_pos)) return 0; + if (!want_object_in_pack(sha1, 0, &pack, &offset)) + return 0; + create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset); display_progress(progress_state, nr_result); @@@ -2123,35 -2154,6 +2155,35 @@@ static void ll_find_deltas(struct objec #define ll_find_deltas(l, s, w, d, p) find_deltas(l, &s, w, d, p) #endif +static void add_tag_chain(const struct object_id *oid) +{ + struct tag *tag; + + /* + * We catch duplicates already in add_object_entry(), but we'd + * prefer to do this extra check to avoid having to parse the + * tag at all if we already know that it's being packed (e.g., if + * it was included via bitmaps, we would not have parsed it + * previously). + */ + if (packlist_find(&to_pack, oid->hash, NULL)) + return; + + tag = lookup_tag(oid->hash); + while (1) { + if (!tag || parse_tag(tag) || !tag->tagged) + die("unable to pack objects reachable from tag %s", + oid_to_hex(oid)); + + add_object_entry(tag->object.oid.hash, OBJ_TAG, NULL, 0); + + if (tag->tagged->type != OBJ_TAG) + return; + + tag = (struct tag *)tag->tagged; + } +} + static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data) { struct object_id peeled; @@@ -2159,7 -2161,7 +2191,7 @@@ if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, peeled.hash) && /* peelable? */ packlist_find(&to_pack, peeled.hash, NULL)) /* object packed? */ - add_object_entry(oid->hash, OBJ_TAG, NULL, 0); + add_tag_chain(oid); return 0; } @@@ -2273,7 -2275,7 +2305,7 @@@ static int git_pack_config(const char * write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE; } if (!strcmp(k, "pack.usebitmaps")) { - use_bitmap_index = git_config_bool(k, v); + use_bitmap_index_default = git_config_bool(k, v); return 0; } if (!strcmp(k, "pack.threads")) { @@@ -2522,13 -2524,13 +2554,13 @@@ static void loosen_unused_packed_object } /* - * This tracks any options which a reader of the pack might - * not understand, and which would therefore prevent blind reuse - * of what we have on disk. + * This tracks any options which pack-reuse code expects to be on, or which a + * reader of the pack might not understand, and which would therefore prevent + * blind reuse of what we have on disk. */ static int pack_options_allow_reuse(void) { - return allow_ofs_delta; + return pack_to_stdout && allow_ofs_delta; } static int get_object_list_from_bitmap(struct rev_info *revs) @@@ -2821,7 -2823,23 +2853,23 @@@ int cmd_pack_objects(int argc, const ch if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; - if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) + /* + * "soft" reasons not to use bitmaps - for on-disk repack by default we want + * + * - to produce good pack (with bitmap index not-yet-packed objects are + * packed in suboptimal order). + * + * - to use more robust pack-generation codepath (avoiding possible + * bugs in bitmap code and possible bitmap index corruption). + */ + if (!pack_to_stdout) + use_bitmap_index_default = 0; + + if (use_bitmap_index < 0) + use_bitmap_index = use_bitmap_index_default; + + /* "hard" reasons not to use bitmaps; these just won't work at all */ + if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow()) use_bitmap_index = 0; if (pack_to_stdout || !rev_list_all) diff --combined t/t5310-pack-bitmaps.sh index 1e376ea39e,9602e9af67..b4c7a6ff6b --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@@ -7,6 -7,18 +7,18 @@@ objpath () echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" } + # show objects present in pack ($1 should be associated *.idx) + list_packed_objects () { + git show-index <"$1" | cut -d' ' -f2 + } + + # has_any pattern-file content-file + # tests whether content-file has any entry from pattern-file with entries being + # whole lines. + has_any () { + grep -Ff "$1" "$2" + } + test_expect_success 'setup repo with moderate-sized history' ' for i in $(test_seq 1 10); do test_commit $i @@@ -16,6 -28,7 +28,7 @@@ test_commit side-$i done && git checkout master && + bitmaptip=$(git rev-parse master) && blob=$(echo tagged-blob | git hash-object -w --stdin) && git tag tagged-blob $blob && git config repack.writebitmaps true && @@@ -118,6 -131,83 +131,83 @@@ test_expect_success 'incremental repac git repack -d --no-write-bitmap-index ' + test_expect_success 'pack-objects respects --local (non-local loose)' ' + git init --bare alt.git && + echo $(pwd)/alt.git/objects >.git/objects/info/alternates && + echo content1 >file1 && + # non-local loose object which is not present in bitmapped pack + altblob=$(GIT_DIR=alt.git git hash-object -w file1) && + # non-local loose object which is also present in bitmapped pack + git cat-file blob $blob | GIT_DIR=alt.git git hash-object -w --stdin && + git add file1 && + test_tick && + git commit -m commit_file1 && + echo HEAD | git pack-objects --local --stdout --revs >1.pack && + git index-pack 1.pack && + list_packed_objects 1.idx >1.objects && + printf "%s\n" "$altblob" "$blob" >nonlocal-loose && + ! has_any nonlocal-loose 1.objects + ' + + test_expect_success 'pack-objects respects --honor-pack-keep (local non-bitmapped pack)' ' + echo content2 >file2 && + blob2=$(git hash-object -w file2) && + git add file2 && + test_tick && + git commit -m commit_file2 && + printf "%s\n" "$blob2" "$bitmaptip" >keepobjects && + pack2=$(git pack-objects pack2 .git/objects/pack/pack2-$pack2.keep && + rm $(objpath $blob2) && + echo HEAD | git pack-objects --honor-pack-keep --stdout --revs >2a.pack && + git index-pack 2a.pack && + list_packed_objects 2a.idx >2a.objects && + ! has_any keepobjects 2a.objects + ' + + test_expect_success 'pack-objects respects --local (non-local pack)' ' + mv .git/objects/pack/pack2-$pack2.* alt.git/objects/pack/ && + echo HEAD | git pack-objects --local --stdout --revs >2b.pack && + git index-pack 2b.pack && + list_packed_objects 2b.idx >2b.objects && + ! has_any keepobjects 2b.objects + ' + + test_expect_success 'pack-objects respects --honor-pack-keep (local bitmapped pack)' ' + ls .git/objects/pack/ | grep bitmap >output && + test_line_count = 1 output && + packbitmap=$(basename $(cat output) .bitmap) && + list_packed_objects .git/objects/pack/$packbitmap.idx >packbitmap.objects && + test_when_finished "rm -f .git/objects/pack/$packbitmap.keep" && + >.git/objects/pack/$packbitmap.keep && + echo HEAD | git pack-objects --honor-pack-keep --stdout --revs >3a.pack && + git index-pack 3a.pack && + list_packed_objects 3a.idx >3a.objects && + ! has_any packbitmap.objects 3a.objects + ' + + test_expect_success 'pack-objects respects --local (non-local bitmapped pack)' ' + mv .git/objects/pack/$packbitmap.* alt.git/objects/pack/ && + test_when_finished "mv alt.git/objects/pack/$packbitmap.* .git/objects/pack/" && + echo HEAD | git pack-objects --local --stdout --revs >3b.pack && + git index-pack 3b.pack && + list_packed_objects 3b.idx >3b.objects && + ! has_any packbitmap.objects 3b.objects + ' + + test_expect_success 'pack-objects to file can use bitmap' ' + # make sure we still have 1 bitmap index from previous tests + ls .git/objects/pack/ | grep bitmap >output && + test_line_count = 1 output && + # verify equivalent packs are generated with/without using bitmap index + packasha1=$(git pack-objects --no-use-bitmap-index --all packa packa.objects && + list_packed_objects packb.objects && + test_cmp packa.objects packb.objects + ' + test_expect_success 'full repack, reusing previous bitmaps' ' git repack -ad && ls .git/objects/pack/ | grep bitmap >output && @@@ -143,6 -233,20 +233,20 @@@ test_expect_success 'create objects fo EOF ' + test_expect_success 'pack-objects respects --incremental' ' + cat >revs2 <<-EOF && + HEAD + $commit + EOF + git pack-objects --incremental --stdout --revs 4.pack && + git index-pack 4.pack && + list_packed_objects 4.idx >4.objects && + test_line_count = 4 4.objects && + git rev-list --objects $commit >revlist && + cut -d" " -f1 revlist |sort >objects && + test_cmp 4.objects objects + ' + test_expect_success 'pack with missing blob' ' rm $(objpath $blob) && git pack-objects --stdout --revs /dev/null @@@ -158,6 -262,10 +262,6 @@@ test_expect_success 'pack with missing git pack-objects --stdout --revs /dev/null ' -test_lazy_prereq JGIT ' - type jgit -' - test_expect_success JGIT 'we can read jgit bitmaps' ' git clone . compat-jgit && (