From: Junio C Hamano Date: Mon, 10 Oct 2016 21:03:46 +0000 (-0700) Subject: Merge branch 'jk/pack-objects-optim-mru' X-Git-Tag: v2.11.0-rc0~72 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/e6e24c94df9df6d39f2316113c14fe07d2ab03d7?ds=inline;hp=-c Merge branch 'jk/pack-objects-optim-mru' "git pack-objects" in a repository with many packfiles used to spend a lot of time looking for/at objects in them; the accesses to the packfiles are now optimized by checking the most-recently-used packfile first. * jk/pack-objects-optim-mru: pack-objects: use mru list when iterating over packs pack-objects: break delta cycles before delta-search phase sha1_file: make packed_object_info public provide an initializer for "struct object_info" --- e6e24c94df9df6d39f2316113c14fe07d2ab03d7 diff --combined builtin/cat-file.c index cca97a86c0,8e579980b6..30383e9eb4 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@@ -17,63 -17,32 +17,63 @@@ struct batch_options int print_contents; int buffer_output; int all_objects; + int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */ const char *format; }; +static const char *force_path; + +static int filter_object(const char *path, unsigned mode, + const struct object_id *oid, + char **buf, unsigned long *size) +{ + enum object_type type; + + *buf = read_sha1_file(oid->hash, &type, size); + if (!*buf) + return error(_("cannot read object %s '%s'"), + oid_to_hex(oid), path); + if ((type == OBJ_BLOB) && S_ISREG(mode)) { + struct strbuf strbuf = STRBUF_INIT; + if (convert_to_working_tree(path, *buf, *size, &strbuf)) { + free(*buf); + *size = strbuf.len; + *buf = strbuf_detach(&strbuf, NULL); + } + } + + return 0; +} + static int cat_one_file(int opt, const char *exp_type, const char *obj_name, int unknown_type) { - unsigned char sha1[20]; + struct object_id oid; enum object_type type; char *buf; unsigned long size; struct object_context obj_context; - struct object_info oi = {NULL}; + struct object_info oi = OBJECT_INFO_INIT; struct strbuf sb = STRBUF_INIT; unsigned flags = LOOKUP_REPLACE_OBJECT; + const char *path = force_path; if (unknown_type) flags |= LOOKUP_UNKNOWN_OBJECT; - if (get_sha1_with_context(obj_name, 0, sha1, &obj_context)) + if (get_sha1_with_context(obj_name, 0, oid.hash, &obj_context)) die("Not a valid object name %s", obj_name); + if (!path) + path = obj_context.path; + if (obj_context.mode == S_IFINVALID) + obj_context.mode = 0100644; + buf = NULL; switch (opt) { case 't': oi.typename = &sb; - if (sha1_object_info_extended(sha1, &oi, flags) < 0) + if (sha1_object_info_extended(oid.hash, &oi, flags) < 0) die("git cat-file: could not get object info"); if (sb.len) { printf("%s\n", sb.buf); @@@ -84,34 -53,24 +84,34 @@@ case 's': oi.sizep = &size; - if (sha1_object_info_extended(sha1, &oi, flags) < 0) + if (sha1_object_info_extended(oid.hash, &oi, flags) < 0) die("git cat-file: could not get object info"); printf("%lu\n", size); return 0; case 'e': - return !has_sha1_file(sha1); + return !has_object_file(&oid); + + case 'w': + if (!path[0]) + die("git cat-file --filters %s: must be " + "", obj_name); + + if (filter_object(path, obj_context.mode, + &oid, &buf, &size)) + return -1; + break; case 'c': - if (!obj_context.path[0]) + if (!path[0]) die("git cat-file --textconv %s: must be ", obj_name); - if (textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size)) + if (textconv_object(path, obj_context.mode, &oid, 1, &buf, &size)) break; case 'p': - type = sha1_object_info(sha1, NULL); + type = sha1_object_info(oid.hash, NULL); if (type < 0) die("Not a valid object name %s", obj_name); @@@ -124,8 -83,8 +124,8 @@@ } if (type == OBJ_BLOB) - return stream_blob_to_fd(1, sha1, NULL, 0); - buf = read_sha1_file(sha1, &type, &size); + return stream_blob_to_fd(1, &oid, NULL, 0); + buf = read_sha1_file(oid.hash, &type, &size); if (!buf) die("Cannot read object %s", obj_name); @@@ -134,19 -93,19 +134,19 @@@ case 0: if (type_from_string(exp_type) == OBJ_BLOB) { - unsigned char blob_sha1[20]; - if (sha1_object_info(sha1, NULL) == OBJ_TAG) { - char *buffer = read_sha1_file(sha1, &type, &size); + struct object_id blob_oid; + if (sha1_object_info(oid.hash, NULL) == OBJ_TAG) { + char *buffer = read_sha1_file(oid.hash, &type, &size); const char *target; if (!skip_prefix(buffer, "object ", &target) || - get_sha1_hex(target, blob_sha1)) - die("%s not a valid tag", sha1_to_hex(sha1)); + get_oid_hex(target, &blob_oid)) + die("%s not a valid tag", oid_to_hex(&oid)); free(buffer); } else - hashcpy(blob_sha1, sha1); + oidcpy(&blob_oid, &oid); - if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB) - return stream_blob_to_fd(1, blob_sha1, NULL, 0); + if (sha1_object_info(blob_oid.hash, NULL) == OBJ_BLOB) + return stream_blob_to_fd(1, &blob_oid, NULL, 0); /* * we attempted to dereference a tag to a blob * and failed; there may be new dereference @@@ -154,7 -113,7 +154,7 @@@ * fall-back to the usual case. */ } - buf = read_object_with_reference(sha1, exp_type, &size, NULL); + buf = read_object_with_reference(oid.hash, exp_type, &size, NULL); break; default: @@@ -169,12 -128,12 +169,12 @@@ } struct expand_data { - unsigned char sha1[20]; + struct object_id oid; enum object_type type; unsigned long size; - unsigned long disk_size; + off_t disk_size; const char *rest; - unsigned char delta_base_sha1[20]; + struct object_id delta_base_oid; /* * If mark_query is true, we do not expand anything, but rather @@@ -217,7 -176,7 +217,7 @@@ static void expand_atom(struct strbuf * if (is_atom("objectname", atom, len)) { if (!data->mark_query) - strbuf_addstr(sb, sha1_to_hex(data->sha1)); + strbuf_addstr(sb, oid_to_hex(&data->oid)); } else if (is_atom("objecttype", atom, len)) { if (data->mark_query) data->info.typep = &data->type; @@@ -232,7 -191,7 +232,7 @@@ if (data->mark_query) data->info.disk_sizep = &data->disk_size; else - strbuf_addf(sb, "%lu", data->disk_size); + strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size); } else if (is_atom("rest", atom, len)) { if (data->mark_query) data->split_on_whitespace = 1; @@@ -240,10 -199,9 +240,10 @@@ strbuf_addstr(sb, data->rest); } else if (is_atom("deltabase", atom, len)) { if (data->mark_query) - data->info.delta_base_sha1 = data->delta_base_sha1; + data->info.delta_base_sha1 = data->delta_base_oid.hash; else - strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1)); + strbuf_addstr(sb, + oid_to_hex(&data->delta_base_oid)); } else die("unknown format element: %.*s", len, atom); } @@@ -274,53 -232,28 +274,53 @@@ static void batch_write(struct batch_op static void print_object_or_die(struct batch_options *opt, struct expand_data *data) { - const unsigned char *sha1 = data->sha1; + const struct object_id *oid = &data->oid; assert(data->info.typep); if (data->type == OBJ_BLOB) { if (opt->buffer_output) fflush(stdout); - if (stream_blob_to_fd(1, sha1, NULL, 0) < 0) - die("unable to stream %s to stdout", sha1_to_hex(sha1)); + if (opt->cmdmode) { + char *contents; + unsigned long size; + + if (!data->rest) + die("missing path for '%s'", oid_to_hex(oid)); + + if (opt->cmdmode == 'w') { + if (filter_object(data->rest, 0100644, oid, + &contents, &size)) + die("could not convert '%s' %s", + oid_to_hex(oid), data->rest); + } else if (opt->cmdmode == 'c') { + enum object_type type; + if (!textconv_object(data->rest, 0100644, oid, + 1, &contents, &size)) + contents = read_sha1_file(oid->hash, &type, + &size); + if (!contents) + die("could not convert '%s' %s", + oid_to_hex(oid), data->rest); + } else + die("BUG: invalid cmdmode: %c", opt->cmdmode); + batch_write(opt, contents, size); + free(contents); + } else if (stream_blob_to_fd(1, oid, NULL, 0) < 0) + die("unable to stream %s to stdout", oid_to_hex(oid)); } else { enum object_type type; unsigned long size; void *contents; - contents = read_sha1_file(sha1, &type, &size); + contents = read_sha1_file(oid->hash, &type, &size); if (!contents) - die("object %s disappeared", sha1_to_hex(sha1)); + die("object %s disappeared", oid_to_hex(oid)); if (type != data->type) - die("object %s changed type!?", sha1_to_hex(sha1)); + die("object %s changed type!?", oid_to_hex(oid)); if (data->info.sizep && size != data->size) - die("object %s changed size!?", sha1_to_hex(sha1)); + die("object %s changed size!?", oid_to_hex(oid)); batch_write(opt, contents, size); free(contents); @@@ -333,9 -266,8 +333,9 @@@ static void batch_object_write(const ch struct strbuf buf = STRBUF_INIT; if (!data->skip_object_info && - sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { - printf("%s missing\n", obj_name ? obj_name : sha1_to_hex(data->sha1)); + sha1_object_info_extended(data->oid.hash, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { + printf("%s missing\n", + obj_name ? obj_name : oid_to_hex(&data->oid)); fflush(stdout); return; } @@@ -358,7 -290,7 +358,7 @@@ static void batch_one_object(const cha int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0; enum follow_symlinks_result result; - result = get_sha1_with_context(obj_name, flags, data->sha1, &ctx); + result = get_sha1_with_context(obj_name, flags, data->oid.hash, &ctx); if (result != FOUND) { switch (result) { case MISSING_OBJECT: @@@ -401,12 -333,11 +401,12 @@@ struct object_cb_data struct expand_data *expand; }; -static void batch_object_cb(const unsigned char sha1[20], void *vdata) +static int batch_object_cb(const unsigned char sha1[20], void *vdata) { struct object_cb_data *data = vdata; - hashcpy(data->expand->sha1, sha1); + hashcpy(data->expand->oid.hash, sha1); batch_object_write(NULL, data->opt, data->expand); + return 0; } static int batch_loose_object(const unsigned char *sha1, @@@ -445,12 -376,9 +445,11 @@@ static int batch_objects(struct batch_o data.mark_query = 1; strbuf_expand(&buf, opt->format, expand_format, &data); data.mark_query = 0; + if (opt->cmdmode) + data.split_on_whitespace = 1; if (opt->all_objects) { - struct object_info empty; - memset(&empty, 0, sizeof(empty)); + struct object_info empty = OBJECT_INFO_INIT; if (!memcmp(&data.info, &empty, sizeof(empty))) data.skip_object_info = 1; } @@@ -511,8 -439,8 +510,8 @@@ } static const char * const cat_file_usage[] = { - N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p||--textconv) "), - N_("git cat-file (--batch | --batch-check) [--follow-symlinks]"), + N_("git cat-file (-t [--allow-unknown-type] | -s [--allow-unknown-type] | -e | -p | | --textconv | --filters) [--path=] "), + N_("git cat-file (--batch | --batch-check) [--follow-symlinks] [--textconv | --filters]"), NULL }; @@@ -557,10 -485,6 +556,10 @@@ int cmd_cat_file(int argc, const char * OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'), OPT_CMDMODE(0, "textconv", &opt, N_("for blob objects, run textconv on object's content"), 'c'), + OPT_CMDMODE(0, "filters", &opt, + N_("for blob objects, run filters on object's content"), 'w'), + OPT_STRING(0, "path", &force_path, N_("blob"), + N_("use a specific path for --textconv/--filters")), OPT_BOOL(0, "allow-unknown-type", &unknown_type, N_("allow -s and -t to work with broken/corrupt objects")), OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")), @@@ -583,9 -507,7 +582,9 @@@ argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0); if (opt) { - if (argc == 1) + if (batch.enabled && (opt == 'c' || opt == 'w')) + batch.cmdmode = opt; + else if (argc == 1) obj_name = argv[0]; else usage_with_options(cat_file_usage, options); @@@ -597,28 -519,14 +596,28 @@@ } else usage_with_options(cat_file_usage, options); } - if (batch.enabled && (opt || argc)) { - usage_with_options(cat_file_usage, options); + if (batch.enabled) { + if (batch.cmdmode != opt || argc) + usage_with_options(cat_file_usage, options); + if (batch.cmdmode && batch.all_objects) + die("--batch-all-objects cannot be combined with " + "--textconv nor with --filters"); } if ((batch.follow_symlinks || batch.all_objects) && !batch.enabled) { usage_with_options(cat_file_usage, options); } + if (force_path && opt != 'c' && opt != 'w') { + error("--path= needs --textconv or --filters"); + usage_with_options(cat_file_usage, options); + } + + if (force_path && batch.enabled) { + error("--path= incompatible with --batch"); + usage_with_options(cat_file_usage, options); + } + if (batch.buffer_output < 0) batch.buffer_output = batch.all_objects; diff --combined builtin/pack-objects.c index 8aeba6a6e1,977d25f495..1e7c2a98a5 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -23,6 -23,7 +23,7 @@@ #include "reachable.h" #include "sha1-array.h" #include "argv-array.h" + #include "mru.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [...] [< | < ]"), @@@ -67,8 -68,7 +68,8 @@@ static struct packed_git *reuse_packfil static uint32_t reuse_packfile_objects; static off_t reuse_packfile_offset; -static int use_bitmap_index = 1; +static int use_bitmap_index_default = 1; +static int use_bitmap_index = -1; static int write_bitmap_index; static uint16_t write_bitmap_options; @@@ -344,15 -344,15 +345,15 @@@ static unsigned long write_no_reuse_obj } /* Return 0 if we will bust the pack-size limit */ -static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry, - unsigned long limit, int usable_delta) +static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry, + unsigned long limit, int usable_delta) { struct packed_git *p = entry->in_pack; struct pack_window *w_curs = NULL; struct revindex_entry *revidx; off_t offset; enum object_type type = entry->type; - unsigned long datalen; + off_t datalen; unsigned char header[10], dheader[10]; unsigned hdrlen; @@@ -418,12 -418,11 +419,12 @@@ } /* Return 0 if we will bust the pack-size limit */ -static unsigned long write_object(struct sha1file *f, - struct object_entry *entry, - off_t write_offset) +static off_t write_object(struct sha1file *f, + struct object_entry *entry, + off_t write_offset) { - unsigned long limit, len; + unsigned long limit; + off_t len; int usable_delta, to_reuse; if (!pack_to_stdout) @@@ -495,7 -494,7 +496,7 @@@ static enum write_one_status write_one( struct object_entry *e, off_t *offset) { - unsigned long size; + off_t size; int recursing; /* @@@ -946,79 -945,30 +947,80 @@@ static int have_duplicate_entry(const u return 1; } +static int want_found_object(int exclude, struct packed_git *p) +{ + if (exclude) + return 1; + if (incremental) + return 0; + + /* + * When asked to do --local (do not include an object that appears in a + * pack we borrow from elsewhere) or --honor-pack-keep (do not include + * an object that appears in a pack marked with .keep), finding a pack + * that matches the criteria is sufficient for us to decide to omit it. + * However, even if this pack does not satisfy the criteria, we need to + * make sure no copy of this object appears in _any_ pack that makes us + * to omit the object, so we need to check all the packs. + * + * We can however first check whether these options can possible matter; + * if they do not matter we know we want the object in generated pack. + * Otherwise, we signal "-1" at the end to tell the caller that we do + * not know either way, and it needs to check more packs. + */ + if (!ignore_packed_keep && + (!local || !have_non_local_packs)) + return 1; + + if (local && !p->pack_local) + return 0; + if (ignore_packed_keep && p->pack_local && p->pack_keep) + return 0; + + /* we don't know yet; keep looking for more packs */ + return -1; +} + /* * Check whether we want the object in the pack (e.g., we do not want * objects found in non-local stores if the "--local" option was used). * - * As a side effect of this check, we will find the packed version of this - * object, if any. We therefore pass out the pack information to avoid having - * to look it up again later. + * If the caller already knows an existing pack it wants to take the object + * from, that is passed in *found_pack and *found_offset; otherwise this + * function finds if there is any pack that has the object and returns the pack + * and its offset in these variables. */ static int want_object_in_pack(const unsigned char *sha1, int exclude, struct packed_git **found_pack, off_t *found_offset) { - struct packed_git *p; + struct mru_entry *entry; + int want; if (!exclude && local && has_loose_object_nonlocal(sha1)) return 0; - *found_pack = NULL; - *found_offset = 0; + /* + * If we already know the pack object lives in, start checks from that + * pack - in the usual case when neither --local was given nor .keep files + * are present we will determine the answer right now. + */ + if (*found_pack) { + want = want_found_object(exclude, *found_pack); + if (want != -1) + return want; + } - for (p = packed_git; p; p = p->next) { + for (entry = packed_git_mru->head; entry; entry = entry->next) { + struct packed_git *p = entry->item; - off_t offset = find_pack_entry_one(sha1, p); + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(sha1, p); + if (offset) { if (!*found_pack) { if (!is_pack_valid(p)) @@@ -1026,9 -976,33 +1028,11 @@@ *found_offset = offset; *found_pack = p; } - if (exclude) - return 1; - if (incremental) - return 0; - - /* - * When asked to do --local (do not include an - * object that appears in a pack we borrow - * from elsewhere) or --honor-pack-keep (do not - * include an object that appears in a pack marked - * with .keep), we need to make sure no copy of this - * object come from in _any_ pack that causes us to - * omit it, and need to complete this loop. When - * neither option is in effect, we know the object - * we just found is going to be packed, so break - * out of the loop to return 1 now. - */ - if (!ignore_packed_keep && - (!local || !have_non_local_packs)) { + want = want_found_object(exclude, p); ++ if (!exclude && want > 0) + mru_mark(packed_git_mru, entry); - break; - } - - if (local && !p->pack_local) - return 0; - if (ignore_packed_keep && p->pack_local && p->pack_keep) - return 0; + if (want != -1) + return want; } } @@@ -1069,8 -1043,8 +1073,8 @@@ static const char no_closure_warning[] static int add_object_entry(const unsigned char *sha1, enum object_type type, const char *name, int exclude) { - struct packed_git *found_pack; - off_t found_offset; + struct packed_git *found_pack = NULL; + off_t found_offset = 0; uint32_t index_pos; if (have_duplicate_entry(sha1, exclude, &index_pos)) @@@ -1103,9 -1077,6 +1107,9 @@@ static int add_object_entry_from_bitmap if (have_duplicate_entry(sha1, 0, &index_pos)) return 0; + if (!want_object_in_pack(sha1, 0, &pack, &offset)) + return 0; + create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset); display_progress(progress_state, nr_result); @@@ -1527,6 -1498,83 +1531,83 @@@ static int pack_offset_sort(const void (a->in_pack_offset > b->in_pack_offset); } + /* + * Drop an on-disk delta we were planning to reuse. Naively, this would + * just involve blanking out the "delta" field, but we have to deal + * with some extra book-keeping: + * + * 1. Removing ourselves from the delta_sibling linked list. + * + * 2. Updating our size/type to the non-delta representation. These were + * either not recorded initially (size) or overwritten with the delta type + * (type) when check_object() decided to reuse the delta. + */ + static void drop_reused_delta(struct object_entry *entry) + { + struct object_entry **p = &entry->delta->delta_child; + struct object_info oi = OBJECT_INFO_INIT; + + while (*p) { + if (*p == entry) + *p = (*p)->delta_sibling; + else + p = &(*p)->delta_sibling; + } + entry->delta = NULL; + + oi.sizep = &entry->size; + oi.typep = &entry->type; + if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) { + /* + * We failed to get the info from this pack for some reason; + * fall back to sha1_object_info, which may find another copy. + * And if that fails, the error will be recorded in entry->type + * and dealt with in prepare_pack(). + */ + entry->type = sha1_object_info(entry->idx.sha1, &entry->size); + } + } + + /* + * Follow the chain of deltas from this entry onward, throwing away any links + * that cause us to hit a cycle (as determined by the DFS state flags in + * the entries). + */ + static void break_delta_chains(struct object_entry *entry) + { + /* If it's not a delta, it can't be part of a cycle. */ + if (!entry->delta) { + entry->dfs_state = DFS_DONE; + return; + } + + switch (entry->dfs_state) { + case DFS_NONE: + /* + * This is the first time we've seen the object. We mark it as + * part of the active potential cycle and recurse. + */ + entry->dfs_state = DFS_ACTIVE; + break_delta_chains(entry->delta); + entry->dfs_state = DFS_DONE; + break; + + case DFS_DONE: + /* object already examined, and not part of a cycle */ + break; + + case DFS_ACTIVE: + /* + * We found a cycle that needs broken. It would be correct to + * break any link in the chain, but it's convenient to + * break this one. + */ + drop_reused_delta(entry); + entry->dfs_state = DFS_DONE; + break; + } + } + static void get_object_details(void) { uint32_t i; @@@ -1535,7 -1583,7 +1616,7 @@@ sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *)); for (i = 0; i < to_pack.nr_objects; i++) sorted_by_offset[i] = to_pack.objects + i; - qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort); + QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort); for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = sorted_by_offset[i]; @@@ -1544,6 -1592,13 +1625,13 @@@ entry->no_try_delta = 1; } + /* + * This must happen in a second pass, since we rely on the delta + * information for the whole list being completed. + */ + for (i = 0; i < to_pack.nr_objects; i++) + break_delta_chains(&to_pack.objects[i]); + free(sorted_by_offset); } @@@ -2155,35 -2210,6 +2243,35 @@@ static void ll_find_deltas(struct objec #define ll_find_deltas(l, s, w, d, p) find_deltas(l, &s, w, d, p) #endif +static void add_tag_chain(const struct object_id *oid) +{ + struct tag *tag; + + /* + * We catch duplicates already in add_object_entry(), but we'd + * prefer to do this extra check to avoid having to parse the + * tag at all if we already know that it's being packed (e.g., if + * it was included via bitmaps, we would not have parsed it + * previously). + */ + if (packlist_find(&to_pack, oid->hash, NULL)) + return; + + tag = lookup_tag(oid->hash); + while (1) { + if (!tag || parse_tag(tag) || !tag->tagged) + die("unable to pack objects reachable from tag %s", + oid_to_hex(oid)); + + add_object_entry(tag->object.oid.hash, OBJ_TAG, NULL, 0); + + if (tag->tagged->type != OBJ_TAG) + return; + + tag = (struct tag *)tag->tagged; + } +} + static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data) { struct object_id peeled; @@@ -2191,7 -2217,7 +2279,7 @@@ if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, peeled.hash) && /* peelable? */ packlist_find(&to_pack, peeled.hash, NULL)) /* object packed? */ - add_object_entry(oid->hash, OBJ_TAG, NULL, 0); + add_tag_chain(oid); return 0; } @@@ -2257,7 -2283,7 +2345,7 @@@ static void prepare_pack(int window, in if (progress) progress_state = start_progress(_("Compressing objects"), nr_deltas); - qsort(delta_list, n, sizeof(*delta_list), type_size_sort); + QSORT(delta_list, n, type_size_sort); ll_find_deltas(delta_list, n, window+1, depth, &nr_done); stop_progress(&progress_state); if (nr_done != nr_deltas) @@@ -2305,7 -2331,7 +2393,7 @@@ static int git_pack_config(const char * write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE; } if (!strcmp(k, "pack.usebitmaps")) { - use_bitmap_index = git_config_bool(k, v); + use_bitmap_index_default = git_config_bool(k, v); return 0; } if (!strcmp(k, "pack.threads")) { @@@ -2449,7 -2475,8 +2537,7 @@@ static void add_objects_in_unpacked_pac } if (in_pack.nr) { - qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]), - ofscmp); + QSORT(in_pack.array, in_pack.nr, ofscmp); for (i = 0; i < in_pack.nr; i++) { struct object *o = in_pack.array[i].object; add_object_entry(o->oid.hash, o->type, "", 0); @@@ -2553,13 -2580,13 +2641,13 @@@ static void loosen_unused_packed_object } /* - * This tracks any options which a reader of the pack might - * not understand, and which would therefore prevent blind reuse - * of what we have on disk. + * This tracks any options which pack-reuse code expects to be on, or which a + * reader of the pack might not understand, and which would therefore prevent + * blind reuse of what we have on disk. */ static int pack_options_allow_reuse(void) { - return allow_ofs_delta; + return pack_to_stdout && allow_ofs_delta; } static int get_object_list_from_bitmap(struct rev_info *revs) @@@ -2852,23 -2879,7 +2940,23 @@@ int cmd_pack_objects(int argc, const ch if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; - if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) + /* + * "soft" reasons not to use bitmaps - for on-disk repack by default we want + * + * - to produce good pack (with bitmap index not-yet-packed objects are + * packed in suboptimal order). + * + * - to use more robust pack-generation codepath (avoiding possible + * bugs in bitmap code and possible bitmap index corruption). + */ + if (!pack_to_stdout) + use_bitmap_index_default = 0; + + if (use_bitmap_index < 0) + use_bitmap_index = use_bitmap_index_default; + + /* "hard" reasons not to use bitmaps; these just won't work at all */ + if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow()) use_bitmap_index = 0; if (pack_to_stdout || !rev_list_all) diff --combined cache.h index 1604e29878,1064017d13..2cfb1cab66 --- a/cache.h +++ b/cache.h @@@ -173,7 -173,7 +173,7 @@@ struct cache_entry unsigned int ce_flags; unsigned int ce_namelen; unsigned int index; /* for link extension */ - unsigned char sha1[20]; + struct object_id oid; char name[FLEX_ARRAY]; /* more */ }; @@@ -367,9 -367,8 +367,9 @@@ extern void free_name_hash(struct index #define rename_cache_entry_at(pos, new_name) rename_index_entry_at(&the_index, (pos), (new_name)) #define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos)) #define remove_file_from_cache(path) remove_file_from_index(&the_index, (path)) -#define add_to_cache(path, st, flags) add_to_index(&the_index, (path), (st), (flags), 0) -#define add_file_to_cache(path, flags) add_file_to_index(&the_index, (path), (flags), 0) +#define add_to_cache(path, st, flags) add_to_index(&the_index, (path), (st), (flags)) +#define add_file_to_cache(path, flags) add_file_to_index(&the_index, (path), (flags)) +#define chmod_cache_entry(ce, flip) chmod_index_entry(&the_index, (ce), (flip)) #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL, NULL) #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) @@@ -454,12 -453,6 +454,12 @@@ static inline enum object_type object_t */ extern const char * const local_repo_env[]; +/* + * Returns true iff we have a configured git repository (either via + * setup_git_directory, or in the environment via $GIT_DIR). + */ +int have_git_dir(void); + extern int is_bare_repository_cfg; extern int is_bare_repository(void); extern int is_inside_git_dir(void); @@@ -526,10 -519,9 +526,10 @@@ extern void verify_non_filename(const c extern int path_inside_repo(const char *prefix, const char *path); #define INIT_DB_QUIET 0x0001 +#define INIT_DB_EXIST_OK 0x0002 -extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); -extern int init_db(const char *template_dir, unsigned int flags); +extern int init_db(const char *git_dir, const char *real_git_dir, + const char *template_dir, unsigned int flags); extern void sanitize_stdfds(void); extern int daemonize(void); @@@ -589,10 -581,9 +589,10 @@@ extern int remove_file_from_index(struc #define ADD_CACHE_IGNORE_ERRORS 4 #define ADD_CACHE_IGNORE_REMOVAL 8 #define ADD_CACHE_INTENT 16 -extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags, int force_mode); -extern int add_file_to_index(struct index_state *, const char *path, int flags, int force_mode); +extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags); +extern int add_file_to_index(struct index_state *, const char *path, int flags); extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, unsigned int refresh_options); +extern int chmod_index_entry(struct index_state *, struct cache_entry *ce, char flip); extern int ce_same_name(const struct cache_entry *a, const struct cache_entry *b); extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce); extern int index_name_is_other(const struct index_state *, const char *, int); @@@ -641,7 -632,6 +641,7 @@@ extern void fill_stat_cache_info(struc #define REFRESH_IGNORE_SUBMODULES 0x0010 /* ignore submodules */ #define REFRESH_IN_PORCELAIN 0x0020 /* user friendly output, not "needs update" */ extern int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg); +extern struct cache_entry *refresh_cache_entry(struct cache_entry *, unsigned int); extern void update_index_if_able(struct index_state *, struct lock_file *); @@@ -674,15 -664,8 +674,15 @@@ extern size_t delta_base_cache_limit extern unsigned long big_file_threshold; extern unsigned long pack_size_limit_cfg; +/* + * Accessors for the core.sharedrepository config which lazy-load the value + * from the config (if not already set). The "reset" function can be + * used to unset "set" or cached value, meaning that the value will be loaded + * fresh from the config file on the next call to get_shared_repository(). + */ void set_shared_repository(int value); int get_shared_repository(void); +void reset_shared_repository(void); /* * Do replace refs need to be checked this run? This variable is @@@ -835,8 -818,8 +835,8 @@@ extern void strbuf_git_common_path(stru __attribute__((format (printf, 2, 3))); extern char *git_path_buf(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3))); -extern void strbuf_git_path_submodule(struct strbuf *sb, const char *path, - const char *fmt, ...) +extern int strbuf_git_path_submodule(struct strbuf *sb, const char *path, + const char *fmt, ...) __attribute__((format (printf, 3, 4))); extern char *git_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); @@@ -969,39 -952,22 +969,39 @@@ static inline void oidclr(struct object #define EMPTY_TREE_SHA1_BIN_LITERAL \ "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \ "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04" -#define EMPTY_TREE_SHA1_BIN \ - ((const unsigned char *) EMPTY_TREE_SHA1_BIN_LITERAL) +extern const struct object_id empty_tree_oid; +#define EMPTY_TREE_SHA1_BIN (empty_tree_oid.hash) #define EMPTY_BLOB_SHA1_HEX \ "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" #define EMPTY_BLOB_SHA1_BIN_LITERAL \ "\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \ "\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91" -#define EMPTY_BLOB_SHA1_BIN \ - ((const unsigned char *) EMPTY_BLOB_SHA1_BIN_LITERAL) +extern const struct object_id empty_blob_oid; +#define EMPTY_BLOB_SHA1_BIN (empty_blob_oid.hash) + static inline int is_empty_blob_sha1(const unsigned char *sha1) { return !hashcmp(sha1, EMPTY_BLOB_SHA1_BIN); } +static inline int is_empty_blob_oid(const struct object_id *oid) +{ + return !hashcmp(oid->hash, EMPTY_BLOB_SHA1_BIN); +} + +static inline int is_empty_tree_sha1(const unsigned char *sha1) +{ + return !hashcmp(sha1, EMPTY_TREE_SHA1_BIN); +} + +static inline int is_empty_tree_oid(const struct object_id *oid) +{ + return !hashcmp(oid->hash, EMPTY_TREE_SHA1_BIN); +} + + int git_mkstemp(char *path, size_t n, const char *template); /* set default permissions by passing mode arguments to open(2) */ @@@ -1037,11 -1003,6 +1037,11 @@@ int adjust_shared_perm(const char *path * directory while we were working. To be robust against this kind of * race, callers might want to try invoking the function again when it * returns SCLD_VANISHED. + * + * safe_create_leading_directories() temporarily changes path while it + * is working but restores it before returning. + * safe_create_leading_directories_const() doesn't modify path, even + * temporarily. */ enum scld_error { SCLD_OK = 0, @@@ -1172,16 -1133,6 +1172,16 @@@ static inline unsigned int hexval(unsig return hexval_table[c]; } +/* + * Convert two consecutive hexadecimal digits into a char. Return a + * negative value on error. Don't run over the end of short strings. + */ +static inline int hex2chr(const char *s) +{ + int val = hexval(s[0]); + return (val < 0) ? val : (val << 4) | hexval(s[1]); +} + /* Convert to/from hex/sha1 representation */ #define MINIMUM_ABBREV minimum_abbrev #define DEFAULT_ABBREV default_abbrev @@@ -1206,11 -1157,6 +1206,11 @@@ struct object_context #define GET_SHA1_FOLLOW_SYMLINKS 0100 #define GET_SHA1_ONLY_TO_DIE 04000 +#define GET_SHA1_DISAMBIGUATORS \ + (GET_SHA1_COMMIT | GET_SHA1_COMMITTISH | \ + GET_SHA1_TREE | GET_SHA1_TREEISH | \ + GET_SHA1_BLOB) + extern int get_sha1(const char *str, unsigned char *sha1); extern int get_sha1_commit(const char *str, unsigned char *sha1); extern int get_sha1_committish(const char *str, unsigned char *sha1); @@@ -1225,8 -1171,6 +1225,8 @@@ extern int get_oid(const char *str, str typedef int each_abbrev_fn(const unsigned char *sha1, void *); extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *); +extern int set_disambiguate_hint_config(const char *var, const char *value); + /* * Try to read a SHA1 in hexadecimal format from the 40 characters * starting at hex. Write the 20-byte result to sha1 in binary form. @@@ -1254,7 -1198,7 +1254,7 @@@ extern char *sha1_to_hex(const unsigne extern char *oid_to_hex(const struct object_id *oid); /* same static buffer as sha1_to_hex */ extern int interpret_branch_name(const char *str, int len, struct strbuf *); -extern int get_sha1_mb(const char *str, unsigned char *sha1); +extern int get_oid_mb(const char *str, struct object_id *oid); extern int validate_headref(const char *ref); @@@ -1280,8 -1224,7 +1280,8 @@@ struct date_mode DATE_ISO8601_STRICT, DATE_RFC2822, DATE_STRFTIME, - DATE_RAW + DATE_RAW, + DATE_UNIX } type; const char *strftime_fmt; int local; @@@ -1320,7 -1263,6 +1320,7 @@@ extern const char *ident_default_email( extern const char *git_editor(void); extern const char *git_pager(int stdout_is_tty); extern int git_ident_config(const char *, const char *, void *); +extern void reset_ident_date(void); struct ident_split { const char *name_begin; @@@ -1364,7 -1306,6 +1364,7 @@@ struct checkout not_new:1, refresh_cache:1; }; +#define CHECKOUT_INIT { NULL, "" } #define TEMPORARY_FILENAME_LENGTH 25 extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); @@@ -1395,7 -1336,6 +1395,7 @@@ extern struct alternate_object_databas } *alt_odb_list; extern void prepare_alt_odb(void); extern void read_info_alternates(const char * relative_base, int depth); +extern char *compute_alternate_path(const char *path, struct strbuf *err); extern void add_to_alternates_file(const char *reference); typedef int alt_odb_fn(struct alternate_object_database *, void *); extern int foreach_alt_odb(alt_odb_fn, void*); @@@ -1575,7 -1515,7 +1575,7 @@@ struct object_info /* Request */ enum object_type *typep; unsigned long *sizep; - unsigned long *disk_sizep; + off_t *disk_sizep; unsigned char *delta_base_sha1; struct strbuf *typename; @@@ -1602,7 -1542,15 +1602,15 @@@ } packed; } u; }; + + /* + * Initializer for a "struct object_info" that wants no items. You may + * also memset() the memory to all-zeroes. + */ + #define OBJECT_INFO_INIT {NULL} + extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); + extern int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); /* Dumb servers support */ extern int update_server_info(int); @@@ -1626,18 -1574,10 +1634,18 @@@ struct git_config_source const char *blob; }; +enum config_origin_type { + CONFIG_ORIGIN_BLOB, + CONFIG_ORIGIN_FILE, + CONFIG_ORIGIN_STDIN, + CONFIG_ORIGIN_SUBMODULE_BLOB, + CONFIG_ORIGIN_CMDLINE +}; + typedef int (*config_fn_t)(const char *, const char *, void *); extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); -extern int git_config_from_mem(config_fn_t fn, const char *origin_type, +extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type, const char *name, const char *buf, size_t len, void *data); extern void git_config_push_parameter(const char *text); extern int git_config_from_parameters(config_fn_t fn, void *data); @@@ -1781,7 -1721,7 +1789,7 @@@ extern int ignore_untracked_cache_confi struct key_value_info { const char *filename; int linenr; - const char *origin_type; + enum config_origin_type origin_type; enum config_scope scope; }; @@@ -1808,6 -1748,7 +1816,6 @@@ extern int copy_file(const char *dst, c extern int copy_file_with_time(const char *dst, const char *src, int mode); extern void write_or_die(int fd, const void *buf, size_t count); -extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); extern void fsync_or_die(int fd, const char *); extern ssize_t read_in_full(int fd, void *buf, size_t count); @@@ -1837,6 -1778,7 +1845,6 @@@ extern void write_file(const char *path /* pager.c */ extern void setup_pager(void); -extern const char *pager_program; extern int pager_in_use(void); extern int pager_use_color; extern int term_columns(void); @@@ -1869,7 -1811,7 +1877,7 @@@ void packet_trace_identity(const char * * return 0 if success, 1 - if addition of a file failed and * ADD_FILES_IGNORE_ERRORS was specified in flags */ -int add_files_to_cache(const char *prefix, const struct pathspec *pathspec, int flags, int force_mode); +int add_files_to_cache(const char *prefix, const struct pathspec *pathspec, int flags); /* diff.c */ extern int diff_auto_refresh_index; diff --combined sha1_file.c index 94daf31ec6,66edb5efcf..309e87d987 --- a/sha1_file.c +++ b/sha1_file.c @@@ -24,8 -24,6 +24,8 @@@ #include "streaming.h" #include "dir.h" #include "mru.h" +#include "list.h" +#include "mergesort.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@@ -40,12 -38,6 +40,12 @@@ static inline uintmax_t sz_fmt(size_t s const unsigned char null_sha1[20]; const struct object_id null_oid; +const struct object_id empty_tree_oid = { + EMPTY_TREE_SHA1_BIN_LITERAL +}; +const struct object_id empty_blob_oid = { + EMPTY_BLOB_SHA1_BIN_LITERAL +}; /* * This is meant to hold a *small* number of objects that you would @@@ -426,82 -418,6 +426,82 @@@ void add_to_alternates_file(const char free(alts); } +/* + * Compute the exact path an alternate is at and returns it. In case of + * error NULL is returned and the human readable error is added to `err` + * `path` may be relative and should point to $GITDIR. + * `err` must not be null. + */ +char *compute_alternate_path(const char *path, struct strbuf *err) +{ + char *ref_git = NULL; + const char *repo, *ref_git_s; + int seen_error = 0; + + ref_git_s = real_path_if_valid(path); + if (!ref_git_s) { + seen_error = 1; + strbuf_addf(err, _("path '%s' does not exist"), path); + goto out; + } else + /* + * Beware: read_gitfile(), real_path() and mkpath() + * return static buffer + */ + ref_git = xstrdup(ref_git_s); + + repo = read_gitfile(ref_git); + if (!repo) + repo = read_gitfile(mkpath("%s/.git", ref_git)); + if (repo) { + free(ref_git); + ref_git = xstrdup(repo); + } + + if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) { + char *ref_git_git = mkpathdup("%s/.git", ref_git); + free(ref_git); + ref_git = ref_git_git; + } else if (!is_directory(mkpath("%s/objects", ref_git))) { + struct strbuf sb = STRBUF_INIT; + seen_error = 1; + if (get_common_dir(&sb, ref_git)) { + strbuf_addf(err, + _("reference repository '%s' as a linked " + "checkout is not supported yet."), + path); + goto out; + } + + strbuf_addf(err, _("reference repository '%s' is not a " + "local repository."), path); + goto out; + } + + if (!access(mkpath("%s/shallow", ref_git), F_OK)) { + strbuf_addf(err, _("reference repository '%s' is shallow"), + path); + seen_error = 1; + goto out; + } + + if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) { + strbuf_addf(err, + _("reference repository '%s' is grafted"), + path); + seen_error = 1; + goto out; + } + +out: + if (seen_error) { + free(ref_git); + ref_git = NULL; + } + + return ref_git; +} + int foreach_alt_odb(alt_odb_fn fn, void *cb) { struct alternate_object_database *ent; @@@ -875,7 -791,7 +875,7 @@@ void close_all_packs(void for (p = packed_git; p; p = p->next) if (p->do_not_close) - die("BUG! Want to close pack marked 'do-not-close'"); + die("BUG: want to close pack marked 'do-not-close'"); else close_pack(p); } @@@ -1381,20 -1297,10 +1381,20 @@@ static void prepare_packed_git_one(cha strbuf_release(&path); } +static void *get_next_packed_git(const void *p) +{ + return ((const struct packed_git *)p)->next; +} + +static void set_next_packed_git(void *p, void *next) +{ + ((struct packed_git *)p)->next = next; +} + static int sort_pack(const void *a_, const void *b_) { - struct packed_git *a = *((struct packed_git **)a_); - struct packed_git *b = *((struct packed_git **)b_); + const struct packed_git *a = a_; + const struct packed_git *b = b_; int st; /* @@@ -1421,8 -1327,28 +1421,8 @@@ static void rearrange_packed_git(void) { - struct packed_git **ary, *p; - int i, n; - - for (n = 0, p = packed_git; p; p = p->next) - n++; - if (n < 2) - return; - - /* prepare an array of packed_git for easier sorting */ - ary = xcalloc(n, sizeof(struct packed_git *)); - for (n = 0, p = packed_git; p; p = p->next) - ary[n++] = p; - - qsort(ary, n, sizeof(struct packed_git *), sort_pack); - - /* link them back again */ - for (i = 0; i < n - 1; i++) - ary[i]->next = ary[i + 1]; - ary[n - 1]->next = NULL; - packed_git = ary[0]; - - free(ary); + packed_git = llist_mergesort(packed_git, get_next_packed_git, + set_next_packed_git, sort_pack); } static void prepare_packed_git_mru(void) @@@ -1646,9 -1572,7 +1646,9 @@@ unsigned long unpack_object_header_buff return used; } -int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) +static int unpack_sha1_short_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) { /* Get the data stream */ memset(stream, 0, sizeof(*stream)); @@@ -1661,31 -1585,13 +1661,31 @@@ return git_inflate(stream, 0); } +int unpack_sha1_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) +{ + int status = unpack_sha1_short_header(stream, map, mapsize, + buffer, bufsiz); + + if (status < Z_OK) + return status; + + /* Make sure we have the terminating NUL */ + if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return -1; + return 0; +} + static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz, struct strbuf *header) { int status; - status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz); + status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz); + if (status < Z_OK) + return -1; /* * Check if entire header is unpacked in the first iteration. @@@ -1776,8 -1682,6 +1776,8 @@@ static int parse_sha1_header_extended(c */ for (;;) { char c = *hdr++; + if (!c) + return -1; if (c == ' ') break; type_len++; @@@ -1788,7 -1692,7 +1788,7 @@@ strbuf_add(oi->typename, type_buf, type_len); /* * Set type to 0 if its an unknown object and - * we're obtaining the type using '--allow-unkown-type' + * we're obtaining the type using '--allow-unknown-type' * option. */ if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0)) @@@ -1826,11 -1730,9 +1826,9 @@@ int parse_sha1_header(const char *hdr, unsigned long *sizep) { - struct object_info oi; + struct object_info oi = OBJECT_INFO_INIT; oi.sizep = sizep; - oi.typename = NULL; - oi.typep = NULL; return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT); } @@@ -2068,8 -1970,8 +2066,8 @@@ unwind goto out; } - static int packed_object_info(struct packed_git *p, off_t obj_offset, - struct object_info *oi) + int packed_object_info(struct packed_git *p, off_t obj_offset, + struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; @@@ -2169,142 -2071,136 +2167,142 @@@ static void *unpack_compressed_entry(st return buffer; } -#define MAX_DELTA_CACHE (256) - +static struct hashmap delta_base_cache; static size_t delta_base_cached; -static struct delta_base_cache_lru_list { - struct delta_base_cache_lru_list *prev; - struct delta_base_cache_lru_list *next; -} delta_base_cache_lru = { &delta_base_cache_lru, &delta_base_cache_lru }; +static LIST_HEAD(delta_base_cache_lru); -static struct delta_base_cache_entry { - struct delta_base_cache_lru_list lru; - void *data; +struct delta_base_cache_key { struct packed_git *p; off_t base_offset; +}; + +struct delta_base_cache_entry { + struct hashmap hash; + struct delta_base_cache_key key; + struct list_head lru; + void *data; unsigned long size; enum object_type type; -} delta_base_cache[MAX_DELTA_CACHE]; +}; -static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) +static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset) { - unsigned long hash; + unsigned int hash; - hash = (unsigned long)(intptr_t)p + (unsigned long)base_offset; + hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset; hash += (hash >> 8) + (hash >> 16); - return hash % MAX_DELTA_CACHE; + return hash; } static struct delta_base_cache_entry * get_delta_base_cache_entry(struct packed_git *p, off_t base_offset) { - unsigned long hash = pack_entry_hash(p, base_offset); - return delta_base_cache + hash; + struct hashmap_entry entry; + struct delta_base_cache_key key; + + if (!delta_base_cache.cmpfn) + return NULL; + + hashmap_entry_init(&entry, pack_entry_hash(p, base_offset)); + key.p = p; + key.base_offset = base_offset; + return hashmap_get(&delta_base_cache, &entry, &key); } -static int eq_delta_base_cache_entry(struct delta_base_cache_entry *ent, - struct packed_git *p, off_t base_offset) +static int delta_base_cache_key_eq(const struct delta_base_cache_key *a, + const struct delta_base_cache_key *b) { - return (ent->data && ent->p == p && ent->base_offset == base_offset); + return a->p == b->p && a->base_offset == b->base_offset; +} + +static int delta_base_cache_hash_cmp(const void *va, const void *vb, + const void *vkey) +{ + const struct delta_base_cache_entry *a = va, *b = vb; + const struct delta_base_cache_key *key = vkey; + if (key) + return !delta_base_cache_key_eq(&a->key, key); + else + return !delta_base_cache_key_eq(&a->key, &b->key); } static int in_delta_base_cache(struct packed_git *p, off_t base_offset) { - struct delta_base_cache_entry *ent; - ent = get_delta_base_cache_entry(p, base_offset); - return eq_delta_base_cache_entry(ent, p, base_offset); + return !!get_delta_base_cache_entry(p, base_offset); } -static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent) +/* + * Remove the entry from the cache, but do _not_ free the associated + * entry data. The caller takes ownership of the "data" buffer, and + * should copy out any fields it wants before detaching. + */ +static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) { - ent->data = NULL; - ent->lru.next->prev = ent->lru.prev; - ent->lru.prev->next = ent->lru.next; + hashmap_remove(&delta_base_cache, ent, &ent->key); + list_del(&ent->lru); delta_base_cached -= ent->size; + free(ent); } static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, - unsigned long *base_size, enum object_type *type, int keep_cache) + unsigned long *base_size, enum object_type *type) { struct delta_base_cache_entry *ent; - void *ret; ent = get_delta_base_cache_entry(p, base_offset); - - if (!eq_delta_base_cache_entry(ent, p, base_offset)) + if (!ent) return unpack_entry(p, base_offset, type, base_size); - ret = ent->data; - - if (!keep_cache) - clear_delta_base_cache_entry(ent); - else - ret = xmemdupz(ent->data, ent->size); *type = ent->type; *base_size = ent->size; - return ret; + return xmemdupz(ent->data, ent->size); } static inline void release_delta_base_cache(struct delta_base_cache_entry *ent) { - if (ent->data) { - free(ent->data); - ent->data = NULL; - ent->lru.next->prev = ent->lru.prev; - ent->lru.prev->next = ent->lru.next; - delta_base_cached -= ent->size; - } + free(ent->data); + detach_delta_base_cache_entry(ent); } void clear_delta_base_cache(void) { - unsigned long p; - for (p = 0; p < MAX_DELTA_CACHE; p++) - release_delta_base_cache(&delta_base_cache[p]); + struct hashmap_iter iter; + struct delta_base_cache_entry *entry; + for (entry = hashmap_iter_first(&delta_base_cache, &iter); + entry; + entry = hashmap_iter_next(&iter)) { + release_delta_base_cache(entry); + } } static void add_delta_base_cache(struct packed_git *p, off_t base_offset, void *base, unsigned long base_size, enum object_type type) { - unsigned long hash = pack_entry_hash(p, base_offset); - struct delta_base_cache_entry *ent = delta_base_cache + hash; - struct delta_base_cache_lru_list *lru; + struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent)); + struct list_head *lru, *tmp; - release_delta_base_cache(ent); delta_base_cached += base_size; - for (lru = delta_base_cache_lru.next; - delta_base_cached > delta_base_cache_limit - && lru != &delta_base_cache_lru; - lru = lru->next) { - struct delta_base_cache_entry *f = (void *)lru; - if (f->type == OBJ_BLOB) - release_delta_base_cache(f); - } - for (lru = delta_base_cache_lru.next; - delta_base_cached > delta_base_cache_limit - && lru != &delta_base_cache_lru; - lru = lru->next) { - struct delta_base_cache_entry *f = (void *)lru; + list_for_each_safe(lru, tmp, &delta_base_cache_lru) { + struct delta_base_cache_entry *f = + list_entry(lru, struct delta_base_cache_entry, lru); + if (delta_base_cached <= delta_base_cache_limit) + break; release_delta_base_cache(f); } - ent->p = p; - ent->base_offset = base_offset; + ent->key.p = p; + ent->key.base_offset = base_offset; ent->type = type; ent->data = base; ent->size = base_size; - ent->lru.next = &delta_base_cache_lru; - ent->lru.prev = delta_base_cache_lru.prev; - delta_base_cache_lru.prev->next = &ent->lru; - delta_base_cache_lru.prev = &ent->lru; + list_add_tail(&ent->lru, &delta_base_cache_lru); + + if (!delta_base_cache.cmpfn) + hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, 0); + hashmap_entry_init(ent, pack_entry_hash(p, base_offset)); + hashmap_add(&delta_base_cache, ent); } static void *read_object(const unsigned char *sha1, enum object_type *type, @@@ -2348,18 -2244,18 +2346,18 @@@ void *unpack_entry(struct packed_git *p struct delta_base_cache_entry *ent; ent = get_delta_base_cache_entry(p, curpos); - if (eq_delta_base_cache_entry(ent, p, curpos)) { + if (ent) { type = ent->type; data = ent->data; size = ent->size; - clear_delta_base_cache_entry(ent); + detach_delta_base_cache_entry(ent); base_from_cache = 1; break; } if (do_check_packed_object_crc && p->index_version > 1) { struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - unsigned long len = revidx[1].offset - obj_offset; + off_t len = revidx[1].offset - obj_offset; if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { const unsigned char *sha1 = nth_packed_object_sha1(p, revidx->nr); @@@ -2408,7 -2304,7 +2406,7 @@@ case OBJ_OFS_DELTA: case OBJ_REF_DELTA: if (data) - die("BUG in unpack_entry: left loop at a valid delta"); + die("BUG: unpack_entry: left loop at a valid delta"); break; case OBJ_COMMIT: case OBJ_TREE: @@@ -2840,7 -2736,7 +2838,7 @@@ int sha1_object_info_extended(const uns int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) { enum object_type type; - struct object_info oi = {NULL}; + struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; oi.sizep = sizep; @@@ -2857,7 -2753,7 +2855,7 @@@ static void *read_packed_sha1(const uns if (!find_pack_entry(sha1, &e)) return NULL; - data = cache_or_unpack_entry(e.p, e.offset, size, type, 1); + data = cache_or_unpack_entry(e.p, e.offset, size, type); if (!data) { /* * We're probably in deep shit, but let's try to fetch diff --combined streaming.c index 3f017a1c05,431254ba41..9afa66b8be --- a/streaming.c +++ b/streaming.c @@@ -135,7 -135,7 +135,7 @@@ struct git_istream *open_istream(const struct stream_filter *filter) { struct git_istream *st; - struct object_info oi = {NULL}; + struct object_info oi = OBJECT_INFO_INIT; const unsigned char *real = lookup_replace_object(sha1); enum input_source src = istream_source(real, type, &oi); @@@ -337,17 -337,17 +337,17 @@@ static open_method_decl(loose st->u.loose.mapped = map_sha1_file(sha1, &st->u.loose.mapsize); if (!st->u.loose.mapped) return -1; - if (unpack_sha1_header(&st->z, - st->u.loose.mapped, - st->u.loose.mapsize, - st->u.loose.hdr, - sizeof(st->u.loose.hdr)) < 0) { + if ((unpack_sha1_header(&st->z, + st->u.loose.mapped, + st->u.loose.mapsize, + st->u.loose.hdr, + sizeof(st->u.loose.hdr)) < 0) || + (parse_sha1_header(st->u.loose.hdr, &st->size) < 0)) { git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); return -1; } - parse_sha1_header(st->u.loose.hdr, &st->size); st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; st->u.loose.hdr_avail = st->z.total_out; st->z_state = z_used; @@@ -497,7 -497,7 +497,7 @@@ static open_method_decl(incore * Users of streaming interface ****************************************************************/ -int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter, +int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, int can_seek) { struct git_istream *st; @@@ -506,7 -506,7 +506,7 @@@ ssize_t kept = 0; int result = -1; - st = open_istream(sha1, &type, &sz, filter); + st = open_istream(oid->hash, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter);