From: Junio C Hamano Date: Thu, 28 Jul 2016 17:34:42 +0000 (-0700) Subject: Merge branch 'nd/pack-ofs-4gb-limit' X-Git-Tag: v2.10.0-rc0~77 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/ad2d77760434e1650c186c71fa04a8fdbd77266c?ds=inline;hp=-c Merge branch 'nd/pack-ofs-4gb-limit' "git pack-objects" and "git index-pack" mostly operate with off_t when talking about the offset of objects in a packfile, but there were a handful of places that used "unsigned long" to hold that value, leading to an unintended truncation. * nd/pack-ofs-4gb-limit: fsck: use streaming interface for large blobs in pack pack-objects: do not truncate result in-pack object size on 32-bit systems index-pack: correct "offset" type in unpack_entry_data() index-pack: report correct bad object offsets even if they are large index-pack: correct "len" type in unpack_data() sha1_file.c: use type off_t* for object_info->disk_sizep pack-objects: pass length to check_pack_crc() without truncation --- ad2d77760434e1650c186c71fa04a8fdbd77266c diff --combined builtin/cat-file.c index 618103fdee,13ed944d2d..2dfe6265f7 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@@ -131,7 -131,7 +131,7 @@@ struct expand_data unsigned char sha1[20]; enum object_type type; unsigned long size; - unsigned long disk_size; + off_t disk_size; const char *rest; unsigned char delta_base_sha1[20]; @@@ -154,13 -154,6 +154,13 @@@ * elements above, so you can retrieve the response from there. */ struct object_info info; + + /* + * This flag will be true if the requested batch format and options + * don't require us to call sha1_object_info, which can then be + * optimized out. + */ + unsigned skip_object_info : 1; }; static int is_atom(const char *atom, const char *s, int slen) @@@ -191,7 -184,7 +191,7 @@@ static void expand_atom(struct strbuf * if (data->mark_query) data->info.disk_sizep = &data->disk_size; else - strbuf_addf(sb, "%lu", data->disk_size); + strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size); } else if (is_atom("rest", atom, len)) { if (data->mark_query) data->split_on_whitespace = 1; @@@ -265,8 -258,7 +265,8 @@@ static void batch_object_write(const ch { struct strbuf buf = STRBUF_INIT; - if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { + if (!data->skip_object_info && + sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { printf("%s missing\n", obj_name ? obj_name : sha1_to_hex(data->sha1)); fflush(stdout); return; @@@ -377,13 -369,6 +377,13 @@@ static int batch_objects(struct batch_o strbuf_expand(&buf, opt->format, expand_format, &data); data.mark_query = 0; + if (opt->all_objects) { + struct object_info empty; + memset(&empty, 0, sizeof(empty)); + if (!memcmp(&data.info, &empty, sizeof(empty))) + data.skip_object_info = 1; + } + /* * If we are printing out the object, then always fill in the type, * since we will want to decide whether or not to stream. @@@ -504,7 -489,6 +504,7 @@@ int cmd_cat_file(int argc, const char * git_config(git_cat_file_config, NULL); + batch.buffer_output = -1; argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0); if (opt) { @@@ -528,9 -512,6 +528,9 @@@ usage_with_options(cat_file_usage, options); } + if (batch.buffer_output < 0) + batch.buffer_output = batch.all_objects; + if (batch.enabled) return batch_objects(&batch); diff --combined builtin/fsck.c index c6d17e63fd,b08bc8be24..2de272ea36 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@@ -13,7 -13,6 +13,7 @@@ #include "dir.h" #include "progress.h" #include "streaming.h" +#include "decorate.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@@ -36,26 -35,11 +36,26 @@@ static int write_lost_and_found static int verbose; static int show_progress = -1; static int show_dangling = 1; +static int name_objects; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 #define ERROR_PACK 04 #define ERROR_REFS 010 +static const char *describe_object(struct object *obj) +{ + static struct strbuf buf = STRBUF_INIT; + char *name = name_objects ? + lookup_decoration(fsck_walk_options.object_names, obj) : NULL; + + strbuf_reset(&buf); + strbuf_addstr(&buf, oid_to_hex(&obj->oid)); + if (name) + strbuf_addf(&buf, " (%s)", name); + + return buf.buf; +} + static int fsck_config(const char *var, const char *value, void *cb) { if (strcmp(var, "fsck.skiplist") == 0) { @@@ -83,7 -67,7 +83,7 @@@ static void objreport(struct object *ob const char *err) { fprintf(stderr, "%s in %s %s: %s\n", - msg_type, typename(obj->type), oid_to_hex(&obj->oid), err); + msg_type, typename(obj->type), describe_object(obj), err); } static int objerror(struct object *obj, const char *err) @@@ -93,8 -77,7 +93,8 @@@ return -1; } -static int fsck_error_func(struct object *obj, int type, const char *message) +static int fsck_error_func(struct fsck_options *o, + struct object *obj, int type, const char *message) { objreport(obj, (type == FSCK_WARN) ? "warning" : "error", message); return (type == FSCK_WARN) ? 0 : 1; @@@ -114,7 -97,7 +114,7 @@@ static int mark_object(struct object *o if (!obj) { /* ... these references to parent->fld are safe here */ printf("broken link from %7s %s\n", - typename(parent->type), oid_to_hex(&parent->oid)); + typename(parent->type), describe_object(parent)); printf("broken link from %7s %s\n", (type == OBJ_ANY ? "unknown" : typename(type)), "unknown"); errors_found |= ERROR_REACHABLE; @@@ -131,9 -114,9 +131,9 @@@ if (!(obj->flags & HAS_OBJ)) { if (parent && !has_object_file(&obj->oid)) { printf("broken link from %7s %s\n", - typename(parent->type), oid_to_hex(&parent->oid)); + typename(parent->type), describe_object(parent)); printf(" to %7s %s\n", - typename(obj->type), oid_to_hex(&obj->oid)); + typename(obj->type), describe_object(obj)); errors_found |= ERROR_REACHABLE; } return 1; @@@ -207,8 -190,7 +207,8 @@@ static void check_reachable_object(stru return; /* it is in pack - forget about it */ if (connectivity_only && has_object_file(&obj->oid)) return; - printf("missing %s %s\n", typename(obj->type), oid_to_hex(&obj->oid)); + printf("missing %s %s\n", typename(obj->type), + describe_object(obj)); errors_found |= ERROR_REACHABLE; return; } @@@ -233,8 -215,7 +233,8 @@@ static void check_unreachable_object(st * since this is something that is prunable. */ if (show_unreachable) { - printf("unreachable %s %s\n", typename(obj->type), oid_to_hex(&obj->oid)); + printf("unreachable %s %s\n", typename(obj->type), + describe_object(obj)); return; } @@@ -253,11 -234,11 +253,11 @@@ if (!obj->used) { if (show_dangling) printf("dangling %s %s\n", typename(obj->type), - oid_to_hex(&obj->oid)); + describe_object(obj)); if (write_lost_and_found) { char *filename = git_pathdup("lost-found/%s/%s", obj->type == OBJ_COMMIT ? "commit" : "other", - oid_to_hex(&obj->oid)); + describe_object(obj)); FILE *f; if (safe_create_leading_directories_const(filename)) { @@@ -271,7 -252,7 +271,7 @@@ if (stream_blob_to_fd(fileno(f), obj->oid.hash, NULL, 1)) die_errno("Could not write '%s'", filename); } else - fprintf(f, "%s\n", oid_to_hex(&obj->oid)); + fprintf(f, "%s\n", describe_object(obj)); if (fclose(f)) die_errno("Could not finish '%s'", filename); @@@ -290,7 -271,7 +290,7 @@@ static void check_object(struct object *obj) { if (verbose) - fprintf(stderr, "Checking %s\n", oid_to_hex(&obj->oid)); + fprintf(stderr, "Checking %s\n", describe_object(obj)); if (obj->flags & REACHABLE) check_reachable_object(obj); @@@ -326,7 -307,7 +326,7 @@@ static int fsck_obj(struct object *obj if (verbose) fprintf(stderr, "Checking %s %s\n", - typename(obj->type), oid_to_hex(&obj->oid)); + typename(obj->type), describe_object(obj)); if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); @@@ -345,17 -326,15 +345,17 @@@ free_commit_buffer(commit); if (!commit->parents && show_root) - printf("root %s\n", oid_to_hex(&commit->object.oid)); + printf("root %s\n", describe_object(&commit->object)); } if (obj->type == OBJ_TAG) { struct tag *tag = (struct tag *) obj; if (show_tags && tag->tagged) { - printf("tagged %s %s", typename(tag->tagged->type), oid_to_hex(&tag->tagged->oid)); - printf(" (%s) in %s\n", tag->tag, oid_to_hex(&tag->object.oid)); + printf("tagged %s %s", typename(tag->tagged->type), + describe_object(tag->tagged)); + printf(" (%s) in %s\n", tag->tag, + describe_object(&tag->object)); } } @@@ -377,6 -356,10 +377,10 @@@ static int fsck_sha1(const unsigned cha static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, unsigned long size, void *buffer, int *eaten) { + /* + * Note, buffer may be NULL if type is OBJ_BLOB. See + * verify_packfile(), data_valid variable for details. + */ struct object *obj; obj = parse_object_buffer(sha1, type, size, buffer, eaten); if (!obj) { @@@ -389,18 -372,13 +393,18 @@@ static int default_refs; -static void fsck_handle_reflog_sha1(const char *refname, unsigned char *sha1) +static void fsck_handle_reflog_sha1(const char *refname, unsigned char *sha1, + unsigned long timestamp) { struct object *obj; if (!is_null_sha1(sha1)) { obj = lookup_object(sha1); if (obj) { + if (timestamp && name_objects) + add_decoration(fsck_walk_options.object_names, + obj, + xstrfmt("%s@{%ld}", refname, timestamp)); obj->used = 1; mark_object_reachable(obj); } else { @@@ -420,8 -398,8 +424,8 @@@ static int fsck_handle_reflog_ent(unsig fprintf(stderr, "Checking reflog %s->%s\n", sha1_to_hex(osha1), sha1_to_hex(nsha1)); - fsck_handle_reflog_sha1(refname, osha1); - fsck_handle_reflog_sha1(refname, nsha1); + fsck_handle_reflog_sha1(refname, osha1, 0); + fsck_handle_reflog_sha1(refname, nsha1, timestamp); return 0; } @@@ -450,9 -428,6 +454,9 @@@ static int fsck_handle_ref(const char * } default_refs++; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(refname)); mark_object_reachable(obj); return 0; @@@ -522,12 -497,13 +526,12 @@@ static void fsck_object_dir(const char static int fsck_head_link(void) { - int flag; int null_is_error = 0; if (verbose) fprintf(stderr, "Checking HEAD link\n"); - head_points_at = resolve_ref_unsafe("HEAD", 0, head_oid.hash, &flag); + head_points_at = resolve_ref_unsafe("HEAD", 0, head_oid.hash, NULL); if (!head_points_at) { errors_found |= ERROR_REFS; return error("Invalid HEAD"); @@@ -568,9 -544,6 +572,9 @@@ static int fsck_cache_tree(struct cache return 1; } obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(":")); mark_object_reachable(obj); if (obj->type != OBJ_TREE) err |= objerror(obj, "non-tree in cache-tree"); @@@ -599,7 -572,6 +603,7 @@@ static struct option fsck_opts[] = OPT_BOOL(0, "lost-found", &write_lost_and_found, N_("write dangling objects in .git/lost-found")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), + OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")), OPT_END(), }; @@@ -629,10 -601,6 +633,10 @@@ int cmd_fsck(int argc, const char **arg include_reflogs = 0; } + if (name_objects) + fsck_walk_options.object_names = + xcalloc(1, sizeof(struct decoration)); + git_config(fsck_config, NULL); fsck_head_link(); @@@ -688,9 -656,6 +692,9 @@@ continue; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, xstrdup(arg)); mark_object_reachable(obj); heads++; continue; @@@ -723,10 -688,6 +727,10 @@@ continue; obj = &blob->object; obj->used = 1; + if (name_objects) + add_decoration(fsck_walk_options.object_names, + obj, + xstrfmt(":%s", active_cache[i]->name)); mark_object_reachable(obj); } if (active_cache_tree) diff --combined builtin/pack-objects.c index a2f8cfdec0,ac7a3a5895..92e2e5f7a8 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -44,7 -44,6 +44,7 @@@ static int non_empty static int reuse_delta = 1, reuse_object = 1; static int keep_unreachable, unpack_unreachable, include_tag; static unsigned long unpack_unreachable_expiration; +static int pack_loose_unreachable; static int local; static int incremental; static int ignore_packed_keep; @@@ -342,15 -341,15 +342,15 @@@ static unsigned long write_no_reuse_obj } /* Return 0 if we will bust the pack-size limit */ - static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry, - unsigned long limit, int usable_delta) + static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry, + unsigned long limit, int usable_delta) { struct packed_git *p = entry->in_pack; struct pack_window *w_curs = NULL; struct revindex_entry *revidx; off_t offset; enum object_type type = entry->type; - unsigned long datalen; + off_t datalen; unsigned char header[10], dheader[10]; unsigned hdrlen; @@@ -416,11 -415,12 +416,12 @@@ } /* Return 0 if we will bust the pack-size limit */ - static unsigned long write_object(struct sha1file *f, - struct object_entry *entry, - off_t write_offset) + static off_t write_object(struct sha1file *f, + struct object_entry *entry, + off_t write_offset) { - unsigned long limit, len; + unsigned long limit; + off_t len; int usable_delta, to_reuse; if (!pack_to_stdout) @@@ -492,7 -492,7 +493,7 @@@ static enum write_one_status write_one( struct object_entry *e, off_t *offset) { - unsigned long size; + off_t size; int recursing; /* @@@ -836,7 -836,8 +837,7 @@@ static void write_pack_file(void * to preserve this property. */ if (stat(pack_tmp_name, &st) < 0) { - warning("failed to stat %s: %s", - pack_tmp_name, strerror(errno)); + warning_errno("failed to stat %s", pack_tmp_name); } else if (!last_mtime) { last_mtime = st.st_mtime; } else { @@@ -844,7 -845,8 +845,7 @@@ utb.actime = st.st_atime; utb.modtime = --last_mtime; if (utime(pack_tmp_name, &utb) < 0) - warning("failed utime() on %s: %s", - pack_tmp_name, strerror(errno)); + warning_errno("failed utime() on %s", pack_tmp_name); } strbuf_addf(&tmpname, "%s-", base_name); @@@ -1192,7 -1194,7 +1193,7 @@@ static void add_pbase_object(struct tre if (cmp < 0) return; if (name[cmplen] != '/') { - add_object_entry(entry.sha1, + add_object_entry(entry.oid->hash, object_type(entry.mode), fullname, 1); return; @@@ -1203,7 -1205,7 +1204,7 @@@ const char *down = name+cmplen+1; int downlen = name_cmp_len(down); - tree = pbase_tree_get(entry.sha1); + tree = pbase_tree_get(entry.oid->hash); if (!tree) return; init_tree_desc(&sub, tree->tree_data, tree->tree_size); @@@ -2379,32 -2381,6 +2380,32 @@@ static void add_objects_in_unpacked_pac free(in_pack.array); } +static int add_loose_object(const unsigned char *sha1, const char *path, + void *data) +{ + enum object_type type = sha1_object_info(sha1, NULL); + + if (type < 0) { + warning("loose object at %s could not be examined", path); + return 0; + } + + add_object_entry(sha1, type, "", 0); + return 0; +} + +/* + * We actually don't even have to worry about reachability here. + * add_object_entry will weed out duplicates, so we just add every + * loose object we find. + */ +static void add_unreachable_loose_objects(void) +{ + for_each_loose_file_in_objdir(get_object_directory(), + add_loose_object, + NULL, NULL, NULL); +} + static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) { static struct packed_git *last_found = (void *)1; @@@ -2574,8 -2550,6 +2575,8 @@@ static void get_object_list(int ac, con if (keep_unreachable) add_objects_in_unpacked_packs(&revs); + if (pack_loose_unreachable) + add_unreachable_loose_objects(); if (unpack_unreachable) loosen_unused_packed_objects(&revs); @@@ -2676,8 -2650,6 +2677,8 @@@ int cmd_pack_objects(int argc, const ch N_("include tag objects that refer to objects to be packed")), OPT_BOOL(0, "keep-unreachable", &keep_unreachable, N_("keep unreachable objects")), + OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable, + N_("pack loose unreachable objects")), { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than