From: Junio C Hamano Date: Fri, 10 Jan 2014 18:33:11 +0000 (-0800) Subject: Merge branch 'jk/oi-delta-base' X-Git-Tag: v1.9-rc0~30 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/b2132068c6df38b24b49dfc6fbbf0645b21ec037?ds=inline;hp=-c Merge branch 'jk/oi-delta-base' Teach "cat-file --batch" to show delta-base object name for a packed object that is represented as a delta. * jk/oi-delta-base: cat-file: provide %(deltabase) batch format sha1_object_info_extended: provide delta base sha1s --- b2132068c6df38b24b49dfc6fbbf0645b21ec037 diff --combined builtin/cat-file.c index f12071adc4,2e0af2e617..d5a93e0e91 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@@ -118,6 -118,7 +118,7 @@@ struct expand_data unsigned long size; unsigned long disk_size; const char *rest; + unsigned char delta_base_sha1[20]; /* * If mark_query is true, we do not expand anything, but rather @@@ -174,6 -175,11 +175,11 @@@ static void expand_atom(struct strbuf * data->split_on_whitespace = 1; else if (data->rest) strbuf_addstr(sb, data->rest); + } else if (is_atom("deltabase", atom, len)) { + if (data->mark_query) + data->info.delta_base_sha1 = data->delta_base_sha1; + else + strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1)); } else die("unknown format element: %.*s", len, atom); } @@@ -193,28 -199,25 +199,28 @@@ static size_t expand_format(struct strb return end - start + 1; } -static void print_object_or_die(int fd, const unsigned char *sha1, - enum object_type type, unsigned long size) +static void print_object_or_die(int fd, struct expand_data *data) { - if (type == OBJ_BLOB) { + const unsigned char *sha1 = data->sha1; + + assert(data->info.typep); + + if (data->type == OBJ_BLOB) { if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0) die("unable to stream %s to stdout", sha1_to_hex(sha1)); } else { - enum object_type rtype; - unsigned long rsize; + enum object_type type; + unsigned long size; void *contents; - contents = read_sha1_file(sha1, &rtype, &rsize); + contents = 
read_sha1_file(sha1, &type, &size); if (!contents) die("object %s disappeared", sha1_to_hex(sha1)); - if (rtype != type) + if (type != data->type) die("object %s changed type!?", sha1_to_hex(sha1)); - if (rsize != size) - die("object %s change size!?", sha1_to_hex(sha1)); + if (data->info.sizep && size != data->size) + die("object %s changed size!?", sha1_to_hex(sha1)); write_or_die(fd, contents, size); free(contents); @@@ -241,7 -244,7 +247,7 @@@ static int batch_one_object(const char return 0; } - if (sha1_object_info_extended(data->sha1, &data->info) < 0) { + if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { printf("%s missing\n", obj_name); fflush(stdout); return 0; @@@ -253,7 -256,7 +259,7 @@@ strbuf_release(&buf); if (opt->print_contents) { - print_object_or_die(1, data->sha1, data->type, data->size); + print_object_or_die(1, data); write_or_die(1, "\n", 1); } return 0; @@@ -277,13 -280,6 +283,13 @@@ static int batch_objects(struct batch_o strbuf_expand(&buf, opt->format, expand_format, &data); data.mark_query = 0; + /* + * If we are printing out the object, then always fill in the type, + * since we will want to decide whether or not to stream. + */ + if (opt->print_contents) + data.info.typep = &data.type; + /* * We are going to call get_sha1 on a potentially very large number of * objects. In most large cases, these will be actual object sha1s. 
The diff --combined cache.h index 9ba9773edf,67356dbe54..83a27269b8 --- a/cache.h +++ b/cache.h @@@ -760,11 -760,11 +760,11 @@@ int daemon_avoid_alias(const char *path int offset_1st_component(const char *path); /* object replacement */ -#define READ_SHA1_FILE_REPLACE 1 +#define LOOKUP_REPLACE_OBJECT 1 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { - return read_sha1_file_extended(sha1, type, size, READ_SHA1_FILE_REPLACE); + return read_sha1_file_extended(sha1, type, size, LOOKUP_REPLACE_OBJECT); } extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) @@@ -773,12 -773,6 +773,12 @@@ return sha1; return do_lookup_replace_object(sha1); } +static inline const unsigned char *lookup_replace_object_extended(const unsigned char *sha1, unsigned flag) +{ + if (!(flag & LOOKUP_REPLACE_OBJECT)) + return sha1; + return lookup_replace_object(sha1); +} /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ extern int sha1_object_info(const unsigned char *, unsigned long *); @@@ -1080,6 -1074,7 +1080,7 @@@ struct object_info enum object_type *typep; unsigned long *sizep; unsigned long *disk_sizep; + unsigned char *delta_base_sha1; /* Response */ enum { @@@ -1104,7 -1099,7 +1105,7 @@@ } packed; } u; }; -extern int sha1_object_info_extended(const unsigned char *, struct object_info *); +extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); /* Dumb servers support */ extern int update_server_info(int); diff --combined sha1_file.c index a2ff2961b1,4e8dd8be6d..e13bd2c3ee --- a/sha1_file.c +++ b/sha1_file.c @@@ -807,38 -807,15 +807,38 @@@ void free_pack_by_name(const char *pack static unsigned int get_max_fd_limit(void) { #ifdef 
RLIMIT_NOFILE - struct rlimit lim; + { + struct rlimit lim; - if (getrlimit(RLIMIT_NOFILE, &lim)) - die_errno("cannot get RLIMIT_NOFILE"); + if (!getrlimit(RLIMIT_NOFILE, &lim)) + return lim.rlim_cur; + } +#endif - return lim.rlim_cur; -#elif defined(_SC_OPEN_MAX) - return sysconf(_SC_OPEN_MAX); -#elif defined(OPEN_MAX) +#ifdef _SC_OPEN_MAX + { + long open_max = sysconf(_SC_OPEN_MAX); + if (0 < open_max) + return open_max; + /* + * Otherwise, we got -1 for one of the two + * reasons: + * + * (1) sysconf() did not understand _SC_OPEN_MAX + * and signaled an error with -1; or + * (2) sysconf() said there is no limit. + * + * We _could_ clear errno before calling sysconf() to + * tell these two cases apart and return a huge number + * in the latter case to let the caller cap it to a + * value that is not so selfish, but letting the + * fallback OPEN_MAX codepath take care of these cases + * is a lot simpler. + */ + } +#endif + +#ifdef OPEN_MAX return OPEN_MAX; #else return 1; /* see the caller ;-) */ @@@ -1690,6 -1667,38 +1690,38 @@@ static off_t get_delta_base(struct pack return base_offset; } + /* + * Like get_delta_base above, but we return the sha1 instead of the pack + * offset. This means it is cheaper for REF deltas (we do not have to do + * the final object lookup), but more expensive for OFS deltas (we + * have to load the revidx to convert the offset back into a sha1). 
+ */ + static const unsigned char *get_delta_base_sha1(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos, + enum object_type type, + off_t delta_obj_offset) + { + if (type == OBJ_REF_DELTA) { + unsigned char *base = use_pack(p, w_curs, curpos, NULL); + return base; + } else if (type == OBJ_OFS_DELTA) { + struct revindex_entry *revidx; + off_t base_offset = get_delta_base(p, w_curs, &curpos, + type, delta_obj_offset); + + if (!base_offset) + return NULL; + + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return NULL; + + return nth_packed_object_sha1(p, revidx->nr); + } else + return NULL; + } + int unpack_object_header(struct packed_git *p, struct pack_window **w_curs, off_t *curpos, @@@ -1847,6 -1856,22 +1879,22 @@@ static int packed_object_info(struct pa } } + if (oi->delta_base_sha1) { + if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { + const unsigned char *base; + + base = get_delta_base_sha1(p, &w_curs, curpos, + type, obj_offset); + if (!base) { + type = OBJ_BAD; + goto out; + } + + hashcpy(oi->delta_base_sha1, base); + } else + hashclr(oi->delta_base_sha1); + } + out: unuse_pack(&w_curs); return type; @@@ -2430,6 -2455,9 +2478,9 @@@ static int sha1_loose_object_info(cons git_zstream stream; char hdr[32]; + if (oi->delta_base_sha1) + hashclr(oi->delta_base_sha1); + /* * If we don't care about type or size, then we don't * need to look inside the object at all. 
Note that we @@@ -2466,14 -2494,13 +2517,14 @@@ return 0; } -int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi) +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags) { struct cached_object *co; struct pack_entry e; int rtype; + const unsigned char *real = lookup_replace_object_extended(sha1, flags); - co = find_cached_object(sha1); + co = find_cached_object(real); if (co) { if (oi->typep) *(oi->typep) = co->type; @@@ -2481,27 -2508,29 +2532,29 @@@ *(oi->sizep) = co->size; if (oi->disk_sizep) *(oi->disk_sizep) = 0; + if (oi->delta_base_sha1) + hashclr(oi->delta_base_sha1); oi->whence = OI_CACHED; return 0; } - if (!find_pack_entry(sha1, &e)) { + if (!find_pack_entry(real, &e)) { /* Most likely it's a loose object. */ - if (!sha1_loose_object_info(sha1, oi)) { + if (!sha1_loose_object_info(real, oi)) { oi->whence = OI_LOOSE; return 0; } /* Not a loose object; someone else may have just packed it. */ reprepare_packed_git(); - if (!find_pack_entry(sha1, &e)) + if (!find_pack_entry(real, &e)) return -1; } rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { - mark_bad_packed_object(e.p, sha1); - return sha1_object_info_extended(sha1, oi); + mark_bad_packed_object(e.p, real); + return sha1_object_info_extended(real, oi, 0); } else if (in_delta_base_cache(e.p, e.offset)) { oi->whence = OI_DBCACHED; } else { @@@ -2523,7 -2552,7 +2576,7 @@@ int sha1_object_info(const unsigned cha oi.typep = &type; oi.sizep = sizep; - if (sha1_object_info_extended(sha1, &oi) < 0) + if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0) return -1; return type; } @@@ -2615,7 -2644,8 +2668,7 @@@ void *read_sha1_file_extended(const uns void *data; char *path; const struct packed_git *p; - const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE) - ? 
lookup_replace_object(sha1) : sha1; + const unsigned char *repl = lookup_replace_object_extended(sha1, flag); errno = 0; data = read_object(repl, type, size); diff --combined t/t1006-cat-file.sh index 1687098355,633dc825ec..a72e700ae4 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@@ -85,28 -85,6 +85,28 @@@ $content git cat-file --batch-check="%(objecttype) %(rest)" >actual && test_cmp expect actual ' + + test -z "$content" || + test_expect_success "--batch without type ($type)" ' + { + echo "$size" && + maybe_remove_timestamp "$content" $no_ts + } >expect && + echo $sha1 | git cat-file --batch="%(objectsize)" >actual.full && + maybe_remove_timestamp "$(cat actual.full)" $no_ts >actual && + test_cmp expect actual + ' + + test -z "$content" || + test_expect_success "--batch without size ($type)" ' + { + echo "$type" && + maybe_remove_timestamp "$content" $no_ts + } >expect && + echo $sha1 | git cat-file --batch="%(objecttype)" >actual.full && + maybe_remove_timestamp "$(cat actual.full)" $no_ts >actual && + test_cmp expect actual + ' } hello_content="Hello World" @@@ -262,4 -240,38 +262,38 @@@ test_expect_success "--batch-check wit "$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)" ' + test_expect_success 'setup blobs which are likely to delta' ' + test-genrandom foo 10240 >foo && + { cat foo; echo plus; } >foo-plus && + git add foo foo-plus && + git commit -m foo && + cat >blobs <<-\EOF + HEAD:foo + HEAD:foo-plus + EOF + ' + + test_expect_success 'confirm that neither loose blob is a delta' ' + cat >expect <<-EOF + $_z40 + $_z40 + EOF + git cat-file --batch-check="%(deltabase)" <blobs >actual && + test_cmp expect actual + ' + + # To avoid relying too much on the current delta heuristics, + # we will check only that one of the two objects is a delta + # against the other, but not the order. We can do so by just + # asking for the base of both, and checking whether either + # sha1 appears in the output.
+ test_expect_success '%(deltabase) reports packed delta bases' ' + git repack -ad && + git cat-file --batch-check="%(deltabase)" <blobs >actual && + { + grep "$(git rev-parse HEAD:foo)" actual || + grep "$(git rev-parse HEAD:foo-plus)" actual + } + ' + test_done