From: Junio C Hamano Date: Thu, 13 Nov 2008 06:26:35 +0000 (-0800) Subject: Merge branch 'np/pack-safer' X-Git-Tag: v1.6.1-rc1~55 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/7b51b77dbc71e83789f59f0cd559489115478f27?ds=inline;hp=-c Merge branch 'np/pack-safer' * np/pack-safer: t5303: fix printf format string for portability t5303: work around printf breakage in dash pack-objects: don't leak pack window reference when splitting packs extend test coverage for latest pack corruption resilience improvements pack-objects: allow "fixing" a corrupted pack without a full repack make find_pack_revindex() aware of the nasty world make check_object() resilient to pack corruptions make packed_object_info() resilient to pack corruptions make unpack_object_header() non fatal better validation on delta base object offsets close another possibility for propagating pack corruption --- 7b51b77dbc71e83789f59f0cd559489115478f27 diff --combined builtin-pack-objects.c index 59ae64d83f,1b6eff314e..67eefa2932 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@@ -71,7 -71,6 +71,7 @@@ static int reuse_delta = 1, reuse_objec static int keep_unreachable, unpack_unreachable, include_tag; static int local; static int incremental; +static int ignore_packed_keep; static int allow_ofs_delta; static const char *base_name; static int progress = 1; @@@ -246,16 -245,8 +246,16 @@@ static unsigned long write_object(struc type = entry->type; /* write limit if limited packsize and not first object */ - limit = pack_size_limit && nr_written ? - pack_size_limit - write_offset : 0; + if (!pack_size_limit || !nr_written) + limit = 0; + else if (pack_size_limit <= write_offset) + /* + * the earlier object did not fit the limit; avoid + * mistaking this with unlimited (i.e. limit = 0). + */ + limit = 1; + else + limit = pack_size_limit - write_offset; if (!entry->delta) usable_delta = 0; /* no delta */ @@@ -286,6 -277,7 +286,7 @@@ */ if (!to_reuse) { + no_reuse: if (!usable_delta) { buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) @@@ -367,46 -359,60 +368,60 @@@ struct revindex_entry *revidx; off_t offset; - if (entry->delta) { + if (entry->delta) type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; - reused_delta++; - } hdrlen = encode_header(type, entry->size, header); + offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; if (!pack_to_stdout && p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) - die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { + error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (!pack_to_stdout && p->index_version == 1 && + check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { + error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); - if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; + reused_delta++; } else if (type == OBJ_REF_DELTA) { - if (limit && hdrlen + 20 + datalen + 20 >= limit) + if (limit && hdrlen + 20 + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, entry->delta->idx.sha1, 20); hdrlen += 20; + reused_delta++; } else { - if (limit && hdrlen + datalen + 20 >= limit) + if (limit && hdrlen + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); } - - if (!pack_to_stdout && p->index_version == 1 && - check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) - die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); copy_pack_data(f, p, &w_curs, offset, datalen); unuse_pack(&w_curs); reused++; @@@ -699,9 -705,6 +714,9 @@@ static int add_object_entry(const unsig return 0; } + if (!exclude && local && has_loose_object_nonlocal(sha1)) + return 0; + for (p = packed_git; p; p = p->next) { off_t offset = find_pack_entry_one(sha1, p); if (offset) { @@@ -715,8 -718,6 +730,8 @@@ return 0; if (local && !p->pack_local) return 0; + if (ignore_packed_keep && p->pack_local && p->pack_keep) + return 0; } } @@@ -1016,9 -1017,11 +1031,11 @@@ static void check_object(struct object_ * We want in_pack_type even if we do not reuse delta * since non-delta representations could still be reused. */ - used = unpack_object_header_gently(buf, avail, + used = unpack_object_header_buffer(buf, avail, &entry->in_pack_type, &entry->size); + if (used == 0) + goto give_up; /* * Determine if this is a delta and if so whether we can @@@ -1030,6 -1033,8 +1047,8 @@@ /* Not a delta hence we've already got all we need. */ entry->type = entry->in_pack_type; entry->in_pack_header_size = used; + if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB) + goto give_up; unuse_pack(&w_curs); return; case OBJ_REF_DELTA: @@@ -1046,19 -1051,25 +1065,25 @@@ ofs = c & 127; while (c & 128) { ofs += 1; - if (!ofs || MSB(ofs, 7)) - die("delta base offset overflow in pack for %s", - sha1_to_hex(entry->idx.sha1)); + if (!ofs || MSB(ofs, 7)) { + error("delta base offset overflow in pack for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } c = buf[used_0++]; ofs = (ofs << 7) + (c & 127); } - if (ofs >= entry->in_pack_offset) - die("delta base offset out of bound for %s", - sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; + if (ofs <= 0 || ofs >= entry->in_pack_offset) { + error("delta base offset out of bound for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); + if (!revidx) + goto give_up; base_ref = nth_packed_object_sha1(p, revidx->nr); } entry->in_pack_header_size = used + used_0; @@@ -1078,6 -1089,7 +1103,7 @@@ */ entry->type = entry->in_pack_type; entry->delta = base_entry; + entry->delta_size = entry->size; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; unuse_pack(&w_curs); @@@ -1092,6 -1104,8 +1118,8 @@@ */ entry->size = get_size_from_delta(p, &w_curs, entry->in_pack_offset + entry->in_pack_header_size); + if (entry->size == 0) + goto give_up; unuse_pack(&w_curs); return; } @@@ -1101,6 -1115,7 +1129,7 @@@ * with sha1_object_info() to find about the object type * at this point... */ + give_up: unuse_pack(&w_curs); } @@@ -1712,6 -1727,16 +1741,16 @@@ static void prepare_pack(int window, in get_object_details(); + /* + * If we're locally repacking then we need to be doubly careful + * from now on in order to make sure no stealth corruption gets + * propagated to the new pack. Clients receiving streamed packs + * should validate everything they get anyway so no need to incur + * the additional cost here in that case. + */ + if (!pack_to_stdout) + do_check_packed_object_crc = 1; + if (!nr_objects || !window || !depth) return; @@@ -2062,10 -2087,6 +2101,10 @@@ int cmd_pack_objects(int argc, const ch incremental = 1; continue; } + if (!strcmp("--honor-pack-keep", arg)) { + ignore_packed_keep = 1; + continue; + } if (!prefixcmp(arg, "--compression=")) { char *end; int level = strtoul(arg+14, &end, 0); diff --combined cache.h index c776f2f5ab,c440598e27..3b5f0c4c00 --- a/cache.h +++ b/cache.h @@@ -262,7 -262,6 +262,7 @@@ static inline void remove_name_hash(str #define read_cache() read_index(&the_index) #define read_cache_from(path) read_index_from(&the_index, (path)) +#define is_cache_unborn() is_index_unborn(&the_index) #define read_cache_unmerged() read_index_unmerged(&the_index) #define write_cache(newfd, cache, entries) write_index(&the_index, (newfd)) #define discard_cache() discard_index(&the_index) @@@ -369,7 -368,6 +369,7 @@@ extern int init_db(const char *template /* Initialize and use the cache information */ extern int read_index(struct index_state *); extern int read_index_from(struct index_state *, const char *path); +extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); extern int write_index(const struct index_state *, int newfd); extern int discard_index(struct index_state *); @@@ -436,7 -434,7 +436,7 @@@ extern int commit_locked_index(struct l extern void set_alternate_index_output(const char *); extern int close_lock_file(struct lock_file *); extern void rollback_lock_file(struct lock_file *); -extern int delete_ref(const char *, const unsigned char *sha1); +extern int delete_ref(const char *, const unsigned char *sha1, int delopt); /* Environment bits from configuration mechanism */ extern int trust_executable_bit; @@@ -497,13 -495,6 +497,13 @@@ extern int check_repository_format(void #define DATA_CHANGED 0x0020 #define TYPE_CHANGED 0x0040 +extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *git_snpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *git_pathdup(const char *fmt, ...) + __attribute__((format (printf, 1, 2))); + /* Return a statically allocated filename matching the sha1 signature */ extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); @@@ -574,13 -565,15 +574,16 @@@ extern int force_object_loose(const uns /* just like read_sha1_file(), but non fatal in presence of bad objects */ extern void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size); + /* global flag to enable extra checks when accessing packed objects */ + extern int do_check_packed_object_crc; + extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); extern int move_temp_to_file(const char *tmpfile, const char *filename); extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); extern int has_sha1_file(const unsigned char *sha1); +extern int has_loose_object_nonlocal(const unsigned char *sha1); extern int has_pack_file(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); @@@ -689,8 -682,7 +692,8 @@@ extern struct packed_git int index_version; time_t mtime; int pack_fd; - int pack_local; + unsigned pack_local:1, + pack_keep:1; unsigned char sha1[20]; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[FLEX_ARRAY]; /* more */ @@@ -762,7 -754,7 +765,7 @@@ extern const unsigned char *nth_packed_ extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t); extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); - extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); + extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); extern int matches_pack_name(struct packed_git *p, const char *name); diff --combined index-pack.c index fe75332a9c,da03eeeca1..60ed41a993 --- a/index-pack.c +++ b/index-pack.c @@@ -221,23 -221,17 +221,23 @@@ static void bad_object(unsigned long of die("pack has bad object at offset %lu: %s", offset, buf); } +static void free_base_data(struct base_data *c) +{ + if (c->data) { + free(c->data); + c->data = NULL; + base_cache_used -= c->size; + } +} + static void prune_base_data(struct base_data *retain) { struct base_data *b = base_cache; for (b = base_cache; base_cache_used > delta_base_cache_limit && b; b = b->child) { - if (b->data && b != retain) { - free(b->data); - b->data = NULL; - base_cache_used -= b->size; - } + if (b->data && b != retain) + free_base_data(b); } } @@@ -250,8 -244,7 +250,8 @@@ static void link_base_data(struct base_ c->base = base; c->child = NULL; - base_cache_used += c->size; + if (c->data) + base_cache_used += c->size; prune_base_data(c); } @@@ -262,7 -255,10 +262,7 @@@ static void unlink_base_data(struct bas base->child = NULL; else base_cache = NULL; - if (c->data) { - free(c->data); - base_cache_used -= c->size; - } + free_base_data(c); } static void *unpack_entry_data(unsigned long offset, unsigned long size) @@@ -338,7 -334,7 +338,7 @@@ static void *unpack_raw_entry(struct ob base_offset = (base_offset << 7) + (c & 127); } delta_base->offset = obj->idx.offset - base_offset; - if (delta_base->offset >= obj->idx.offset) + if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset) bad_object(obj->idx.offset, "delta base offset is out of bound"); break; case OBJ_COMMIT: @@@ -412,24 -408,22 +412,24 @@@ static int find_delta(const union delta return -first-1; } -static int find_delta_children(const union delta_base *base, - int *first_index, int *last_index) +static void find_delta_children(const union delta_base *base, + int *first_index, int *last_index) { int first = find_delta(base); int last = first; int end = nr_deltas - 1; - if (first < 0) - return -1; + if (first < 0) { + *first_index = 0; + *last_index = -1; + return; + } while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) --first; while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) ++last; *first_index = first; *last_index = last; - return 0; } static void sha1_object(const void *data, unsigned long size, @@@ -500,10 -494,8 +500,10 @@@ static void *get_base_data(struct base_ free(raw); if (!c->data) bad_object(obj->idx.offset, "failed to apply delta"); - } else + } else { c->data = get_data_from_pack(obj); + c->size = obj->size; + } base_cache_used += c->size; prune_base_data(c); @@@ -512,74 -504,49 +512,74 @@@ } static void resolve_delta(struct object_entry *delta_obj, - struct base_data *base_obj, enum object_type type) + struct base_data *base, struct base_data *result) { - void *delta_data; - unsigned long delta_size; - union delta_base delta_base; - int j, first, last; - struct base_data result; + void *base_data, *delta_data; - delta_obj->real_type = type; + delta_obj->real_type = base->obj->real_type; delta_data = get_data_from_pack(delta_obj); - delta_size = delta_obj->size; - result.data = patch_delta(get_base_data(base_obj), base_obj->size, - delta_data, delta_size, - &result.size); + base_data = get_base_data(base); + result->obj = delta_obj; + result->data = patch_delta(base_data, base->size, + delta_data, delta_obj->size, &result->size); free(delta_data); - if (!result.data) + if (!result->data) bad_object(delta_obj->idx.offset, "failed to apply delta"); - sha1_object(result.data, result.size, type, delta_obj->idx.sha1); + sha1_object(result->data, result->size, delta_obj->real_type, + delta_obj->idx.sha1); nr_resolved_deltas++; +} + +static void find_unresolved_deltas(struct base_data *base, + struct base_data *prev_base) +{ + int i, ref_first, ref_last, ofs_first, ofs_last; + + /* + * This is a recursive function. Those brackets should help reducing + * stack usage by limiting the scope of the delta_base union. + */ + { + union delta_base base_spec; + + hashcpy(base_spec.sha1, base->obj->idx.sha1); + find_delta_children(&base_spec, &ref_first, &ref_last); - result.obj = delta_obj; - link_base_data(base_obj, &result); + memset(&base_spec, 0, sizeof(base_spec)); + base_spec.offset = base->obj->idx.offset; + find_delta_children(&base_spec, &ofs_first, &ofs_last); + } - hashcpy(delta_base.sha1, delta_obj->idx.sha1); - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &result, type); + if (ref_last == -1 && ofs_last == -1) { + free(base->data); + return; + } + + link_base_data(prev_base, base); + + for (i = ref_first; i <= ref_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_REF_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ref_last && ofs_last == -1) + free_base_data(base); + find_unresolved_deltas(&result, base); } } - memset(&delta_base, 0, sizeof(delta_base)); - delta_base.offset = delta_obj->idx.offset; - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &result, type); + for (i = ofs_first; i <= ofs_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_OFS_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ofs_last) + free_base_data(base); + find_unresolved_deltas(&result, base); } } - unlink_base_data(&result); + unlink_base_data(base); } static int compare_delta_entry(const void *a, const void *b) @@@ -655,13 -622,37 +655,13 @@@ static void parse_pack_objects(unsigne progress = start_progress("Resolving deltas", nr_deltas); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - union delta_base base; - int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; struct base_data base_obj; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - hashcpy(base.sha1, obj->idx.sha1); - ref = !find_delta_children(&base, &ref_first, &ref_last); - memset(&base, 0, sizeof(base)); - base.offset = obj->idx.offset; - ofs = !find_delta_children(&base, &ofs_first, &ofs_last); - if (!ref && !ofs) - continue; - base_obj.data = get_data_from_pack(obj); - base_obj.size = obj->size; base_obj.obj = obj; - link_base_data(NULL, &base_obj); - - if (ref) - for (j = ref_first; j <= ref_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - if (ofs) - for (j = ofs_first; j <= ofs_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - unlink_base_data(&base_obj); + base_obj.data = NULL; + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } } @@@ -754,6 -745,7 +754,6 @@@ static void fix_unresolved_deltas(struc for (i = 0; i < n; i++) { struct delta_entry *d = sorted_by_pos[i]; enum object_type type; - int j, first, last; struct base_data base_obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) @@@ -767,7 -759,16 +767,7 @@@ die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); base_obj.obj = append_obj_to_pack(f, d->base.sha1, base_obj.data, base_obj.size, type); - link_base_data(NULL, &base_obj); - - find_delta_children(&d->base, &first, &last); - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, type); - } - - unlink_base_data(&base_obj); + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); diff --combined sha1_file.c index 491220572b,9ce1df0cff..0fa65baa59 --- a/sha1_file.c +++ b/sha1_file.c @@@ -423,30 -423,23 +423,30 @@@ void prepare_alt_odb(void read_info_alternates(get_object_directory(), 0); } -static int has_loose_object(const unsigned char *sha1) +static int has_loose_object_local(const unsigned char *sha1) { char *name = sha1_file_name(sha1); - struct alternate_object_database *alt; + return !access(name, F_OK); +} - if (!access(name, F_OK)) - return 1; +int has_loose_object_nonlocal(const unsigned char *sha1) +{ + struct alternate_object_database *alt; prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { - name = alt->name; - fill_sha1_path(name, sha1); + fill_sha1_path(alt->name, sha1); if (!access(alt->base, F_OK)) return 1; } return 0; } +static int has_loose_object(const unsigned char *sha1) +{ + return has_loose_object_local(sha1) || + has_loose_object_nonlocal(sha1); +} + static unsigned int pack_used_ctr; static unsigned int pack_mmap_calls; static unsigned int peak_pack_open_windows; @@@ -848,11 -841,6 +848,11 @@@ struct packed_git *add_packed_git(cons return NULL; } memcpy(p->pack_name, path, path_len); + + strcpy(p->pack_name + path_len, ".keep"); + if (!access(p->pack_name, F_OK)) + p->pack_keep = 1; + strcpy(p->pack_name + path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); @@@ -1122,7 -1110,8 +1122,8 @@@ static int legacy_loose_object(unsigne return 0; } - unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) + unsigned long unpack_object_header_buffer(const unsigned char *buf, + unsigned long len, enum object_type *type, unsigned long *sizep) { unsigned shift; unsigned char c; @@@ -1134,10 -1123,10 +1135,10 @@@ size = c & 15; shift = 4; while (c & 0x80) { - if (len <= used) - return 0; - if (sizeof(long) * 8 <= shift) + if (len <= used || sizeof(long) * 8 <= shift) { + error("bad object header"); return 0; + } c = buf[used++]; size += (c & 0x7f) << shift; shift += 7; @@@ -1176,7 -1165,7 +1177,7 @@@ static int unpack_sha1_header(z_stream * really worth it and we don't write it any longer. But we * can still read it. */ - used = unpack_object_header_gently(map, mapsize, &type, &size); + used = unpack_object_header_buffer(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; map += used; @@@ -1325,8 -1314,10 +1326,10 @@@ unsigned long get_size_from_delta(struc } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); - if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) - die("delta data unpack-initial failed"); + if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) { + error("delta data unpack-initial failed"); + return 0; + } /* Examine the initial part of the delta to figure out * the result size. @@@ -1367,7 -1358,7 +1370,7 @@@ static off_t get_delta_base(struct pack base_offset = (base_offset << 7) + (c & 127); } base_offset = delta_obj_offset - base_offset; - if (base_offset >= delta_obj_offset) + if (base_offset <= 0 || base_offset >= delta_obj_offset) return 0; /* out of bound */ *curpos += used; } else if (type == OBJ_REF_DELTA) { @@@ -1393,15 -1384,32 +1396,32 @@@ static int packed_delta_info(struct pac off_t base_offset; base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); + if (!base_offset) + return OBJ_BAD; type = packed_object_info(p, base_offset, NULL); + if (type <= OBJ_NONE) { + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return OBJ_BAD; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); + mark_bad_packed_object(p, base_sha1); + type = sha1_object_info(base_sha1, NULL); + if (type <= OBJ_NONE) + return OBJ_BAD; + } /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (sizep) + if (sizep) { *sizep = get_size_from_delta(p, w_curs, curpos); + if (*sizep == 0) + type = OBJ_BAD; + } return type; } @@@ -1423,10 -1431,11 +1443,11 @@@ static int unpack_object_header(struct * insane, so we know won't exceed what we have been given. */ base = use_pack(p, w_curs, *curpos, &left); - used = unpack_object_header_gently(base, left, &type, sizep); - if (!used) - die("object offset outside of pack file"); - *curpos += used; + used = unpack_object_header_buffer(base, left, &type, sizep); + if (!used) { + type = OBJ_BAD; + } else + *curpos += used; return type; } @@@ -1510,8 -1519,9 +1531,9 @@@ static int packed_object_info(struct pa *sizep = size; break; default: - die("pack %s contains unknown object type %d", - p->pack_name, type); + error("unknown object type %i at offset %"PRIuMAX" in %s", + type, (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; } unuse_pack(&w_curs); return type; @@@ -1675,9 -1685,12 +1697,12 @@@ static void *unpack_delta_entry(struct * This is costly but should happen only in the presence * of a corrupted pack, and is better than failing outright. */ - struct revindex_entry *revidx = find_pack_revindex(p, base_offset); - const unsigned char *base_sha1 = - nth_packed_object_sha1(p, revidx->nr); + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return NULL; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", sha1_to_hex(base_sha1), (uintmax_t)base_offset, @@@ -1706,6 -1719,8 +1731,8 @@@ return result; } + int do_check_packed_object_crc; + void *unpack_entry(struct packed_git *p, off_t obj_offset, enum object_type *type, unsigned long *sizep) { @@@ -1713,6 -1728,19 +1740,19 @@@ off_t curpos = obj_offset; void *data; + if (do_check_packed_object_crc && p->index_version > 1) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + unsigned long len = revidx[1].offset - obj_offset; + if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + const unsigned char *sha1 = + nth_packed_object_sha1(p, revidx->nr); + error("bad packed object CRC for %s", + sha1_to_hex(sha1)); + mark_bad_packed_object(p, sha1); + return NULL; + } + } + *type = unpack_object_header(p, &w_curs, &curpos, sizep); switch (*type) { case OBJ_OFS_DELTA: @@@ -1966,7 -1994,14 +2006,14 @@@ int sha1_object_info(const unsigned cha if (!find_pack_entry(sha1, &e, NULL)) return status; } - return packed_object_info(e.p, e.offset, sizep); + + status = packed_object_info(e.p, e.offset, sizep); + if (status < 0) { + mark_bad_packed_object(e.p, sha1); + status = sha1_object_info(sha1, sizep); + } + + return status; } static void *read_packed_sha1(const unsigned char *sha1, diff --combined t/t5302-pack-index.sh index b0b0fdaca5,29896141b9..884e24253a --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@@ -11,18 -11,13 +11,18 @@@ test_expect_success 'rm -rf .git git init && i=1 && - while test $i -le 100 + while test $i -le 100 do - i=`printf '%03i' $i` - echo $i >file_$i && - test-genrandom "$i" 8192 >>file_$i && - git update-index --add file_$i && - i=`expr $i + 1` || return 1 + iii=`printf '%03i' $i` + test-genrandom "bar" 200 > wide_delta_$iii && + test-genrandom "baz $iii" 50 >> wide_delta_$iii && + test-genrandom "foo"$i 100 > deep_delta_$iii && + test-genrandom "foo"`expr $i + 1` 100 >> deep_delta_$iii && + test-genrandom "foo"`expr $i + 2` 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=`expr $i + 1` || return 1 done && { echo 101 && test-genrandom 100 8192; } >file_101 && git update-index --add file_101 && @@@ -97,31 -92,6 +97,31 @@@ test_expect_success '64-bit offsets: index-pack result should match pack-objects one' \ 'cmp "test-3-${pack3}.idx" "3.idx"' +# returns the object number for given object in given pack index +index_obj_nr() +{ + idx_file=$1 + object_sha1=$2 + nr=0 + git show-index < $idx_file | + while read offs sha1 extra + do + nr=$(($nr + 1)) + test "$sha1" = "$object_sha1" || continue + echo "$(($nr - 1))" + break + done +} + +# returns the pack offset for given object as found in given pack index +index_obj_offset() +{ + idx_file=$1 + object_sha1=$2 + git show-index < $idx_file | grep $object_sha1 | + ( read offs extra && echo "$offs" ) +} + test_expect_success \ '[index v1] 1) stream pack to repository' \ 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && @@@ -132,22 -102,19 +132,22 @@@ test_expect_success \ '[index v1] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 && - git cat-file blob "$delta_sha1" > blob_1 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((256 * 4 + 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_2 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((4 + 256 * 4 + $nr_099 * 24)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo1' test_expect_success \ '[index v1] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_1 blob_2' + 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1' test_expect_success \ '[index v1] 4) confirm that the pack is actually corrupted' \ @@@ -173,22 -140,19 +173,22 @@@ test_expect_success test_expect_success \ '[index v2] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 delta_crc && - git cat-file blob "$delta_sha1" > blob_3 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((8 + 256 * 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_4 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((8 + 256 * 4 + $nr_099 * 20)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo2' test_expect_success \ '[index v2] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_3 blob_4' + 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2' test_expect_success \ '[index v2] 4) confirm that the pack is actually corrupted' \ @@@ -196,18 -160,17 +196,19 @@@ test_expect_success \ '[index v2] 5) pack-objects refuses to reuse corrupted data' \ - 'test_must_fail git pack-objects test-5 /dev/null || exit 1 done