From: Junio C Hamano Date: Tue, 31 Jan 2017 21:14:57 +0000 (-0800) Subject: Merge branch 'jk/loose-object-fsck' X-Git-Tag: v2.12.0-rc0~40 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/42ace93e41da0abe5a264fb8661f1c7de88206ec?hp=-c Merge branch 'jk/loose-object-fsck' "git fsck" inspects loose objects more carefully now. * jk/loose-object-fsck: fsck: detect trailing garbage in all object types fsck: parse loose object paths directly sha1_file: add read_loose_object() function t1450: test fsck of packed objects sha1_file: fix error message for alternate objects t1450: refactor loose-object removal --- 42ace93e41da0abe5a264fb8661f1c7de88206ec diff --combined cache.h index 00a029af36,ed45895706..5cac28788d --- a/cache.h +++ b/cache.h @@@ -514,6 -514,7 +514,6 @@@ extern void set_git_work_tree(const cha #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES" -extern const char **get_pathspec(const char *prefix, const char **pathspec); extern void setup_work_tree(void); extern const char *setup_git_directory_gently(int *); extern const char *setup_git_directory(void); @@@ -598,7 -599,7 +598,7 @@@ extern int chmod_index_entry(struct ind extern int ce_same_name(const struct cache_entry *a, const struct cache_entry *b); extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce); extern int index_name_is_other(const struct index_state *, const char *, int); -extern void *read_blob_data_from_index(struct index_state *, const char *, unsigned long *); +extern void *read_blob_data_from_index(const struct index_state *, const char *, unsigned long *); /* do stat comparison even if CE_VALID is true */ #define CE_MATCH_IGNORE_VALID 01 @@@ -669,7 -670,7 +669,7 @@@ extern const char *git_attributes_file extern const char *git_hooks_path; extern int zlib_compression_level; extern int core_compression_level; -extern int core_compression_seen; +extern int pack_compression_level; extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; @@@ -1063,11 -1064,8 +1063,11 @@@ static inline int is_absolute_path(cons return is_dir_sep(path[0]) || has_dos_drive_prefix(path); } int is_directory(const char *); +char *strbuf_realpath(struct strbuf *resolved, const char *path, + int die_on_error); const char *real_path(const char *path); const char *real_path_if_valid(const char *path); +char *real_pathdup(const char *path); const char *absolute_path(const char *path); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); @@@ -1127,8 -1125,7 +1127,8 @@@ extern int write_sha1_file(const void * extern int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, unsigned char *sha1, unsigned flags); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); -extern int git_open(const char *name); +extern int git_open_cloexec(const char *name, int flags); +#define git_open(name) git_open_cloexec(name, O_RDONLY) extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size); extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); extern int parse_sha1_header(const char *hdr, unsigned long *sizep); @@@ -1142,6 -1139,19 +1142,19 @@@ extern int finalize_object_file(const c extern int has_sha1_pack(const unsigned char *sha1); + /* + * Open the loose object at path, check its sha1, and return the contents, + * type, and size. If the object is a blob, then "contents" may return NULL, + * to allow streaming of large blobs. + * + * Returns 0 on success, negative on error (details may be written to stderr). + */ + int read_loose_object(const char *path, + const unsigned char *expected_sha1, + enum object_type *type, + unsigned long *size, + void **contents); + /* * Return true iff we have an object named sha1, whether local or in * an alternate object database, and whether packed or loose. This @@@ -1693,8 -1703,6 +1706,8 @@@ extern int git_default_config(const cha extern int git_config_from_file(config_fn_t fn, const char *, void *); extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type, const char *name, const char *buf, size_t len, void *data); +extern int git_config_from_blob_sha1(config_fn_t fn, const char *name, + const unsigned char *sha1, void *data); extern void git_config_push_parameter(const char *text); extern int git_config_from_parameters(config_fn_t fn, void *data); extern void git_config(config_fn_t fn, void *); diff --combined sha1_file.c index b5e827ac9e,b2c6648085..c40ef7111c --- a/sha1_file.c +++ b/sha1_file.c @@@ -26,7 -26,14 +26,7 @@@ #include "mru.h" #include "list.h" #include "mergesort.h" - -#ifndef O_NOATIME -#if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) -#define O_NOATIME 01000000 -#else -#define O_NOATIME 0 -#endif -#endif +#include "quote.h" #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } @@@ -284,7 -291,7 +284,7 @@@ static int link_alt_odb_entry(const cha struct strbuf pathbuf = STRBUF_INIT; if (!is_absolute_path(entry) && relative_base) { - strbuf_addstr(&pathbuf, real_path(relative_base)); + strbuf_realpath(&pathbuf, relative_base, 1); strbuf_addch(&pathbuf, '/'); } strbuf_addstr(&pathbuf, entry); @@@ -322,40 -329,13 +322,40 @@@ return 0; } +static const char *parse_alt_odb_entry(const char *string, + int sep, + struct strbuf *out) +{ + const char *end; + + strbuf_reset(out); + + if (*string == '#') { + /* comment; consume up to next separator */ + end = strchrnul(string, sep); + } else if (*string == '"' && !unquote_c_style(out, string, &end)) { + /* + * quoted path; unquote_c_style has copied the + * data for us and set "end". Broken quoting (e.g., + * an entry that doesn't end with a quote) falls + * back to the unquoted case below. + */ + } else { + /* normal, unquoted path */ + end = strchrnul(string, sep); + strbuf_add(out, string, end - string); + } + + if (*end) + end++; + return end; +} + static void link_alt_odb_entries(const char *alt, int len, int sep, const char *relative_base, int depth) { - struct string_list entries = STRING_LIST_INIT_NODUP; - char *alt_copy; - int i; struct strbuf objdirbuf = STRBUF_INIT; + struct strbuf entry = STRBUF_INIT; if (depth > 5) { error("%s: ignoring alternate object stores, nesting too deep.", @@@ -368,13 -348,16 +368,13 @@@ die("unable to normalize object directory: %s", objdirbuf.buf); - alt_copy = xmemdupz(alt, len); - string_list_split_in_place(&entries, alt_copy, sep, -1); - for (i = 0; i < entries.nr; i++) { - const char *entry = entries.items[i].string; - if (entry[0] == '\0' || entry[0] == '#') + while (*alt) { + alt = parse_alt_odb_entry(alt, sep, &entry); + if (!entry.len) continue; - link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf); + link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf); } - string_list_clear(&entries, 0); - free(alt_copy); + strbuf_release(&entry); strbuf_release(&objdirbuf); } @@@ -1603,66 -1586,81 +1603,81 @@@ int check_sha1_signature(const unsigne return hashcmp(sha1, real_sha1) ? -1 : 0; } -int git_open(const char *name) +int git_open_cloexec(const char *name, int flags) { - static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC; - - for (;;) { - int fd; - - errno = 0; - fd = open(name, O_RDONLY | sha1_file_open_flag); - if (fd >= 0) - return fd; + int fd; + static int o_cloexec = O_CLOEXEC; + fd = open(name, flags | o_cloexec); + if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) { /* Try again w/o O_CLOEXEC: the kernel might not support it */ - if ((sha1_file_open_flag & O_CLOEXEC) && errno == EINVAL) { - sha1_file_open_flag &= ~O_CLOEXEC; - continue; - } + o_cloexec &= ~O_CLOEXEC; + fd = open(name, flags | o_cloexec); + } - /* Might the failure be due to O_NOATIME? */ - if (errno != ENOENT && (sha1_file_open_flag & O_NOATIME)) { - sha1_file_open_flag &= ~O_NOATIME; - continue; +#if defined(F_GETFL) && defined(F_SETFL) && defined(FD_CLOEXEC) + { + static int fd_cloexec = FD_CLOEXEC; + + if (!o_cloexec && 0 <= fd && fd_cloexec) { + /* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */ + int flags = fcntl(fd, F_GETFL); + if (fcntl(fd, F_SETFL, flags | fd_cloexec)) + fd_cloexec = 0; } - return -1; } +#endif + return fd; } - static int stat_sha1_file(const unsigned char *sha1, struct stat *st) + /* + * Find "sha1" as a loose object in the local repository or in an alternate. + * Returns 0 on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to sha1_file_name(), etc. + */ + static int stat_sha1_file(const unsigned char *sha1, struct stat *st, + const char **path) { struct alternate_object_database *alt; - if (!lstat(sha1_file_name(sha1), st)) + *path = sha1_file_name(sha1); + if (!lstat(*path, st)) return 0; prepare_alt_odb(); errno = ENOENT; for (alt = alt_odb_list; alt; alt = alt->next) { - const char *path = alt_sha1_path(alt, sha1); - if (!lstat(path, st)) + *path = alt_sha1_path(alt, sha1); + if (!lstat(*path, st)) return 0; } return -1; } - static int open_sha1_file(const unsigned char *sha1) + /* + * Like stat_sha1_file(), but actually open the object and return the + * descriptor. See the caveats on the "path" parameter above. + */ + static int open_sha1_file(const unsigned char *sha1, const char **path) { int fd; struct alternate_object_database *alt; int most_interesting_errno; - fd = git_open(sha1_file_name(sha1)); + *path = sha1_file_name(sha1); + fd = git_open(*path); if (fd >= 0) return fd; most_interesting_errno = errno; prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { - const char *path = alt_sha1_path(alt, sha1); - fd = git_open(path); + *path = alt_sha1_path(alt, sha1); + fd = git_open(*path); if (fd >= 0) return fd; if (most_interesting_errno == ENOENT) @@@ -1672,12 -1670,21 +1687,21 @@@ return -1; } - void *map_sha1_file(const unsigned char *sha1, unsigned long *size) + /* + * Map the loose object at "path" if it is not NULL, or the path found by + * searching for a loose object named "sha1". + */ + static void *map_sha1_file_1(const char *path, + const unsigned char *sha1, + unsigned long *size) { void *map; int fd; - fd = open_sha1_file(sha1); + if (path) + fd = git_open(path); + else + fd = open_sha1_file(sha1, &path); map = NULL; if (fd >= 0) { struct stat st; @@@ -1686,7 -1693,7 +1710,7 @@@ *size = xsize_t(st.st_size); if (!*size) { /* mmap() is forbidden on empty files */ - error("object file %s is empty", sha1_file_name(sha1)); + error("object file %s is empty", path); return NULL; } map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); @@@ -1696,6 -1703,11 +1720,11 @@@ return map; } + void *map_sha1_file(const unsigned char *sha1, unsigned long *size) + { + return map_sha1_file_1(NULL, sha1, size); + } + unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { @@@ -2806,8 -2818,9 +2835,9 @@@ static int sha1_loose_object_info(cons * object even exists. */ if (!oi->typep && !oi->typename && !oi->sizep) { + const char *path; struct stat st; - if (stat_sha1_file(sha1, &st) < 0) + if (stat_sha1_file(sha1, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; @@@ -3003,6 -3016,8 +3033,8 @@@ void *read_sha1_file_extended(const uns { void *data; const struct packed_git *p; + const char *path; + struct stat st; const unsigned char *repl = lookup_replace_object_extended(sha1, flag); errno = 0; @@@ -3018,12 -3033,9 +3050,9 @@@ die("replacement %s not found for %s", sha1_to_hex(repl), sha1_to_hex(sha1)); - if (has_loose_object(repl)) { - const char *path = sha1_file_name(sha1); - + if (!stat_sha1_file(repl, &st, &path)) die("loose object %s (stored in %s) is corrupt", sha1_to_hex(repl), path); - } if ((p = has_packed_and_bad(repl)) != NULL) die("packed object %s (stored in %s) is corrupt", @@@ -3793,3 -3805,122 +3822,122 @@@ int for_each_packed_object(each_packed_ } return r ? r : pack_errors; } + + static int check_stream_sha1(git_zstream *stream, + const char *hdr, + unsigned long size, + const char *path, + const unsigned char *expected_sha1) + { + git_SHA_CTX c; + unsigned char real_sha1[GIT_SHA1_RAWSZ]; + unsigned char buf[4096]; + unsigned long total_read; + int status = Z_OK; + + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, stream->total_out); + + /* + * We already read some bytes into hdr, but the ones up to the NUL + * do not count against the object's content size. + */ + total_read = stream->total_out - strlen(hdr) - 1; + + /* + * This size comparison must be "<=" to read the final zlib packets; + * see the comment in unpack_sha1_rest for details. + */ + while (total_read <= size && + (status == Z_OK || status == Z_BUF_ERROR)) { + stream->next_out = buf; + stream->avail_out = sizeof(buf); + if (size - total_read < stream->avail_out) + stream->avail_out = size - total_read; + status = git_inflate(stream, Z_FINISH); + git_SHA1_Update(&c, buf, stream->next_out - buf); + total_read += stream->next_out - buf; + } + git_inflate_end(stream); + + if (status != Z_STREAM_END) { + error("corrupt loose object '%s'", sha1_to_hex(expected_sha1)); + return -1; + } + if (stream->avail_in) { + error("garbage at end of loose object '%s'", + sha1_to_hex(expected_sha1)); + return -1; + } + + git_SHA1_Final(real_sha1, &c); + if (hashcmp(expected_sha1, real_sha1)) { + error("sha1 mismatch for %s (expected %s)", path, + sha1_to_hex(expected_sha1)); + return -1; + } + + return 0; + } + + int read_loose_object(const char *path, + const unsigned char *expected_sha1, + enum object_type *type, + unsigned long *size, + void **contents) + { + int ret = -1; + int fd = -1; + void *map = NULL; + unsigned long mapsize; + git_zstream stream; + char hdr[32]; + + *contents = NULL; + + map = map_sha1_file_1(path, NULL, &mapsize); + if (!map) { + error_errno("unable to mmap %s", path); + goto out; + } + + if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { + error("unable to unpack header of %s", path); + goto out; + } + + *type = parse_sha1_header(hdr, size); + if (*type < 0) { + error("unable to parse header of %s", path); + git_inflate_end(&stream); + goto out; + } + + if (*type == OBJ_BLOB) { + if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) + goto out; + } else { + *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1); + if (!*contents) { + error("unable to unpack contents of %s", path); + git_inflate_end(&stream); + goto out; + } + if (check_sha1_signature(expected_sha1, *contents, + *size, typename(*type))) { + error("sha1 mismatch for %s (expected %s)", path, + sha1_to_hex(expected_sha1)); + free(*contents); + goto out; + } + } + + ret = 0; /* everything checks out */ + + out: + if (map) + munmap(map, mapsize); + if (fd >= 0) + close(fd); + return ret; + }