From: Junio C Hamano Date: Tue, 29 May 2018 08:09:58 +0000 (+0900) Subject: Sync with Git 2.17.1 X-Git-Tag: v2.18.0-rc0~34 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/7913f53b5628997165e075008d6142da1c04271a?hp=-c Sync with Git 2.17.1 * maint: (25 commits) Git 2.17.1 Git 2.16.4 Git 2.15.2 Git 2.14.4 Git 2.13.7 fsck: complain when .gitmodules is a symlink index-pack: check .gitmodules files with --strict unpack-objects: call fsck_finish() after fscking objects fsck: call fsck_finish() after fscking objects fsck: check .gitmodules content fsck: handle promisor objects in .gitmodules check fsck: detect gitmodules files fsck: actually fsck blob data fsck: simplify ".git" check index-pack: make fsck error message more specific verify_path: disallow symlinks in .gitmodules update-index: stat updated files earlier verify_dotfile: mention case-insensitivity in comment verify_path: drop clever fallthrough skip_prefix: add case-insensitive variant ... --- 7913f53b5628997165e075008d6142da1c04271a diff --combined apply.c index 7e5792c996,2d1cfe4dbb..d80b26bc33 --- a/apply.c +++ b/apply.c @@@ -3180,7 -3180,7 +3180,7 @@@ static int apply_binary(struct apply_st unsigned long size; char *result; - result = read_sha1_file(oid.hash, &type, &size); + result = read_object_file(&oid, &type, &size); if (!result) return error(_("the necessary postimage %s for " "'%s' cannot be read"), @@@ -3242,7 -3242,7 +3242,7 @@@ static int read_blob_object(struct strb unsigned long sz; char *result; - result = read_sha1_file(oid->hash, &type, &sz); + result = read_object_file(oid, &type, &sz); if (!result) return -1; /* XXX read_sha1_file NUL-terminates */ @@@ -3860,9 -3860,9 +3860,9 @@@ static int check_unsafe_path(struct pat if (!patch->is_delete) new_name = patch->new_name; - if (old_name && !verify_path(old_name)) + if (old_name && !verify_path(old_name, patch->old_mode)) return error(_("invalid path '%s'"), old_name); - if (new_name && !verify_path(new_name)) + if (new_name && !verify_path(new_name, patch->new_mode)) return error(_("invalid path '%s'"), new_name); return 0; } diff --combined builtin/fsck.c index 9d59d7d5a2,028aba52eb..916109ac1c --- a/builtin/fsck.c +++ b/builtin/fsck.c @@@ -1,6 -1,5 +1,6 @@@ #include "builtin.h" #include "cache.h" +#include "repository.h" #include "config.h" #include "commit.h" #include "tree.h" @@@ -17,7 -16,6 +17,7 @@@ #include "streaming.h" #include "decorate.h" #include "packfile.h" +#include "object-store.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@@ -67,8 -65,7 +67,8 @@@ static const char *printable_type(struc const char *ret; if (obj->type == OBJ_NONE) { - enum object_type type = sha1_object_info(obj->oid.hash, NULL); + enum object_type type = oid_object_info(the_repository, + &obj->oid, NULL); if (type > 0) object_as_type(obj, type, 0); } @@@ -340,7 -337,7 +340,7 @@@ static void check_connectivity(void } } - static int fsck_obj(struct object *obj) + static int fsck_obj(struct object *obj, void *buffer, unsigned long size) { int err; @@@ -354,7 -351,7 +354,7 @@@ if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); - err = fsck_object(obj, NULL, 0, &fsck_obj_options); + err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@@ -399,7 -396,7 +399,7 @@@ static int fsck_obj_buffer(const struc } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj); + return fsck_obj(obj, buffer, size); } static int default_refs; @@@ -507,44 -504,42 +507,42 @@@ static void get_default_heads(void } } - static 
struct object *parse_loose_object(const struct object_id *oid, - const char *path) + static int fsck_loose(const struct object_id *oid, const char *path, void *data) { struct object *obj; - void *contents; enum object_type type; unsigned long size; + void *contents; int eaten; - if (read_loose_object(path, oid, &type, &size, &contents) < 0) - return NULL; - if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) { ++ if (read_loose_object(path, oid, &type, &size, &contents) < 0) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + oid_to_hex(oid), path); + return 0; /* keep checking other objects */ + } if (!contents && type != OBJ_BLOB) - die("BUG: read_loose_object streamed a non-blob"); + BUG("read_loose_object streamed a non-blob"); obj = parse_object_buffer(oid, type, size, contents, &eaten); - - if (!eaten) - free(contents); - return obj; - } - - static int fsck_loose(const struct object_id *oid, const char *path, void *data) - { - struct object *obj = parse_loose_object(oid, path); - if (!obj) { errors_found |= ERROR_OBJECT; - error("%s: object corrupt or missing: %s", + error("%s: object could not be parsed: %s", oid_to_hex(oid), path); + if (!eaten) + free(contents); return 0; /* keep checking other objects */ } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj)) + if (fsck_obj(obj, contents, size)) errors_found |= ERROR_OBJECT; - return 0; + + if (!eaten) + free(contents); + return 0; /* keep checking other objects, even if we saw an error */ } static int fsck_cruft(const char *basename, const char *path, void *data) @@@ -722,12 -717,9 +720,12 @@@ int cmd_fsck(int argc, const char **arg for_each_loose_object(mark_loose_for_connectivity, NULL, 0); for_each_packed_object(mark_packed_for_connectivity, NULL, 0); } else { + struct alternate_object_database *alt_odb_list; + fsck_object_dir(get_object_directory()); - prepare_alt_odb(); + prepare_alt_odb(the_repository); + alt_odb_list = the_repository->objects->alt_odb_list; for (alt = alt_odb_list; alt; alt = alt->next) fsck_object_dir(alt->path); @@@ -736,9 -728,10 +734,9 @@@ uint32_t total = 0, count = 0; struct progress *progress = NULL; - prepare_packed_git(); - if (show_progress) { - for (p = packed_git; p; p = p->next) { + for (p = get_packed_git(the_repository); p; + p = p->next) { if (open_pack_index(p)) continue; total += p->num_objects; @@@ -746,8 -739,7 +744,8 @@@ progress = start_progress(_("Checking objects"), total); } - for (p = packed_git; p; p = p->next) { + for (p = get_packed_git(the_repository); p; + p = p->next) { /* verify gives error messages itself */ if (verify_pack(p, fsck_obj_buffer, progress, count)) @@@ -756,6 -748,9 +754,9 @@@ } stop_progress(&progress); } + + if (fsck_finish(&fsck_obj_options)) + errors_found |= ERROR_OBJECT; } for (i = 0; i < argc; i++) { diff --combined builtin/index-pack.c index e2f670bef9,7b2f7c0470..59130e8ecb --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@@ -9,11 -9,10 +9,11 @@@ #include "tree.h" #include "progress.h" #include "fsck.h" -#include "exec_cmd.h" +#include "exec-cmd.h" #include "streaming.h" #include "thread-utils.h" #include "packfile.h" +#include "object-store.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; @@@ -60,7 -59,7 +60,7 @@@ struct ofs_delta_entry }; struct ref_delta_entry { - unsigned char sha1[20]; + struct object_id oid; int obj_no; }; @@@ -223,7 -222,7 +223,7 @@@ static unsigned 
check_object(struct obj if (!(obj->flags & FLAG_CHECKED)) { unsigned long size; - int type = sha1_object_info(obj->oid.hash, &size); + int type = oid_object_info(the_repository, &obj->oid, &size); if (type <= 0) die(_("did not receive expected object %s"), oid_to_hex(&obj->oid)); @@@ -673,18 -672,18 +673,18 @@@ static void find_ofs_delta_children(off *last_index = last; } -static int compare_ref_delta_bases(const unsigned char *sha1, - const unsigned char *sha2, +static int compare_ref_delta_bases(const struct object_id *oid1, + const struct object_id *oid2, enum object_type type1, enum object_type type2) { int cmp = type1 - type2; if (cmp) return cmp; - return hashcmp(sha1, sha2); + return oidcmp(oid1, oid2); } -static int find_ref_delta(const unsigned char *sha1, enum object_type type) +static int find_ref_delta(const struct object_id *oid, enum object_type type) { int first = 0, last = nr_ref_deltas; @@@ -693,7 -692,7 +693,7 @@@ struct ref_delta_entry *delta = &ref_deltas[next]; int cmp; - cmp = compare_ref_delta_bases(sha1, delta->sha1, + cmp = compare_ref_delta_bases(oid, &delta->oid, type, objects[delta->obj_no].type); if (!cmp) return next; @@@ -706,11 -705,11 +706,11 @@@ return -first-1; } -static void find_ref_delta_children(const unsigned char *sha1, +static void find_ref_delta_children(const struct object_id *oid, int *first_index, int *last_index, enum object_type type) { - int first = find_ref_delta(sha1, type); + int first = find_ref_delta(oid, type); int last = first; int end = nr_ref_deltas - 1; @@@ -719,9 -718,9 +719,9 @@@ *last_index = -1; return; } - while (first > 0 && !hashcmp(ref_deltas[first - 1].sha1, sha1)) + while (first > 0 && !oidcmp(&ref_deltas[first - 1].oid, oid)) --first; - while (last < end && !hashcmp(ref_deltas[last + 1].sha1, sha1)) + while (last < end && !oidcmp(&ref_deltas[last + 1].oid, oid)) ++last; *first_index = first; *last_index = last; @@@ -773,7 -772,7 +773,7 @@@ static int check_collison(struct object memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(entry->idx.oid.hash, &type, &size, NULL); + data.st = open_istream(&entry->idx.oid, &type, &size, NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) @@@ -812,12 -811,12 +812,12 @@@ static void sha1_object(const void *dat enum object_type has_type; unsigned long has_size; read_lock(); - has_type = sha1_object_info(oid->hash, &has_size); + has_type = oid_object_info(the_repository, oid, &has_size); if (has_type < 0) die(_("cannot read existing object info %s"), oid_to_hex(oid)); if (has_type != type || has_size != size) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(oid)); - has_data = read_sha1_file(oid->hash, &has_type, &has_size); + has_data = read_object_file(oid, &has_type, &has_size); read_unlock(); if (!data) data = new_data = get_data_from_pack(obj_entry); @@@ -837,6 -836,9 +837,9 @@@ blob->object.flags |= FLAG_CHECKED; else die(_("invalid blob object %s"), oid_to_hex(oid)); + if (do_fsck_object && + fsck_object(&blob->object, (void *)data, size, &fsck_options)) + die(_("fsck error in packed object")); } else { struct object *obj; int eaten; @@@ -854,7 -856,7 +857,7 @@@ die(_("invalid %s"), type_name(type)); if (do_fsck_object && fsck_object(obj, buf, size, &fsck_options)) - die(_("Error in object")); + die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); @@@ -993,7 -995,7 +996,7 @@@ static struct base_data 
*find_unresolve struct base_data *prev_base) { if (base->ref_last == -1 && base->ofs_last == -1) { - find_ref_delta_children(base->obj->idx.oid.hash, + find_ref_delta_children(&base->obj->idx.oid, &base->ref_first, &base->ref_last, OBJ_REF_DELTA); @@@ -1077,7 -1079,7 +1080,7 @@@ static int compare_ref_delta_entry(cons const struct ref_delta_entry *delta_a = a; const struct ref_delta_entry *delta_b = b; - return hashcmp(delta_a->sha1, delta_b->sha1); + return oidcmp(&delta_a->oid, &delta_b->oid); } static void resolve_base(struct object_entry *obj) @@@ -1143,7 -1145,7 +1146,7 @@@ static void parse_pack_objects(unsigne ofs_delta++; } else if (obj->type == OBJ_REF_DELTA) { ALLOC_GROW(ref_deltas, nr_ref_deltas + 1, ref_deltas_alloc); - hashcpy(ref_deltas[nr_ref_deltas].sha1, ref_delta_oid.hash); + oidcpy(&ref_deltas[nr_ref_deltas].oid, &ref_delta_oid); ref_deltas[nr_ref_deltas].obj_no = i; nr_ref_deltas++; } else if (!data) { @@@ -1271,7 -1273,7 +1274,7 @@@ static void conclude_pack(int fix_thin_ nr_objects - nr_objects_initial); stop_progress_msg(&progress, msg.buf); strbuf_release(&msg); - hashclose(f, tail_hash, 0); + finalize_hashfile(f, tail_hash, 0); hashcpy(read_hash, pack_hash); fixup_pack_header_footer(output_fd, pack_hash, curr_pack, nr_objects, @@@ -1375,15 -1377,14 +1378,15 @@@ static void fix_unresolved_deltas(struc if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - base_obj->data = read_sha1_file(d->sha1, &type, &base_obj->size); + base_obj->data = read_object_file(&d->oid, &type, + &base_obj->size); if (!base_obj->data) continue; - if (check_sha1_signature(d->sha1, base_obj->data, + if (check_object_signature(&d->oid, base_obj->data, base_obj->size, type_name(type))) - die(_("local object %s is corrupt"), sha1_to_hex(d->sha1)); - base_obj->obj = append_obj_to_pack(f, d->sha1, + die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); + base_obj->obj = append_obj_to_pack(f, d->oid.hash, base_obj->data, base_obj->size, type); find_unresolved_deltas(base_obj); display_progress(progress, nr_resolved_deltas); @@@ -1479,6 -1480,9 +1482,9 @@@ static void final(const char *final_pac } else chmod(final_index_name, 0444); + if (do_fsck_object) + add_packed_git(final_index_name, strlen(final_index_name), 0); + if (!from_stdin) { printf("%s\n", sha1_to_hex(hash)); } else { @@@ -1593,7 -1597,7 +1599,7 @@@ static void read_idx_option(struct pack /* * Get rid of the idx file as we do not need it anymore. * NEEDSWORK: extract this bit from free_pack_by_name() in - * sha1_file.c, perhaps? It shouldn't matter very much as we + * sha1-file.c, perhaps? It shouldn't matter very much as we * know we haven't installed this pack (hence we never have * read anything from it). 
*/ @@@ -1820,6 -1824,10 +1826,10 @@@ int cmd_index_pack(int argc, const cha pack_hash); else close(input_fd); + + if (do_fsck_object && fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + free(objects); strbuf_release(&index_name_buf); if (pack_name == NULL) diff --combined builtin/submodule--helper.c index c2403a915f,4f35c98bb9..df841d4ab3 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@@ -16,7 -16,6 +16,7 @@@ #include "revision.h" #include "diffcore.h" #include "diff.h" +#include "object-store.h" #define OPT_QUIET (1 << 0) #define OPT_CACHED (1 << 1) @@@ -455,7 -454,7 +455,7 @@@ static void init_submodule(const char * displaypath = get_submodule_displaypath(path, prefix); - sub = submodule_from_path(&null_oid, path); + sub = submodule_from_path(the_repository, &null_oid, path); if (!sub) die(_("No url found for submodule path '%s' in .gitmodules"), @@@ -596,12 -595,8 +596,12 @@@ static void print_status(unsigned int f printf("%c%s %s", state, oid_to_hex(oid), displaypath); - if (state == ' ' || state == '+') - printf(" (%s)", compute_rev_name(path, oid_to_hex(oid))); + if (state == ' ' || state == '+') { + const char *name = compute_rev_name(path, oid_to_hex(oid)); + + if (name) + printf(" (%s)", name); + } printf("\n"); } @@@ -626,7 -621,7 +626,7 @@@ static void status_submodule(const cha struct rev_info rev; int diff_files_result; - if (!submodule_from_path(&null_oid, path)) + if (!submodule_from_path(the_repository, &null_oid, path)) die(_("no submodule mapping found in .gitmodules for path '%s'"), path); @@@ -659,13 -654,9 +659,13 @@@ displaypath); } else if (!(flags & OPT_CACHED)) { struct object_id oid; + struct ref_store *refs = get_submodule_ref_store(path); - if (refs_head_ref(get_submodule_ref_store(path), - handle_submodule_head_ref, &oid)) + if (!refs) { + print_status(flags, '-', path, ce_oid, displaypath); + goto cleanup; + } + if (refs_head_ref(refs, handle_submodule_head_ref, &oid)) die(_("could not resolve HEAD ref inside the " "submodule '%s'"), path); @@@ -750,7 -741,7 +750,7 @@@ static int module_name(int argc, const if (argc != 2) usage(_("git submodule--helper name ")); - sub = submodule_from_path(&null_oid, argv[1]); + sub = submodule_from_path(the_repository, &null_oid, argv[1]); if (!sub) die(_("no submodule mapping found in .gitmodules for path '%s'"), @@@ -781,7 -772,7 +781,7 @@@ static void sync_submodule(const char * if (!is_submodule_active(the_repository, path)) return; - sub = submodule_from_path(&null_oid, path); + sub = submodule_from_path(the_repository, &null_oid, path); if (sub && sub->url) { if (starts_with_dot_dot_slash(sub->url) || @@@ -934,7 -925,7 +934,7 @@@ static void deinit_submodule(const cha struct strbuf sb_config = STRBUF_INIT; char *sub_git_dir = xstrfmt("%s/.git", path); - sub = submodule_from_path(&null_oid, path); + sub = submodule_from_path(the_repository, &null_oid, path); if (!sub || !sub->name) goto cleanup; @@@ -1268,7 -1259,8 +1268,7 @@@ static int module_clone(int argc, cons strbuf_reset(&sb); } - /* Connect module worktree and git dir */ - connect_work_tree_and_git_dir(path, sm_gitdir); + connect_work_tree_and_git_dir(path, sm_gitdir, 0); p = git_pathdup_submodule(path, "config"); if (!p) @@@ -1375,7 -1367,7 +1375,7 @@@ static int prepare_to_clone_next_submod goto cleanup; } - sub = submodule_from_path(&null_oid, ce->name); + sub = submodule_from_path(the_repository, &null_oid, ce->name); if (suc->recursive_prefix) displaypath = relative_path(suc->recursive_prefix, @@@ -1658,7 -1650,7 
+1658,7 @@@ static const char *remote_submodule_bra const char *branch = NULL; char *key; - sub = submodule_from_path(&null_oid, path); + sub = submodule_from_path(the_repository, &null_oid, path); if (!sub) return NULL; @@@ -1825,6 -1817,29 +1825,29 @@@ static int is_active(int argc, const ch return !is_submodule_active(the_repository, argv[1]); } + /* + * Exit non-zero if any of the submodule names given on the command line is + * invalid. If no names are given, filter stdin to print only valid names + * (which is primarily intended for testing). + */ + static int check_name(int argc, const char **argv, const char *prefix) + { + if (argc > 1) { + while (*++argv) { + if (check_submodule_name(*argv) < 0) + return 1; + } + } else { + struct strbuf buf = STRBUF_INIT; + while (strbuf_getline(&buf, stdin) != EOF) { + if (!check_submodule_name(buf.buf)) + printf("%s\n", buf.buf); + } + strbuf_release(&buf); + } + return 0; + } + #define SUPPORT_SUPER_PREFIX (1<<0) struct cmd_struct { @@@ -1850,6 -1865,7 +1873,7 @@@ static struct cmd_struct commands[] = {"push-check", push_check, 0}, {"absorb-git-dirs", absorb_git_dirs, SUPPORT_SUPER_PREFIX}, {"is-active", is_active, 0}, + {"check-name", check_name, 0}, }; int cmd_submodule__helper(int argc, const char **argv, const char *prefix) diff --combined builtin/unpack-objects.c index cfe9019f80,c8f1406d23..6e81ca8ca2 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@@ -199,7 -199,7 +199,7 @@@ static int check_object(struct object * if (!(obj->flags & FLAG_OPEN)) { unsigned long size; - int type = sha1_object_info(obj->oid.hash, &size); + int type = oid_object_info(the_repository, &obj->oid, &size); if (type != obj->type || type <= 0) die("object of unexpected type"); obj->flags |= FLAG_WRITTEN; @@@ -210,7 -210,7 +210,7 @@@ if (!obj_buf) die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) - die("Error in object"); + die("fsck error in packed object"); fsck_options.walk = check_object; if (fsck_walk(obj, NULL, &fsck_options)) die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); @@@ -423,7 -423,7 +423,7 @@@ static void unpack_delta_entry(enum obj if (resolve_against_held(nr, &base_oid, delta_data, delta_size)) return; - base = read_sha1_file(base_oid.hash, &type, &base_size); + base = read_object_file(&base_oid, &type, &base_size); if (!base) { error("failed to read delta-pack base object %s", oid_to_hex(&base_oid)); @@@ -572,8 -572,11 +572,11 @@@ int cmd_unpack_objects(int argc, const unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); the_hash_algo->final_fn(oid.hash, &ctx); - if (strict) + if (strict) { write_rest(); + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); diff --combined builtin/update-index.c index 10d070a76f,1af8a00b88..6598bc06ad --- a/builtin/update-index.c +++ b/builtin/update-index.c @@@ -364,10 -364,9 +364,9 @@@ static int process_directory(const cha return error("%s: is a directory - add files inside instead", path); } - static int process_path(const char *path) + static int process_path(const char *path, struct stat *st, int stat_errno) { int pos, len; - struct stat st; const struct cache_entry *ce; len = strlen(path); @@@ -391,13 -390,13 +390,13 @@@ * First things first: get the stat information, to decide * what to do about the pathname! 
*/ - if (lstat(path, &st) < 0) - return process_lstat_error(path, errno); + if (stat_errno) + return process_lstat_error(path, stat_errno); - if (S_ISDIR(st.st_mode)) - return process_directory(path, len, &st); + if (S_ISDIR(st->st_mode)) + return process_directory(path, len, st); - return add_one_path(ce, path, len, &st); + return add_one_path(ce, path, len, st); } static int add_cacheinfo(unsigned int mode, const struct object_id *oid, @@@ -406,7 -405,7 +405,7 @@@ int size, len, option; struct cache_entry *ce; - if (!verify_path(path)) + if (!verify_path(path, mode)) return error("Invalid path '%s'", path); len = strlen(path); @@@ -449,7 -448,18 +448,18 @@@ static void chmod_path(char flip, cons static void update_one(const char *path) { - if (!verify_path(path)) { + int stat_errno = 0; + struct stat st; + + if (mark_valid_only || mark_skip_worktree_only || force_remove || + mark_fsmonitor_only) + st.st_mode = 0; + else if (lstat(path, &st) < 0) { + st.st_mode = 0; + stat_errno = errno; + } /* else stat is valid */ + + if (!verify_path(path, st.st_mode)) { fprintf(stderr, "Ignoring path %s\n", path); return; } @@@ -475,7 -485,7 +485,7 @@@ report("remove '%s'", path); return; } - if (process_path(path)) + if (process_path(path, &st, stat_errno)) die("Unable to process path %s", path); report("add '%s'", path); } @@@ -545,7 -555,7 +555,7 @@@ static void read_index_info(int nul_ter path_name = uq.buf; } - if (!verify_path(path_name)) { + if (!verify_path(path_name, mode)) { fprintf(stderr, "Ignoring path %s\n", path_name); continue; } @@@ -592,7 -602,7 +602,7 @@@ static struct cache_entry *read_one_ent int size; struct cache_entry *ce; - if (get_tree_entry(ent->hash, path, oid.hash, &mode)) { + if (get_tree_entry(ent, path, &oid, &mode)) { if (which) error("%s: not in %s branch.", path, which); return NULL; @@@ -1059,7 -1069,6 +1069,7 @@@ int cmd_update_index(int argc, const ch break; switch (parseopt_state) { case PARSE_OPT_HELP: + case PARSE_OPT_ERROR: exit(129); case PARSE_OPT_NON_OPTION: case PARSE_OPT_DONE: diff --combined cache.h index 6dedf3c4f9,0323853c99..3bbe4f8845 --- a/cache.h +++ b/cache.h @@@ -373,13 -373,6 +373,13 @@@ extern void free_name_hash(struct index #define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz)) #endif +#define TYPE_BITS 3 + +/* + * Values in this enum (except those outside the 3 bit range) are part + * of pack file format. See Documentation/technical/pack-format.txt + * for more information. + */ enum object_type { OBJ_BAD = -1, OBJ_NONE = 0, @@@ -435,7 -428,6 +435,7 @@@ static inline enum object_type object_t #define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS" #define GIT_QUARANTINE_ENVIRONMENT "GIT_QUARANTINE_PATH" #define GIT_OPTIONAL_LOCKS_ENVIRONMENT "GIT_OPTIONAL_LOCKS" +#define GIT_TEXT_DOMAIN_DIR_ENVIRONMENT "GIT_TEXTDOMAINDIR" /* * Environment variable used in handshaking the wire protocol. 
@@@ -467,7 -459,7 +467,7 @@@ */ extern const char * const local_repo_env[]; -extern void setup_git_env(void); +extern void setup_git_env(const char *git_dir); /* * Returns true iff we have a configured git repository (either via @@@ -485,7 -477,7 +485,7 @@@ extern const char *get_git_common_dir(v extern char *get_object_directory(void); extern char *get_index_file(void); extern char *get_graft_file(void); -extern int set_git_dir(const char *path); +extern void set_git_dir(const char *path); extern int get_common_dir_noenv(struct strbuf *sb, const char *gitdir); extern int get_common_dir(struct strbuf *sb, const char *gitdir); extern const char *get_git_namespace(void); @@@ -642,7 -634,7 +642,7 @@@ extern int unmerged_index(const struct */ extern int index_has_changes(struct strbuf *sb); - extern int verify_path(const char *path); + extern int verify_path(const char *path, unsigned mode); extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); extern void adjust_dirname_case(struct index_state *istate, char *name); @@@ -813,7 -805,6 +813,7 @@@ extern char *git_replace_ref_base extern int fsync_object_files; extern int core_preload_index; +extern int core_commit_graph; extern int core_apply_sparse_checkout; extern int precomposed_unicode; extern int protect_hfs; @@@ -949,6 -940,12 +949,6 @@@ extern void check_repository_format(voi #define DATA_CHANGED 0x0020 #define TYPE_CHANGED 0x0040 -/* - * Put in `buf` the name of the file in the local object database that - * would be used to store a loose object with the specified sha1. - */ -extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1); - /* * Return an abbreviated sha1 unique within this repository's object database. * The result will be at least `len` characters long, and will be NUL @@@ -958,14 -955,14 +958,14 @@@ * more calls to find_unique_abbrev are made. * * The `_r` variant writes to a buffer supplied by the caller, which must be at - * least `GIT_SHA1_HEXSZ + 1` bytes. The return value is the number of bytes + * least `GIT_MAX_HEXSZ + 1` bytes. The return value is the number of bytes * written (excluding the NUL terminator). * * Note that while this version avoids the static buffer, it is not fully * reentrant, as it calls into other non-reentrant git code. */ -extern const char *find_unique_abbrev(const unsigned char *sha1, int len); -extern int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len); +extern const char *find_unique_abbrev(const struct object_id *oid, int len); +extern int find_unique_abbrev_r(char *hex, const struct object_id *oid, int len); extern const unsigned char null_sha1[GIT_MAX_RAWSZ]; extern const struct object_id null_oid; @@@ -1168,7 -1165,15 +1168,15 @@@ int normalize_path_copy(char *dst, cons int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); - extern int is_ntfs_dotgit(const char *name); + + /* + * These functions match their is_hfs_dotgit() counterparts; see utf8.h for + * details. 
+ */ + int is_ntfs_dotgit(const char *name); + int is_ntfs_dotgitmodules(const char *name); + int is_ntfs_dotgitignore(const char *name); + int is_ntfs_dotgitattributes(const char *name); /* * Returns true iff "str" could be confused as a command-line option when @@@ -1192,16 -1197,35 +1200,16 @@@ extern char *xdg_config_home(const cha */ extern char *xdg_cache_home(const char *filename); -extern void *read_sha1_file_extended(const unsigned char *sha1, - enum object_type *type, - unsigned long *size, int lookup_replace); -static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) +extern void *read_object_file_extended(const struct object_id *oid, + enum object_type *type, + unsigned long *size, int lookup_replace); +static inline void *read_object_file(const struct object_id *oid, enum object_type *type, unsigned long *size) { - return read_sha1_file_extended(sha1, type, size, 1); + return read_object_file_extended(oid, type, size, 1); } -/* - * This internal function is only declared here for the benefit of - * lookup_replace_object(). Please do not call it directly. - */ -extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); - -/* - * If object sha1 should be replaced, return the replacement object's - * name (replaced recursively, if necessary). The return value is - * either sha1 or a pointer to a permanently-allocated value. When - * object replacement is suppressed, always return sha1. - */ -static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) -{ - if (!check_replace_refs) - return sha1; - return do_lookup_replace_object(sha1); -} - -/* Read and unpack a sha1 file into memory, write memory to a sha1 file */ -extern int sha1_object_info(const unsigned char *, unsigned long *); +/* Read and unpack an object file into memory, write memory to an object file */ +int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); extern int hash_object_file(const void *buf, unsigned long len, const char *type, struct object_id *oid); @@@ -1220,22 -1244,23 +1228,22 @@@ extern int force_object_loose(const str extern int git_open_cloexec(const char *name, int flags); #define git_open(name) git_open_cloexec(name, O_RDONLY) -extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size); extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); extern int parse_sha1_header(const char *hdr, unsigned long *sizep); -extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); +extern int check_object_signature(const struct object_id *oid, void *buf, unsigned long size, const char *type); extern int finalize_object_file(const char *tmpfile, const char *filename); /* - * Open the loose object at path, check its sha1, and return the contents, + * Open the loose object at path, check its hash, and return the contents, * type, and size. If the object is a blob, then "contents" may return NULL, * to allow streaming of large blobs. * * Returns 0 on success, negative on error (details may be written to stderr). 
*/ int read_loose_object(const char *path, - const unsigned char *expected_sha1, + const struct object_id *expected_oid, enum object_type *type, unsigned long *size, void **contents); @@@ -1262,7 -1287,7 +1270,7 @@@ extern int has_object_file_with_flags(c */ extern int has_loose_object_nonlocal(const unsigned char *sha1); -extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect); +extern void assert_oid_type(const struct object_id *oid, enum object_type expect); /* Helper to check and "touch" a file */ extern int check_and_freshen_file(const char *fn, int freshen); @@@ -1418,10 -1443,10 +1426,10 @@@ extern int df_name_compare(const char * extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2); extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2); -extern void *read_object_with_reference(const unsigned char *sha1, +extern void *read_object_with_reference(const struct object_id *oid, const char *required_type, unsigned long *size, - unsigned char *sha1_ret); + struct object_id *oid_ret); extern struct object *peel_to_type(const char *name, int namelen, struct object *o, enum object_type); @@@ -1547,6 -1572,57 +1555,6 @@@ extern int has_dirs_only_path(const cha extern void schedule_dir_for_removal(const char *name, int len); extern void remove_scheduled_dirs(void); -extern struct alternate_object_database { - struct alternate_object_database *next; - - /* see alt_scratch_buf() */ - struct strbuf scratch; - size_t base_len; - - /* - * Used to store the results of readdir(3) calls when searching - * for unique abbreviated hashes. This cache is never - * invalidated, thus it's racy and not necessarily accurate. - * That's fine for its purpose; don't use it for tasks requiring - * greater accuracy! - */ - char loose_objects_subdir_seen[256]; - struct oid_array loose_objects_cache; - - char path[FLEX_ARRAY]; -} *alt_odb_list; -extern void prepare_alt_odb(void); -extern char *compute_alternate_path(const char *path, struct strbuf *err); -typedef int alt_odb_fn(struct alternate_object_database *, void *); -extern int foreach_alt_odb(alt_odb_fn, void*); - -/* - * Allocate a "struct alternate_object_database" but do _not_ actually - * add it to the list of alternates. - */ -struct alternate_object_database *alloc_alt_odb(const char *dir); - -/* - * Add the directory to the on-disk alternates file; the new entry will also - * take effect in the current process. - */ -extern void add_to_alternates_file(const char *dir); - -/* - * Add the directory to the in-memory list of alternates (along with any - * recursive alternates it points to), but do not modify the on-disk alternates - * file. - */ -extern void add_to_alternates_memory(const char *dir); - -/* - * Returns a scratch strbuf pre-filled with the alternate object directory, - * including a trailing slash, which can be used to access paths in the - * alternate. Always use this over direct access to alt->scratch, as it - * cleans up any previous use of the scratch buffer. 
- */ -extern struct strbuf *alt_scratch_buf(struct alternate_object_database *alt); - struct pack_window { struct pack_window *next; unsigned char *base; @@@ -1556,6 -1632,35 +1564,6 @@@ unsigned int inuse_cnt; }; -extern struct packed_git { - struct packed_git *next; - struct list_head mru; - struct pack_window *windows; - off_t pack_size; - const void *index_data; - size_t index_size; - uint32_t num_objects; - uint32_t num_bad_objects; - unsigned char *bad_object_sha1; - int index_version; - time_t mtime; - int pack_fd; - unsigned pack_local:1, - pack_keep:1, - freshened:1, - do_not_close:1, - pack_promisor:1; - unsigned char sha1[20]; - struct revindex_entry *revindex; - /* something like ".git/objects/pack/xxxxx.pack" */ - char pack_name[FLEX_ARRAY]; /* more */ -} *packed_git; - -/* - * A most-recently-used ordered version of the packed_git list. - */ -extern struct list_head packed_git_mru; - struct pack_entry { off_t offset; unsigned char sha1[20]; @@@ -1680,12 -1785,7 +1688,12 @@@ struct object_info #define OBJECT_INFO_SKIP_CACHED 4 /* Do not retry packed storage after checking packed and loose storage */ #define OBJECT_INFO_QUICK 8 -extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); +/* Do not check loose object */ +#define OBJECT_INFO_IGNORE_LOOSE 16 + +int oid_object_info_extended(struct repository *r, + const struct object_id *, + struct object_info *, unsigned flags); /* * Set this to 0 to prevent sha1_object_info_extended() from fetching missing diff --combined dir.c index be08d3d296,41aac3b7b3..7ca730fac4 --- a/dir.c +++ b/dir.c @@@ -19,7 -19,6 +19,7 @@@ #include "varint.h" #include "ewah/ewok.h" #include "fsmonitor.h" +#include "submodule-config.h" /* * Tells read_directory_recursive how a file or directory should be treated. @@@ -244,7 -243,7 +244,7 @@@ static int do_read_blob(const struct ob *size_out = 0; *data_out = NULL; - data = read_sha1_file(oid->hash, &type, &sz); + data = read_object_file(oid, &type, &sz); if (!data || type != OBJ_BLOB) { free(data); return -1; @@@ -2993,7 -2992,7 +2993,7 @@@ void untracked_cache_invalidate_path(st { if (!istate->untracked || !istate->untracked->root) return; - if (!safe_path && !verify_path(path)) + if (!safe_path && !verify_path(path, 0)) return; invalidate_one_component(istate->untracked, istate->untracked->root, path, strlen(path)); @@@ -3011,57 -3010,8 +3011,57 @@@ void untracked_cache_add_to_index(struc untracked_cache_invalidate_path(istate, path, 1); } -/* Update gitfile and core.worktree setting to connect work tree and git dir */ -void connect_work_tree_and_git_dir(const char *work_tree_, const char *git_dir_) +static void connect_wt_gitdir_in_nested(const char *sub_worktree, + const char *sub_gitdir) +{ + int i; + struct repository subrepo; + struct strbuf sub_wt = STRBUF_INIT; + struct strbuf sub_gd = STRBUF_INIT; + + const struct submodule *sub; + + /* If the submodule has no working tree, we can ignore it. */ + if (repo_init(&subrepo, sub_gitdir, sub_worktree)) + return; + + if (repo_read_index(&subrepo) < 0) + die("index file corrupt in repo %s", subrepo.gitdir); + + for (i = 0; i < subrepo.index->cache_nr; i++) { + const struct cache_entry *ce = subrepo.index->cache[i]; + + if (!S_ISGITLINK(ce->ce_mode)) + continue; + + while (i + 1 < subrepo.index->cache_nr && + !strcmp(ce->name, subrepo.index->cache[i + 1]->name)) + /* + * Skip entries with the same name in different stages + * to make sure an entry is returned only once. 
+ */ + i++; + + sub = submodule_from_path(&subrepo, &null_oid, ce->name); + if (!sub || !is_submodule_active(&subrepo, ce->name)) + /* .gitmodules broken or inactive sub */ + continue; + + strbuf_reset(&sub_wt); + strbuf_reset(&sub_gd); + strbuf_addf(&sub_wt, "%s/%s", sub_worktree, sub->path); + strbuf_addf(&sub_gd, "%s/modules/%s", sub_gitdir, sub->name); + + connect_work_tree_and_git_dir(sub_wt.buf, sub_gd.buf, 1); + } + strbuf_release(&sub_wt); + strbuf_release(&sub_gd); + repo_clear(&subrepo); +} + +void connect_work_tree_and_git_dir(const char *work_tree_, + const char *git_dir_, + int recurse_into_nested) { struct strbuf gitfile_sb = STRBUF_INIT; struct strbuf cfg_sb = STRBUF_INIT; @@@ -3091,10 -3041,6 +3091,10 @@@ strbuf_release(&gitfile_sb); strbuf_release(&cfg_sb); strbuf_release(&rel_path); + + if (recurse_into_nested) + connect_wt_gitdir_in_nested(work_tree, git_dir); + free(work_tree); free(git_dir); } @@@ -3108,5 -3054,5 +3108,5 @@@ void relocate_gitdir(const char *path, die_errno(_("could not migrate git directory from '%s' to '%s'"), old_git_dir, new_git_dir); - connect_work_tree_and_git_dir(path, new_git_dir); + connect_work_tree_and_git_dir(path, new_git_dir, 0); } diff --combined fsck.c index 640422a6c6,9339f31513..4db2277ab8 --- a/fsck.c +++ b/fsck.c @@@ -10,6 -10,13 +10,13 @@@ #include "utf8.h" #include "sha1-array.h" #include "decorate.h" + #include "oidset.h" + #include "packfile.h" + #include "submodule-config.h" + #include "config.h" + + static struct oidset gitmodules_found = OIDSET_INIT; + static struct oidset gitmodules_done = OIDSET_INIT; #define FSCK_FATAL -1 #define FSCK_INFO -2 @@@ -44,6 -51,7 +51,7 @@@ FUNC(MISSING_TAG_ENTRY, ERROR) \ FUNC(MISSING_TAG_OBJECT, ERROR) \ FUNC(MISSING_TREE, ERROR) \ + FUNC(MISSING_TREE_OBJECT, ERROR) \ FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ @@@ -51,6 -59,11 +59,11 @@@ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@@ -396,11 -409,9 +409,11 @@@ static int fsck_walk_commit(struct comm name = get_object_name(options, &commit->object); if (name) - put_object_name(options, &commit->tree->object, "%s:", name); + put_object_name(options, &get_commit_tree(commit)->object, + "%s:", name); - result = options->walk((struct object *)commit->tree, OBJ_TREE, data, options); + result = options->walk((struct object *)get_commit_tree(commit), + OBJ_TREE, data, options); if (result < 0) return result; res = result; @@@ -563,10 -574,18 +576,18 @@@ static int fsck_tree(struct tree *item has_empty_name |= !*name; has_dot |= !strcmp(name, "."); has_dotdot |= !strcmp(name, ".."); - has_dotgit |= (!strcmp(name, ".git") || - is_hfs_dotgit(name) || - is_ntfs_dotgit(name)); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; + + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { + if (!S_ISLNK(mode)) + oidset_insert(&gitmodules_found, oid); + else + retval += report(options, &item->object, + FSCK_MSG_GITMODULES_SYMLINK, + ".gitmodules is a symbolic link"); + } + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; @@@ -774,7 -793,7 +795,7 @@@ static int 
fsck_commit_buffer(struct co err = fsck_ident(&buffer, &commit->object, options); if (err) return err; - if (!commit->tree) { + if (!get_commit_tree(commit)) { err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1)); if (err) return err; @@@ -813,7 -832,7 +834,7 @@@ static int fsck_tag_buffer(struct tag * enum object_type type; buffer = to_free = - read_sha1_file(tag->object.oid.hash, &type, &size); + read_object_file(&tag->object.oid, &type, &size); if (!buffer) return report(options, &tag->object, FSCK_MSG_MISSING_TAG_OBJECT, @@@ -903,6 -922,66 +924,66 @@@ static int fsck_tag(struct tag *tag, co return fsck_tag_buffer(tag, data, size, options); } + struct fsck_gitmodules_data { + struct object *obj; + struct fsck_options *options; + int ret; + }; + + static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) + { + struct fsck_gitmodules_data *data = vdata; + const char *subsection, *key; + int subsection_len; + char *name; + + if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || + !subsection) + return 0; + + name = xmemdupz(subsection, subsection_len); + if (check_submodule_name(name) < 0) + data->ret |= report(data->options, data->obj, + FSCK_MSG_GITMODULES_NAME, + "disallowed submodule name: %s", + name); + free(name); + + return 0; + } + + static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) + { + struct fsck_gitmodules_data data; + + if (!oidset_contains(&gitmodules_found, &blob->object.oid)) + return 0; + oidset_insert(&gitmodules_done, &blob->object.oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. 
+ */ + return report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + ".gitmodules too large to parse"); + } + + data.obj = &blob->object; + data.options = options; + data.ret = 0; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data)) + data.ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + + return data.ret; + } + int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options) { @@@ -910,7 -989,7 +991,7 @@@ return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); if (obj->type == OBJ_BLOB) - return 0; + return fsck_blob((struct blob *)obj, data, size, options); if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, options); if (obj->type == OBJ_COMMIT) @@@ -934,3 -1013,52 +1015,52 @@@ int fsck_error_function(struct fsck_opt error("object %s: %s", describe_object(o, obj), message); return 1; } + + int fsck_finish(struct fsck_options *options) + { + int ret = 0; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&gitmodules_found, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct blob *blob; + enum object_type type; + unsigned long size; + char *buf; + + if (oidset_contains(&gitmodules_done, oid)) + continue; + + blob = lookup_blob(oid); + if (!blob) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + continue; + } + - buf = read_sha1_file(oid->hash, &type, &size); ++ buf = read_object_file(oid, &type, &size); + if (!buf) { + if (is_promisor_object(&blob->object.oid)) + continue; + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_MISSING, + "unable to read .gitmodules blob"); + continue; + } + + if (type == OBJ_BLOB) + ret |= fsck_blob(blob, buf, size, options); + else + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + free(buf); + } + + + oidset_clear(&gitmodules_found); + oidset_clear(&gitmodules_done); + return ret; + } diff --combined git-compat-util.h index f9e4c5f9bc,76cd42bd63..94a108c03e --- a/git-compat-util.h +++ b/git-compat-util.h @@@ -284,10 -284,6 +284,10 @@@ extern char *gitdirname(char *) #include #endif +#ifdef HAVE_SYSINFO +# include +#endif + /* On most systems would have given us this, but * not on some systems (e.g. z/OS). */ @@@ -459,7 -455,6 +459,7 @@@ extern void (*get_warn_routine(void))(c extern void set_die_is_recursing_routine(int (*routine)(void)); extern int starts_with(const char *str, const char *prefix); +extern int istarts_with(const char *str, const char *prefix); /* * If the string "str" begins with the string found in "prefix", return 1. @@@ -1006,6 -1001,23 +1006,23 @@@ static inline int sane_iscase(int x, in return (x & 0x20) == 0; } + /* + * Like skip_prefix, but compare case-insensitively. Note that the comparison + * is done via tolower(), so it is strictly ASCII (no multi-byte characters or + * locale-specific conversions). 
+ */ + static inline int skip_iprefix(const char *str, const char *prefix, + const char **out) + { + do { + if (!*prefix) { + *out = str; + return 1; + } + } while (tolower(*str++) == tolower(*prefix++)); + return 0; + } + static inline int strtoul_ui(char const *s, int base, unsigned int *result) { unsigned long ul; diff --combined path.c index 3308b7b958,4c4a751539..7f109f6618 --- a/path.c +++ b/path.c @@@ -10,7 -10,6 +10,7 @@@ #include "submodule-config.h" #include "path.h" #include "packfile.h" +#include "object-store.h" static int get_st_mode_bits(const char *path, int *mode) { @@@ -383,7 -382,7 +383,7 @@@ static void adjust_git_path(const struc strbuf_splice(buf, 0, buf->len, repo->index_file, strlen(repo->index_file)); else if (dir_prefix(base, "objects")) - replace_dir(buf, git_dir_len + 7, repo->objectdir); + replace_dir(buf, git_dir_len + 7, repo->objects->objectdir); else if (git_hooks_path && dir_prefix(base, "hooks")) replace_dir(buf, git_dir_len + 5, git_hooks_path); else if (repo->different_commondir) @@@ -1306,7 -1305,7 +1306,7 @@@ static int only_spaces_and_periods(cons int is_ntfs_dotgit(const char *name) { - int len; + size_t len; for (len = 0; ; len++) if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { @@@ -1323,6 -1322,90 +1323,90 @@@ } } + static int is_ntfs_dot_generic(const char *name, + const char *dotgit_name, + size_t len, + const char *dotgit_ntfs_shortname_prefix) + { + int saw_tilde; + size_t i; + + if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) { + i = len + 1; + only_spaces_and_periods: + for (;;) { + char c = name[i++]; + if (!c) + return 1; + if (c != ' ' && c != '.') + return 0; + } + } + + /* + * Is it a regular NTFS short name, i.e. shortened to 6 characters, + * followed by ~1, ... ~4? + */ + if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' && + name[7] >= '1' && name[7] <= '4') { + i = 8; + goto only_spaces_and_periods; + } + + /* + * Is it a fall-back NTFS short name (for details, see + * https://en.wikipedia.org/wiki/8.3_filename? + */ + for (i = 0, saw_tilde = 0; i < 8; i++) + if (name[i] == '\0') + return 0; + else if (saw_tilde) { + if (name[i] < '0' || name[i] > '9') + return 0; + } else if (name[i] == '~') { + if (name[++i] < '1' || name[i] > '9') + return 0; + saw_tilde = 1; + } else if (i >= 6) + return 0; + else if (name[i] < 0) { + /* + * We know our needles contain only ASCII, so we clamp + * here to make the results of tolower() sane. + */ + return 0; + } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i]) + return 0; + + goto only_spaces_and_periods; + } + + /* + * Inline helper to make sure compiler resolves strlen() on literals at + * compile time. 
+ */ + static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name, + const char *dotgit_ntfs_shortname_prefix) + { + return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name), + dotgit_ntfs_shortname_prefix); + } + + int is_ntfs_dotgitmodules(const char *name) + { + return is_ntfs_dot_str(name, "gitmodules", "gi7eba"); + } + + int is_ntfs_dotgitignore(const char *name) + { + return is_ntfs_dot_str(name, "gitignore", "gi250a"); + } + + int is_ntfs_dotgitattributes(const char *name) + { + return is_ntfs_dot_str(name, "gitattributes", "gi7d29"); + } + int looks_like_command_line_option(const char *str) { return str && str[0] == '-'; diff --combined read-cache.c index fa3df2e72e,4b35e87847..cebd24849d --- a/read-cache.c +++ b/read-cache.c @@@ -185,7 -185,7 +185,7 @@@ static int ce_compare_link(const struc if (strbuf_readlink(&sb, ce->name, expected_size)) return -1; - buffer = read_sha1_file(ce->oid.hash, &type, &size); + buffer = read_object_file(&ce->oid, &type, &size); if (buffer) { if (size == sb.len) match = memcmp(buffer, sb.buf, size); @@@ -752,7 -752,7 +752,7 @@@ struct cache_entry *make_cache_entry(un int size, len; struct cache_entry *ce, *ret; - if (!verify_path(path)) { + if (!verify_path(path, mode)) { error("Invalid path '%s'", path); return NULL; } @@@ -817,7 -817,7 +817,7 @@@ int ce_same_name(const struct cache_ent * Also, we don't want double slashes or slashes at the * end that can make pathnames ambiguous. */ - static int verify_dotfile(const char *rest) + static int verify_dotfile(const char *rest, unsigned mode) { /* * The first character was '.', but that @@@ -831,8 -831,13 +831,13 @@@ switch (*rest) { /* - * ".git" followed by NUL or slash is bad. This - * shares the path end test with the ".." case. + * ".git" followed by NUL or slash is bad. Note that we match + * case-insensitively here, even if ignore_case is not set. + * This outlaws ".GIT" everywhere out of an abundance of caution, + * since there's really no good reason to allow it. + * + * Once we've seen ".git", we can also find ".gitmodules", etc (also + * case-insensitively). */ case 'g': case 'G': @@@ -840,8 -845,15 +845,15 @@@ break; if (rest[2] != 't' && rest[2] != 'T') break; - rest += 2; - /* fallthrough */ + if (rest[3] == '\0' || is_dir_sep(rest[3])) + return 0; + if (S_ISLNK(mode)) { + rest += 3; + if (skip_iprefix(rest, "modules", &rest) && + (*rest == '\0' || is_dir_sep(*rest))) + return 0; + } + break; case '.': if (rest[1] == '\0' || is_dir_sep(rest[1])) return 0; @@@ -849,7 -861,7 +861,7 @@@ return 1; } - int verify_path(const char *path) + int verify_path(const char *path, unsigned mode) { char c; @@@ -862,12 -874,25 +874,25 @@@ return 1; if (is_dir_sep(c)) { inside: - if (protect_hfs && is_hfs_dotgit(path)) - return 0; - if (protect_ntfs && is_ntfs_dotgit(path)) - return 0; + if (protect_hfs) { + if (is_hfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_hfs_dotgitmodules(path)) + return 0; + } + } + if (protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } + } + c = *path++; - if ((c == '.' && !verify_dotfile(path)) || + if ((c == '.' 
&& !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; } @@@ -1184,7 -1209,7 +1209,7 @@@ static int add_index_entry_with_check(s if (!ok_to_add) return -1; - if (!verify_path(ce->name)) + if (!verify_path(ce->name, ce->ce_mode)) return error("Invalid path '%s'", ce->name); if (!skip_df_check && @@@ -2268,7 -2293,7 +2293,7 @@@ static int do_write_index(struct index_ if (!istate->version) { istate->version = get_index_format_default(); - if (getenv("GIT_TEST_SPLIT_INDEX")) + if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0)) init_split_index(istate); } @@@ -2559,7 -2584,7 +2584,7 @@@ int write_locked_index(struct index_sta goto out; } - if (getenv("GIT_TEST_SPLIT_INDEX")) { + if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0)) { int v = si->base_sha1[0]; if ((v & 15) < 6) istate->cache_changed |= SPLIT_INDEX_ORDERED; @@@ -2693,7 -2718,7 +2718,7 @@@ void *read_blob_data_from_index(const s } if (pos < 0) return NULL; - data = read_sha1_file(istate->cache[pos]->oid.hash, &type, &sz); + data = read_object_file(&istate->cache[pos]->oid, &type, &sz); if (!data || type != OBJ_BLOB) { free(data); return NULL; diff --combined sha1-file.c index f66059ed7d,0000000000..e47098eff2 mode 100644,000000..100644 --- a/sha1-file.c +++ b/sha1-file.c @@@ -1,2260 -1,0 +1,2260 @@@ +/* + * GIT - The information manager from hell + * + * Copyright (C) Linus Torvalds, 2005 + * + * This handles basic git sha1 object files - packing, unpacking, + * creation etc. + */ +#include "cache.h" +#include "config.h" +#include "string-list.h" +#include "lockfile.h" +#include "delta.h" +#include "pack.h" +#include "blob.h" +#include "commit.h" +#include "run-command.h" +#include "tag.h" +#include "tree.h" +#include "tree-walk.h" +#include "refs.h" +#include "pack-revindex.h" +#include "sha1-lookup.h" +#include "bulk-checkin.h" +#include "repository.h" +#include "replace-object.h" +#include "streaming.h" +#include "dir.h" +#include "list.h" +#include "mergesort.h" +#include "quote.h" +#include "packfile.h" +#include "fetch-object.h" +#include "object-store.h" + +/* The maximum size for an object header. 
*/ +#define MAX_HEADER_LEN 32 + +const unsigned char null_sha1[GIT_MAX_RAWSZ]; +const struct object_id null_oid; +const struct object_id empty_tree_oid = { + EMPTY_TREE_SHA1_BIN_LITERAL +}; +const struct object_id empty_blob_oid = { + EMPTY_BLOB_SHA1_BIN_LITERAL +}; + +static void git_hash_sha1_init(git_hash_ctx *ctx) +{ + git_SHA1_Init(&ctx->sha1); +} + +static void git_hash_sha1_update(git_hash_ctx *ctx, const void *data, size_t len) +{ + git_SHA1_Update(&ctx->sha1, data, len); +} + +static void git_hash_sha1_final(unsigned char *hash, git_hash_ctx *ctx) +{ + git_SHA1_Final(hash, &ctx->sha1); +} + +static void git_hash_unknown_init(git_hash_ctx *ctx) +{ + die("trying to init unknown hash"); +} + +static void git_hash_unknown_update(git_hash_ctx *ctx, const void *data, size_t len) +{ + die("trying to update unknown hash"); +} + +static void git_hash_unknown_final(unsigned char *hash, git_hash_ctx *ctx) +{ + die("trying to finalize unknown hash"); +} + +const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { + { + NULL, + 0x00000000, + 0, + 0, + git_hash_unknown_init, + git_hash_unknown_update, + git_hash_unknown_final, + NULL, + NULL, + }, + { + "sha-1", + /* "sha1", big-endian */ + 0x73686131, + GIT_SHA1_RAWSZ, + GIT_SHA1_HEXSZ, + git_hash_sha1_init, + git_hash_sha1_update, + git_hash_sha1_final, + &empty_tree_oid, + &empty_blob_oid, + }, +}; + +/* + * This is meant to hold a *small* number of objects that you would + * want read_sha1_file() to be able to return, but yet you do not want + * to write them into the object store (e.g. a browse-only + * application). + */ +static struct cached_object { + unsigned char sha1[20]; + enum object_type type; + void *buf; + unsigned long size; +} *cached_objects; +static int cached_object_nr, cached_object_alloc; + +static struct cached_object empty_tree = { + EMPTY_TREE_SHA1_BIN_LITERAL, + OBJ_TREE, + "", + 0 +}; + +static struct cached_object *find_cached_object(const unsigned char *sha1) +{ + int i; + struct cached_object *co = cached_objects; + + for (i = 0; i < cached_object_nr; i++, co++) { + if (!hashcmp(co->sha1, sha1)) + return co; + } + if (!hashcmp(sha1, empty_tree.sha1)) + return &empty_tree; + return NULL; +} + + +static int get_conv_flags(unsigned flags) +{ + if (flags & HASH_RENORMALIZE) + return CONV_EOL_RENORMALIZE; + else if (flags & HASH_WRITE_OBJECT) + return global_conv_flags_eol | CONV_WRITE_OBJECT; + else + return 0; +} + + +int mkdir_in_gitdir(const char *path) +{ + if (mkdir(path, 0777)) { + int saved_errno = errno; + struct stat st; + struct strbuf sb = STRBUF_INIT; + + if (errno != EEXIST) + return -1; + /* + * Are we looking at a path in a symlinked worktree + * whose original repository does not yet have it? + * e.g. .git/rr-cache pointing at its original + * repository in which the user hasn't performed any + * conflict resolution yet? 
+ */ + if (lstat(path, &st) || !S_ISLNK(st.st_mode) || + strbuf_readlink(&sb, path, st.st_size) || + !is_absolute_path(sb.buf) || + mkdir(sb.buf, 0777)) { + strbuf_release(&sb); + errno = saved_errno; + return -1; + } + strbuf_release(&sb); + } + return adjust_shared_perm(path); +} + +enum scld_error safe_create_leading_directories(char *path) +{ + char *next_component = path + offset_1st_component(path); + enum scld_error ret = SCLD_OK; + + while (ret == SCLD_OK && next_component) { + struct stat st; + char *slash = next_component, slash_character; + + while (*slash && !is_dir_sep(*slash)) + slash++; + + if (!*slash) + break; + + next_component = slash + 1; + while (is_dir_sep(*next_component)) + next_component++; + if (!*next_component) + break; + + slash_character = *slash; + *slash = '\0'; + if (!stat(path, &st)) { + /* path exists */ + if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + ret = SCLD_EXISTS; + } + } else if (mkdir(path, 0777)) { + if (errno == EEXIST && + !stat(path, &st) && S_ISDIR(st.st_mode)) + ; /* somebody created it since we checked */ + else if (errno == ENOENT) + /* + * Either mkdir() failed because + * somebody just pruned the containing + * directory, or stat() failed because + * the file that was in our way was + * just removed. Either way, inform + * the caller that it might be worth + * trying again: + */ + ret = SCLD_VANISHED; + else + ret = SCLD_FAILED; + } else if (adjust_shared_perm(path)) { + ret = SCLD_PERMS; + } + *slash = slash_character; + } + return ret; +} + +enum scld_error safe_create_leading_directories_const(const char *path) +{ + int save_errno; + /* path points to cache entries, so xstrdup before messing with it */ + char *buf = xstrdup(path); + enum scld_error result = safe_create_leading_directories(buf); + + save_errno = errno; + free(buf); + errno = save_errno; + return result; +} + +int raceproof_create_file(const char *path, create_file_fn fn, void *cb) +{ + /* + * The number of times we will try to remove empty directories + * in the way of path. This is only 1 because if another + * process is racily creating directories that conflict with + * us, we don't want to fight against them. + */ + int remove_directories_remaining = 1; + + /* + * The number of times that we will try to create the + * directories containing path. We are willing to attempt this + * more than once, because another process could be trying to + * clean up empty directories at the same time as we are + * trying to create them. + */ + int create_directories_remaining = 3; + + /* A scratch copy of path, filled lazily if we need it: */ + struct strbuf path_copy = STRBUF_INIT; + + int ret, save_errno; + + /* Sanity check: */ + assert(*path); + +retry_fn: + ret = fn(path, cb); + save_errno = errno; + if (!ret) + goto out; + + if (errno == EISDIR && remove_directories_remaining-- > 0) { + /* + * A directory is in the way. Maybe it is empty; try + * to remove it: + */ + if (!path_copy.len) + strbuf_addstr(&path_copy, path); + + if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) + goto retry_fn; + } else if (errno == ENOENT && create_directories_remaining-- > 0) { + /* + * Maybe the containing directory didn't exist, or + * maybe it was just deleted by a process that is + * racing with us to clean up empty directories. 
Try + * to create it: + */ + enum scld_error scld_result; + + if (!path_copy.len) + strbuf_addstr(&path_copy, path); + + do { + scld_result = safe_create_leading_directories(path_copy.buf); + if (scld_result == SCLD_OK) + goto retry_fn; + } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0); + } + +out: + strbuf_release(&path_copy); + errno = save_errno; + return ret; +} + +static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1) +{ + int i; + for (i = 0; i < 20; i++) { + static char hex[] = "0123456789abcdef"; + unsigned int val = sha1[i]; + strbuf_addch(buf, hex[val >> 4]); + strbuf_addch(buf, hex[val & 0xf]); + if (!i) + strbuf_addch(buf, '/'); + } +} + +void sha1_file_name(struct repository *r, struct strbuf *buf, const unsigned char *sha1) +{ + strbuf_addstr(buf, r->objects->objectdir); + strbuf_addch(buf, '/'); + fill_sha1_path(buf, sha1); +} + +struct strbuf *alt_scratch_buf(struct alternate_object_database *alt) +{ + strbuf_setlen(&alt->scratch, alt->base_len); + return &alt->scratch; +} + +static const char *alt_sha1_path(struct alternate_object_database *alt, + const unsigned char *sha1) +{ + struct strbuf *buf = alt_scratch_buf(alt); + fill_sha1_path(buf, sha1); + return buf->buf; +} + +/* + * Return non-zero iff the path is usable as an alternate object database. + */ +static int alt_odb_usable(struct raw_object_store *o, + struct strbuf *path, + const char *normalized_objdir) +{ + struct alternate_object_database *alt; + + /* Detect cases where alternate disappeared */ + if (!is_directory(path->buf)) { + error("object directory %s does not exist; " + "check .git/objects/info/alternates.", + path->buf); + return 0; + } + + /* + * Prevent the common mistake of listing the same + * thing twice, or object directory itself. + */ + for (alt = o->alt_odb_list; alt; alt = alt->next) { + if (!fspathcmp(path->buf, alt->path)) + return 0; + } + if (!fspathcmp(path->buf, normalized_objdir)) + return 0; + + return 1; +} + +/* + * Prepare alternate object database registry. + * + * The variable alt_odb_list points at the list of struct + * alternate_object_database. The elements on this list come from + * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT + * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, + * whose contents is similar to that environment variable but can be + * LF separated. Its base points at a statically allocated buffer that + * contains "/the/directory/corresponding/to/.git/objects/...", while + * its name points just after the slash at the end of ".git/objects/" + * in the example above, and has enough space to hold 40-byte hex + * SHA1, an extra slash for the first level indirection, and the + * terminating NUL. + */ +static void read_info_alternates(struct repository *r, + const char *relative_base, + int depth); +static int link_alt_odb_entry(struct repository *r, const char *entry, + const char *relative_base, int depth, const char *normalized_objdir) +{ + struct alternate_object_database *ent; + struct strbuf pathbuf = STRBUF_INIT; + + if (!is_absolute_path(entry) && relative_base) { + strbuf_realpath(&pathbuf, relative_base, 1); + strbuf_addch(&pathbuf, '/'); + } + strbuf_addstr(&pathbuf, entry); + + if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) { + error("unable to normalize alternate object path: %s", + pathbuf.buf); + strbuf_release(&pathbuf); + return -1; + } + + /* + * The trailing slash after the directory name is given by + * this function at the end. Remove duplicates. 
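fill_sha1_path() and sha1_file_name() above spread loose objects across 256 subdirectories by turning the first byte of the object name into a directory, yielding paths of the form "<objectdir>/e6/9de2...". A standalone sketch of the same fan-out; the buffer sizing and objdir argument are illustrative only:

#include <stdio.h>

/* Build "<objdir>/e6/9de2..." from a 20-byte binary object name, mirroring
 * the layout produced by fill_sha1_path() above.  Assumes `out` can hold
 * the object directory plus 42 more bytes. */
static void loose_object_path(char *out, const char *objdir,
			      const unsigned char sha1[20])
{
	static const char hex[] = "0123456789abcdef";
	char *p = out + sprintf(out, "%s/", objdir);
	int i;

	for (i = 0; i < 20; i++) {
		*p++ = hex[sha1[i] >> 4];
		*p++ = hex[sha1[i] & 0xf];
		if (!i)
			*p++ = '/';	/* the first byte becomes the subdirectory */
	}
	*p = '\0';
}

int main(void)
{
	unsigned char id[20] = { 0xe6, 0x9d, 0xe2, 0x9b };	/* remaining bytes zero */
	char path[256];

	loose_object_path(path, ".git/objects", id);
	puts(path);	/* .git/objects/e6/9de29b0000...0 */
	return 0;
}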
+ */ + while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') + strbuf_setlen(&pathbuf, pathbuf.len - 1); + + if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir)) { + strbuf_release(&pathbuf); + return -1; + } + + ent = alloc_alt_odb(pathbuf.buf); + + /* add the alternate entry */ + *r->objects->alt_odb_tail = ent; + r->objects->alt_odb_tail = &(ent->next); + ent->next = NULL; + + /* recursively add alternates */ + read_info_alternates(r, pathbuf.buf, depth + 1); + + strbuf_release(&pathbuf); + return 0; +} + +static const char *parse_alt_odb_entry(const char *string, + int sep, + struct strbuf *out) +{ + const char *end; + + strbuf_reset(out); + + if (*string == '#') { + /* comment; consume up to next separator */ + end = strchrnul(string, sep); + } else if (*string == '"' && !unquote_c_style(out, string, &end)) { + /* + * quoted path; unquote_c_style has copied the + * data for us and set "end". Broken quoting (e.g., + * an entry that doesn't end with a quote) falls + * back to the unquoted case below. + */ + } else { + /* normal, unquoted path */ + end = strchrnul(string, sep); + strbuf_add(out, string, end - string); + } + + if (*end) + end++; + return end; +} + +static void link_alt_odb_entries(struct repository *r, const char *alt, + int sep, const char *relative_base, int depth) +{ + struct strbuf objdirbuf = STRBUF_INIT; + struct strbuf entry = STRBUF_INIT; + + if (!alt || !*alt) + return; + + if (depth > 5) { + error("%s: ignoring alternate object stores, nesting too deep.", + relative_base); + return; + } + + strbuf_add_absolute_path(&objdirbuf, r->objects->objectdir); + if (strbuf_normalize_path(&objdirbuf) < 0) + die("unable to normalize object directory: %s", + objdirbuf.buf); + + while (*alt) { + alt = parse_alt_odb_entry(alt, sep, &entry); + if (!entry.len) + continue; + link_alt_odb_entry(r, entry.buf, + relative_base, depth, objdirbuf.buf); + } + strbuf_release(&entry); + strbuf_release(&objdirbuf); +} + +static void read_info_alternates(struct repository *r, + const char *relative_base, + int depth) +{ + char *path; + struct strbuf buf = STRBUF_INIT; + + path = xstrfmt("%s/info/alternates", relative_base); + if (strbuf_read_file(&buf, path, 1024) < 0) { + warn_on_fopen_errors(path); + free(path); + return; + } + + link_alt_odb_entries(r, buf.buf, '\n', relative_base, depth); + strbuf_release(&buf); + free(path); +} + +struct alternate_object_database *alloc_alt_odb(const char *dir) +{ + struct alternate_object_database *ent; + + FLEX_ALLOC_STR(ent, path, dir); + strbuf_init(&ent->scratch, 0); + strbuf_addf(&ent->scratch, "%s/", dir); + ent->base_len = ent->scratch.len; + + return ent; +} + +void add_to_alternates_file(const char *reference) +{ + struct lock_file lock = LOCK_INIT; + char *alts = git_pathdup("objects/info/alternates"); + FILE *in, *out; + int found = 0; + + hold_lock_file_for_update(&lock, alts, LOCK_DIE_ON_ERROR); + out = fdopen_lock_file(&lock, "w"); + if (!out) + die_errno("unable to fdopen alternates lockfile"); + + in = fopen(alts, "r"); + if (in) { + struct strbuf line = STRBUF_INIT; + + while (strbuf_getline(&line, in) != EOF) { + if (!strcmp(reference, line.buf)) { + found = 1; + break; + } + fprintf_or_die(out, "%s\n", line.buf); + } + + strbuf_release(&line); + fclose(in); + } + else if (errno != ENOENT) + die_errno("unable to read alternates file"); + + if (found) { + rollback_lock_file(&lock); + } else { + fprintf_or_die(out, "%s\n", reference); + if (commit_lock_file(&lock)) + die_errno("unable to move new alternates file into 
place"); + if (the_repository->objects->alt_odb_tail) + link_alt_odb_entries(the_repository, reference, + '\n', NULL, 0); + } + free(alts); +} + +void add_to_alternates_memory(const char *reference) +{ + /* + * Make sure alternates are initialized, or else our entry may be + * overwritten when they are. + */ + prepare_alt_odb(the_repository); + + link_alt_odb_entries(the_repository, reference, + '\n', NULL, 0); +} + +/* + * Compute the exact path an alternate is at and returns it. In case of + * error NULL is returned and the human readable error is added to `err` + * `path` may be relative and should point to $GITDIR. + * `err` must not be null. + */ +char *compute_alternate_path(const char *path, struct strbuf *err) +{ + char *ref_git = NULL; + const char *repo, *ref_git_s; + int seen_error = 0; + + ref_git_s = real_path_if_valid(path); + if (!ref_git_s) { + seen_error = 1; + strbuf_addf(err, _("path '%s' does not exist"), path); + goto out; + } else + /* + * Beware: read_gitfile(), real_path() and mkpath() + * return static buffer + */ + ref_git = xstrdup(ref_git_s); + + repo = read_gitfile(ref_git); + if (!repo) + repo = read_gitfile(mkpath("%s/.git", ref_git)); + if (repo) { + free(ref_git); + ref_git = xstrdup(repo); + } + + if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) { + char *ref_git_git = mkpathdup("%s/.git", ref_git); + free(ref_git); + ref_git = ref_git_git; + } else if (!is_directory(mkpath("%s/objects", ref_git))) { + struct strbuf sb = STRBUF_INIT; + seen_error = 1; + if (get_common_dir(&sb, ref_git)) { + strbuf_addf(err, + _("reference repository '%s' as a linked " + "checkout is not supported yet."), + path); + goto out; + } + + strbuf_addf(err, _("reference repository '%s' is not a " + "local repository."), path); + goto out; + } + + if (!access(mkpath("%s/shallow", ref_git), F_OK)) { + strbuf_addf(err, _("reference repository '%s' is shallow"), + path); + seen_error = 1; + goto out; + } + + if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) { + strbuf_addf(err, + _("reference repository '%s' is grafted"), + path); + seen_error = 1; + goto out; + } + +out: + if (seen_error) { + FREE_AND_NULL(ref_git); + } + + return ref_git; +} + +int foreach_alt_odb(alt_odb_fn fn, void *cb) +{ + struct alternate_object_database *ent; + int r = 0; + + prepare_alt_odb(the_repository); + for (ent = the_repository->objects->alt_odb_list; ent; ent = ent->next) { + r = fn(ent, cb); + if (r) + break; + } + return r; +} + +void prepare_alt_odb(struct repository *r) +{ + if (r->objects->alt_odb_tail) + return; + + r->objects->alt_odb_tail = &r->objects->alt_odb_list; + link_alt_odb_entries(r, r->objects->alternate_db, PATH_SEP, NULL, 0); + + read_info_alternates(r, r->objects->objectdir, 0); +} + +/* Returns 1 if we have successfully freshened the file, 0 otherwise. */ +static int freshen_file(const char *fn) +{ + struct utimbuf t; + t.actime = t.modtime = time(NULL); + return !utime(fn, &t); +} + +/* + * All of the check_and_freshen functions return 1 if the file exists and was + * freshened (if freshening was requested), 0 otherwise. If they return + * 0, you should not assume that it is safe to skip a write of the object (it + * either does not exist on disk, or has a stale mtime and may be subject to + * pruning). 
+ */ +int check_and_freshen_file(const char *fn, int freshen) +{ + if (access(fn, F_OK)) + return 0; + if (freshen && !freshen_file(fn)) + return 0; + return 1; +} + +static int check_and_freshen_local(const unsigned char *sha1, int freshen) +{ + static struct strbuf buf = STRBUF_INIT; + + strbuf_reset(&buf); + sha1_file_name(the_repository, &buf, sha1); + + return check_and_freshen_file(buf.buf, freshen); +} + +static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen) +{ + struct alternate_object_database *alt; + prepare_alt_odb(the_repository); + for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) { + const char *path = alt_sha1_path(alt, sha1); + if (check_and_freshen_file(path, freshen)) + return 1; + } + return 0; +} + +static int check_and_freshen(const unsigned char *sha1, int freshen) +{ + return check_and_freshen_local(sha1, freshen) || + check_and_freshen_nonlocal(sha1, freshen); +} + +int has_loose_object_nonlocal(const unsigned char *sha1) +{ + return check_and_freshen_nonlocal(sha1, 0); +} + +static int has_loose_object(const unsigned char *sha1) +{ + return check_and_freshen(sha1, 0); +} + +static void mmap_limit_check(size_t length) +{ + static size_t limit = 0; + if (!limit) { + limit = git_env_ulong("GIT_MMAP_LIMIT", 0); + if (!limit) + limit = SIZE_MAX; + } + if (length > limit) + die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX, + (uintmax_t)length, (uintmax_t)limit); +} + +void *xmmap_gently(void *start, size_t length, + int prot, int flags, int fd, off_t offset) +{ + void *ret; + + mmap_limit_check(length); + ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) { + if (!length) + return NULL; + release_pack_memory(length); + ret = mmap(start, length, prot, flags, fd, offset); + } + return ret; +} + +void *xmmap(void *start, size_t length, + int prot, int flags, int fd, off_t offset) +{ + void *ret = xmmap_gently(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) + die_errno("mmap failed"); + return ret; +} + +/* + * With an in-core object data in "map", rehash it to make sure the + * object name actually matches "sha1" to detect object corruption. + * With "map" == NULL, try reading the object named with "sha1" using + * the streaming interface and rehash it to do the same. + */ +int check_object_signature(const struct object_id *oid, void *map, + unsigned long size, const char *type) +{ + struct object_id real_oid; + enum object_type obj_type; + struct git_istream *st; + git_hash_ctx c; + char hdr[MAX_HEADER_LEN]; + int hdrlen; + + if (map) { + hash_object_file(map, size, type, &real_oid); + return oidcmp(oid, &real_oid) ? -1 : 0; + } + + st = open_istream(oid, &obj_type, &size, NULL); + if (!st) + return -1; + + /* Generate the header */ + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", type_name(obj_type), size) + 1; + + /* Sha1.. */ + the_hash_algo->init_fn(&c); + the_hash_algo->update_fn(&c, hdr, hdrlen); + for (;;) { + char buf[1024 * 16]; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); + + if (readlen < 0) { + close_istream(st); + return -1; + } + if (!readlen) + break; + the_hash_algo->update_fn(&c, buf, readlen); + } + the_hash_algo->final_fn(real_oid.hash, &c); + close_istream(st); + return oidcmp(oid, &real_oid) ? 
-1 : 0; +} + +int git_open_cloexec(const char *name, int flags) +{ + int fd; + static int o_cloexec = O_CLOEXEC; + + fd = open(name, flags | o_cloexec); + if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) { + /* Try again w/o O_CLOEXEC: the kernel might not support it */ + o_cloexec &= ~O_CLOEXEC; + fd = open(name, flags | o_cloexec); + } + +#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + { + static int fd_cloexec = FD_CLOEXEC; + + if (!o_cloexec && 0 <= fd && fd_cloexec) { + /* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */ + int flags = fcntl(fd, F_GETFD); + if (fcntl(fd, F_SETFD, flags | fd_cloexec)) + fd_cloexec = 0; + } + } +#endif + return fd; +} + +/* + * Find "sha1" as a loose object in the local repository or in an alternate. + * Returns 0 on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to sha1_file_name(), etc. + */ +static int stat_sha1_file(struct repository *r, const unsigned char *sha1, + struct stat *st, const char **path) +{ + struct alternate_object_database *alt; + static struct strbuf buf = STRBUF_INIT; + + strbuf_reset(&buf); + sha1_file_name(r, &buf, sha1); + *path = buf.buf; + + if (!lstat(*path, st)) + return 0; + + prepare_alt_odb(r); + errno = ENOENT; + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + *path = alt_sha1_path(alt, sha1); + if (!lstat(*path, st)) + return 0; + } + + return -1; +} + +/* + * Like stat_sha1_file(), but actually open the object and return the + * descriptor. See the caveats on the "path" parameter above. + */ +static int open_sha1_file(struct repository *r, + const unsigned char *sha1, const char **path) +{ + int fd; + struct alternate_object_database *alt; + int most_interesting_errno; + static struct strbuf buf = STRBUF_INIT; + + strbuf_reset(&buf); + sha1_file_name(r, &buf, sha1); + *path = buf.buf; + + fd = git_open(*path); + if (fd >= 0) + return fd; + most_interesting_errno = errno; + + prepare_alt_odb(r); + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + *path = alt_sha1_path(alt, sha1); + fd = git_open(*path); + if (fd >= 0) + return fd; + if (most_interesting_errno == ENOENT) + most_interesting_errno = errno; + } + errno = most_interesting_errno; + return -1; +} + +/* + * Map the loose object at "path" if it is not NULL, or the path found by + * searching for a loose object named "sha1". 
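git_open_cloexec() above copes with kernels that reject O_CLOEXEC: it retries the open() without the flag and, where fcntl(2) supports it, adds close-on-exec after the fact. A trimmed standalone version of that fallback (read-only, and without caching which variant worked):

#include <errno.h>
#include <fcntl.h>

static int open_cloexec_ro(const char *name)
{
	int fd = open(name, O_RDONLY | O_CLOEXEC);

	if (fd < 0 && errno == EINVAL) {
		/* old kernel: open without the flag, then mark the fd */
		fd = open(name, O_RDONLY);
		if (fd >= 0) {
			int flags = fcntl(fd, F_GETFD);
			if (flags >= 0)
				fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
		}
	}
	return fd;
}

int main(int argc, char **argv)
{
	return (argc > 1 && open_cloexec_ro(argv[1]) >= 0) ? 0 : 1;
}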
+ */ +static void *map_sha1_file_1(struct repository *r, const char *path, + const unsigned char *sha1, unsigned long *size) +{ + void *map; + int fd; + + if (path) + fd = git_open(path); + else + fd = open_sha1_file(r, sha1, &path); + map = NULL; + if (fd >= 0) { + struct stat st; + + if (!fstat(fd, &st)) { + *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error("object file %s is empty", path); + return NULL; + } + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); + } + close(fd); + } + return map; +} + +void *map_sha1_file(struct repository *r, + const unsigned char *sha1, unsigned long *size) +{ + return map_sha1_file_1(r, NULL, sha1, size); +} + +static int unpack_sha1_short_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) +{ + /* Get the data stream */ + memset(stream, 0, sizeof(*stream)); + stream->next_in = map; + stream->avail_in = mapsize; + stream->next_out = buffer; + stream->avail_out = bufsiz; + + git_inflate_init(stream); + return git_inflate(stream, 0); +} + +int unpack_sha1_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) +{ + int status = unpack_sha1_short_header(stream, map, mapsize, + buffer, bufsiz); + + if (status < Z_OK) + return status; + + /* Make sure we have the terminating NUL */ + if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return -1; + return 0; +} + +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, + unsigned long mapsize, void *buffer, + unsigned long bufsiz, struct strbuf *header) +{ + int status; + + status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz); + if (status < Z_OK) + return -1; + + /* + * Check if entire header is unpacked in the first iteration. + */ + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + + /* + * buffer[0..bufsiz] was not large enough. Copy the partial + * result out to header, and then append the result of further + * reading the stream. + */ + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + stream->next_out = buffer; + stream->avail_out = bufsiz; + + do { + status = git_inflate(stream, 0); + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + stream->next_out = buffer; + stream->avail_out = bufsiz; + } while (status != Z_STREAM_END); + return -1; +} + +static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) +{ + int bytes = strlen(buffer) + 1; + unsigned char *buf = xmallocz(size); + unsigned long n; + int status = Z_OK; + + n = stream->total_out - bytes; + if (n > size) + n = size; + memcpy(buf, (char *) buffer + bytes, n); + bytes = n; + if (bytes <= size) { + /* + * The above condition must be (bytes <= size), not + * (bytes < size). In other words, even though we + * expect no more output and set avail_out to zero, + * the input zlib stream may have bytes that express + * "this concludes the stream", and we *do* want to + * eat that input. + * + * Otherwise we would not be able to test that we + * consumed all the input to reach the expected size; + * we also want to check that zlib tells us that all + * went well with status == Z_STREAM_END at the end. 
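The comment just above is the crux of unpack_sha1_rest(): even after all expected bytes have been produced, zlib may need one more inflate() call with Z_FINISH to consume the trailing "end of stream" bytes and report Z_STREAM_END, and only then can the caller be sure all input was accounted for. A self-contained version of that loop with plain zlib rather than Git's git_inflate wrappers:

#include <string.h>
#include <zlib.h>

/* Inflate a complete zlib stream into dst and insist on Z_STREAM_END:
 * keep calling inflate(Z_FINISH) while it returns Z_OK so the stream's
 * closing bytes are eaten too, then check nothing is left over. */
static int inflate_all(const unsigned char *src, size_t srclen,
		       unsigned char *dst, size_t dstlen)
{
	z_stream s;
	int status;

	memset(&s, 0, sizeof(s));
	if (inflateInit(&s) != Z_OK)
		return -1;
	s.next_in = (unsigned char *)src;
	s.avail_in = srclen;
	s.next_out = dst;
	s.avail_out = dstlen;

	do {
		status = inflate(&s, Z_FINISH);
	} while (status == Z_OK);

	inflateEnd(&s);
	return (status == Z_STREAM_END && !s.avail_in) ? 0 : -1;
}

int main(void)
{
	const char msg[] = "hello, zlib";
	unsigned char packed[128], unpacked[128];
	uLongf packedlen = sizeof(packed);

	if (compress(packed, &packedlen, (const unsigned char *)msg, sizeof(msg)) != Z_OK)
		return 1;
	return inflate_all(packed, packedlen, unpacked, sizeof(unpacked)) ? 1 : 0;
}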
+ */ + stream->next_out = buf + bytes; + stream->avail_out = size - bytes; + while (status == Z_OK) + status = git_inflate(stream, Z_FINISH); + } + if (status == Z_STREAM_END && !stream->avail_in) { + git_inflate_end(stream); + return buf; + } + + if (status < 0) + error("corrupt loose object '%s'", sha1_to_hex(sha1)); + else if (stream->avail_in) + error("garbage at end of loose object '%s'", + sha1_to_hex(sha1)); + free(buf); + return NULL; +} + +/* + * We used to just use "sscanf()", but that's actually way + * too permissive for what we want to check. So do an anal + * object header parse by hand. + */ +static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, + unsigned int flags) +{ + const char *type_buf = hdr; + unsigned long size; + int type, type_len = 0; + + /* + * The type can be of any size but is followed by + * a space. + */ + for (;;) { + char c = *hdr++; + if (!c) + return -1; + if (c == ' ') + break; + type_len++; + } + + type = type_from_string_gently(type_buf, type_len, 1); + if (oi->type_name) + strbuf_add(oi->type_name, type_buf, type_len); + /* + * Set type to 0 if its an unknown object and + * we're obtaining the type using '--allow-unknown-type' + * option. + */ + if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0)) + type = 0; + else if (type < 0) + die("invalid object type"); + if (oi->typep) + *oi->typep = type; + + /* + * The length must follow immediately, and be in canonical + * decimal format (ie "010" is not valid). + */ + size = *hdr++ - '0'; + if (size > 9) + return -1; + if (size) { + for (;;) { + unsigned long c = *hdr - '0'; + if (c > 9) + break; + hdr++; + size = size * 10 + c; + } + } + + if (oi->sizep) + *oi->sizep = size; + + /* + * The length must be followed by a zero byte + */ + return *hdr ? -1 : type; +} + +int parse_sha1_header(const char *hdr, unsigned long *sizep) +{ + struct object_info oi = OBJECT_INFO_INIT; + + oi.sizep = sizep; + return parse_sha1_header_extended(hdr, &oi, 0); +} + +static int sha1_loose_object_info(struct repository *r, + const unsigned char *sha1, + struct object_info *oi, int flags) +{ + int status = 0; + unsigned long mapsize; + void *map; + git_zstream stream; + char hdr[MAX_HEADER_LEN]; + struct strbuf hdrbuf = STRBUF_INIT; + unsigned long size_scratch; + + if (oi->delta_base_sha1) + hashclr(oi->delta_base_sha1); + + /* + * If we don't care about type or size, then we don't + * need to look inside the object at all. Note that we + * do not optimize out the stat call, even if the + * caller doesn't care about the disk-size, since our + * return value implicitly indicates whether the + * object even exists. 
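The size parsing in parse_sha1_header_extended() above is deliberately stricter than sscanf(): the length must be canonical decimal, so "0" is fine but "010" or a missing number is rejected, and parsing must stop exactly at the NUL that ends the header. A standalone restatement of just that rule, with a few sample inputs (illustrative, not Git's code):

#include <assert.h>

static int parse_canonical_size(const char *p, unsigned long *out)
{
	unsigned long size;

	if (*p < '0' || *p > '9')
		return -1;				/* no digits at all */
	if (*p == '0' && p[1] >= '0' && p[1] <= '9')
		return -1;				/* leading zero: not canonical */
	for (size = 0; *p >= '0' && *p <= '9'; p++)
		size = size * 10 + (*p - '0');
	*out = size;
	return *p ? -1 : 0;				/* must stop right at the NUL */
}

int main(void)
{
	unsigned long n;

	assert(parse_canonical_size("1234", &n) == 0 && n == 1234);
	assert(parse_canonical_size("0", &n) == 0 && n == 0);
	assert(parse_canonical_size("010", &n) == -1);
	assert(parse_canonical_size("", &n) == -1);
	assert(parse_canonical_size("12x", &n) == -1);
	return 0;
}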
+ */ + if (!oi->typep && !oi->type_name && !oi->sizep && !oi->contentp) { + const char *path; + struct stat st; + if (stat_sha1_file(r, sha1, &st, &path) < 0) + return -1; + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + return 0; + } + + map = map_sha1_file(r, sha1, &mapsize); + if (!map) + return -1; + + if (!oi->sizep) + oi->sizep = &size_scratch; + + if (oi->disk_sizep) + *oi->disk_sizep = mapsize; + if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) { + if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) + status = error("unable to unpack %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) + status = error("unable to unpack %s header", + sha1_to_hex(sha1)); + if (status < 0) + ; /* Do nothing */ + else if (hdrbuf.len) { + if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0) + status = error("unable to parse %s header with --allow-unknown-type", + sha1_to_hex(sha1)); + } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0) + status = error("unable to parse %s header", sha1_to_hex(sha1)); + + if (status >= 0 && oi->contentp) { + *oi->contentp = unpack_sha1_rest(&stream, hdr, + *oi->sizep, sha1); + if (!*oi->contentp) { + git_inflate_end(&stream); + status = -1; + } + } else + git_inflate_end(&stream); + + munmap(map, mapsize); + if (status && oi->typep) + *oi->typep = status; + if (oi->sizep == &size_scratch) + oi->sizep = NULL; + strbuf_release(&hdrbuf); + oi->whence = OI_LOOSE; + return (status < 0) ? status : 0; +} + +int fetch_if_missing = 1; + +int oid_object_info_extended(struct repository *r, const struct object_id *oid, + struct object_info *oi, unsigned flags) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + const struct object_id *real = oid; + int already_retried = 0; + + if (flags & OBJECT_INFO_LOOKUP_REPLACE) + real = lookup_replace_object(r, oid); + + if (is_null_oid(real)) + return -1; + + if (!oi) + oi = &blank_oi; + + if (!(flags & OBJECT_INFO_SKIP_CACHED)) { + struct cached_object *co = find_cached_object(real->hash); + if (co) { + if (oi->typep) + *(oi->typep) = co->type; + if (oi->sizep) + *(oi->sizep) = co->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_sha1) + hashclr(oi->delta_base_sha1); + if (oi->type_name) + strbuf_addstr(oi->type_name, type_name(co->type)); + if (oi->contentp) + *oi->contentp = xmemdupz(co->buf, co->size); + oi->whence = OI_CACHED; + return 0; + } + } + + while (1) { + if (find_pack_entry(r, real->hash, &e)) + break; + + if (flags & OBJECT_INFO_IGNORE_LOOSE) + return -1; + + /* Most likely it's a loose object. */ + if (!sha1_loose_object_info(r, real->hash, oi, flags)) + return 0; + + /* Not a loose object; someone else may have just packed it. */ + if (!(flags & OBJECT_INFO_QUICK)) { + reprepare_packed_git(r); + if (find_pack_entry(r, real->hash, &e)) + break; + } + + /* Check if it is a missing object */ + if (fetch_if_missing && repository_format_partial_clone && + !already_retried && r == the_repository) { + /* + * TODO Investigate having fetch_object() return + * TODO error/success and stopping the music here. + * TODO Pass a repository struct through fetch_object, + * such that arbitrary repositories work. 
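oid_object_info_extended() above probes in a fixed order: packfiles, then loose objects, then one re-scan of the pack directory in case a concurrent repack just moved the object, and finally, in a partial clone, a single fetch from the promisor remote before giving up. A schematic standalone restatement of that control flow, with hypothetical stubs in place of the real lookups:

/* Schematic only: mirrors the probe order of oid_object_info_extended(),
 * with the pack/loose/fetch machinery replaced by stand-in stubs. */
static int found_in_pack(const unsigned char *id)	{ (void)id; return 0; }
static int found_loose(const unsigned char *id)		{ (void)id; return 0; }
static void rescan_packs(void)				{ }
static void fetch_from_promisor(const unsigned char *id) { (void)id; }

static int locate_object(const unsigned char *id, int partial_clone)
{
	int fetch_attempted = 0;

	for (;;) {
		if (found_in_pack(id))
			return 0;
		if (found_loose(id))
			return 0;
		rescan_packs();		/* somebody may have just repacked it */
		if (found_in_pack(id))
			return 0;
		if (partial_clone && !fetch_attempted) {
			fetch_from_promisor(id);
			fetch_attempted = 1;
			continue;	/* look again after the fetch attempt */
		}
		return -1;		/* really missing */
	}
}

int main(void)
{
	unsigned char id[20] = { 0 };
	return locate_object(id, 0) == -1 ? 0 : 1;
}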
+ */ + fetch_object(repository_format_partial_clone, real->hash); + already_retried = 1; + continue; + } + + return -1; + } + + if (oi == &blank_oi) + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + return 0; + rtype = packed_object_info(r, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, real->hash); + return oid_object_info_extended(r, real, oi, 0); + } else if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + +/* returns enum object_type or negative */ +int oid_object_info(struct repository *r, + const struct object_id *oid, + unsigned long *sizep) +{ + enum object_type type; + struct object_info oi = OBJECT_INFO_INIT; + + oi.typep = &type; + oi.sizep = sizep; + if (oid_object_info_extended(r, oid, &oi, + OBJECT_INFO_LOOKUP_REPLACE) < 0) + return -1; + return type; +} + +static void *read_object(const unsigned char *sha1, enum object_type *type, + unsigned long *size) +{ + struct object_id oid; + struct object_info oi = OBJECT_INFO_INIT; + void *content; + oi.typep = type; + oi.sizep = size; + oi.contentp = &content; + + hashcpy(oid.hash, sha1); + + if (oid_object_info_extended(the_repository, &oid, &oi, 0) < 0) + return NULL; + return content; +} + +int pretend_object_file(void *buf, unsigned long len, enum object_type type, + struct object_id *oid) +{ + struct cached_object *co; + + hash_object_file(buf, len, type_name(type), oid); + if (has_sha1_file(oid->hash) || find_cached_object(oid->hash)) + return 0; + ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc); + co = &cached_objects[cached_object_nr++]; + co->size = len; + co->type = type; + co->buf = xmalloc(len); + memcpy(co->buf, buf, len); + hashcpy(co->sha1, oid->hash); + return 0; +} + +/* + * This function dies on corrupt objects; the callers who want to + * deal with them should arrange to call read_object() and give error + * messages themselves. + */ +void *read_object_file_extended(const struct object_id *oid, + enum object_type *type, + unsigned long *size, + int lookup_replace) +{ + void *data; + const struct packed_git *p; + const char *path; + struct stat st; + const struct object_id *repl = lookup_replace ? 
+ lookup_replace_object(the_repository, oid) : oid; + + errno = 0; + data = read_object(repl->hash, type, size); + if (data) + return data; + + if (errno && errno != ENOENT) + die_errno("failed to read object %s", oid_to_hex(oid)); + + /* die if we replaced an object with one that does not exist */ + if (repl != oid) + die("replacement %s not found for %s", + oid_to_hex(repl), oid_to_hex(oid)); + + if (!stat_sha1_file(the_repository, repl->hash, &st, &path)) + die("loose object %s (stored in %s) is corrupt", + oid_to_hex(repl), path); + + if ((p = has_packed_and_bad(repl->hash)) != NULL) + die("packed object %s (stored in %s) is corrupt", + oid_to_hex(repl), p->pack_name); + + return NULL; +} + +void *read_object_with_reference(const struct object_id *oid, + const char *required_type_name, + unsigned long *size, + struct object_id *actual_oid_return) +{ + enum object_type type, required_type; + void *buffer; + unsigned long isize; + struct object_id actual_oid; + + required_type = type_from_string(required_type_name); + oidcpy(&actual_oid, oid); + while (1) { + int ref_length = -1; + const char *ref_type = NULL; + + buffer = read_object_file(&actual_oid, &type, &isize); + if (!buffer) + return NULL; + if (type == required_type) { + *size = isize; + if (actual_oid_return) + oidcpy(actual_oid_return, &actual_oid); + return buffer; + } + /* Handle references */ + else if (type == OBJ_COMMIT) + ref_type = "tree "; + else if (type == OBJ_TAG) + ref_type = "object "; + else { + free(buffer); + return NULL; + } + ref_length = strlen(ref_type); + + if (ref_length + GIT_SHA1_HEXSZ > isize || + memcmp(buffer, ref_type, ref_length) || + get_oid_hex((char *) buffer + ref_length, &actual_oid)) { + free(buffer); + return NULL; + } + free(buffer); + /* Now we have the ID of the referred-to object in + * actual_oid. Check again. */ + } +} + +static void write_object_file_prepare(const void *buf, unsigned long len, + const char *type, struct object_id *oid, + char *hdr, int *hdrlen) +{ + git_hash_ctx c; + + /* Generate the header */ + *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1; + + /* Sha1.. */ + the_hash_algo->init_fn(&c); + the_hash_algo->update_fn(&c, hdr, *hdrlen); + the_hash_algo->update_fn(&c, buf, len); + the_hash_algo->final_fn(oid->hash, &c); +} + +/* + * Move the just written object into its final resting place. + */ +int finalize_object_file(const char *tmpfile, const char *filename) +{ + int ret = 0; + + if (object_creation_mode == OBJECT_CREATION_USES_RENAMES) + goto try_rename; + else if (link(tmpfile, filename)) + ret = errno; + + /* + * Coda hack - coda doesn't like cross-directory links, + * so we fall back to a rename, which will mean that it + * won't be able to check collisions, but that's not a + * big deal. + * + * The same holds for FAT formatted media. + * + * When this succeeds, we just return. We have nothing + * left to unlink. + */ + if (ret && ret != EEXIST) { + try_rename: + if (!rename(tmpfile, filename)) + goto out; + ret = errno; + } + unlink_or_warn(tmpfile); + if (ret) { + if (ret != EEXIST) { + return error_errno("unable to write sha1 filename %s", filename); + } + /* FIXME!!! Collision check here ? 
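write_object_file_prepare() above spells out what an object id is: the SHA-1 of the header "<type> <size>", including its terminating NUL, immediately followed by the body. A standalone illustration; using OpenSSL's SHA-1 here is an assumption of the sketch, since Git links its own hash backends:

#include <openssl/sha.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *body = "";			/* an empty blob */
	char hdr[32];
	int hdrlen = snprintf(hdr, sizeof(hdr), "blob %lu",
			      (unsigned long)strlen(body)) + 1;
	unsigned char id[SHA_DIGEST_LENGTH];
	SHA_CTX c;
	int i;

	SHA1_Init(&c);
	SHA1_Update(&c, hdr, hdrlen);		/* header, NUL included */
	SHA1_Update(&c, body, strlen(body));
	SHA1_Final(id, &c);

	for (i = 0; i < SHA_DIGEST_LENGTH; i++)
		printf("%02x", id[i]);
	putchar('\n');	/* e69de29bb2d1d6434b8b29ae775ad8c2e48c5391, the empty-blob id */
	return 0;
}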
*/ + } + +out: + if (adjust_shared_perm(filename)) + return error("unable to set permission to '%s'", filename); + return 0; +} + +static int write_buffer(int fd, const void *buf, size_t len) +{ + if (write_in_full(fd, buf, len) < 0) + return error_errno("file write error"); + return 0; +} + +int hash_object_file(const void *buf, unsigned long len, const char *type, + struct object_id *oid) +{ + char hdr[MAX_HEADER_LEN]; + int hdrlen = sizeof(hdr); + write_object_file_prepare(buf, len, type, oid, hdr, &hdrlen); + return 0; +} + +/* Finalize a file on disk, and close it. */ +static void close_sha1_file(int fd) +{ + if (fsync_object_files) + fsync_or_die(fd, "sha1 file"); + if (close(fd) != 0) + die_errno("error when closing sha1 file"); +} + +/* Size of directory component, including the ending '/' */ +static inline int directory_size(const char *filename) +{ + const char *s = strrchr(filename, '/'); + if (!s) + return 0; + return s - filename + 1; +} + +/* + * This creates a temporary file in the same directory as the final + * 'filename' + * + * We want to avoid cross-directory filename renames, because those + * can have problems on various filesystems (FAT, NFS, Coda). + */ +static int create_tmpfile(struct strbuf *tmp, const char *filename) +{ + int fd, dirlen = directory_size(filename); + + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen); + strbuf_addstr(tmp, "tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); + if (fd < 0 && dirlen && errno == ENOENT) { + /* + * Make sure the directory exists; note that the contents + * of the buffer are undefined after mkstemp returns an + * error, so we have to rewrite the whole buffer from + * scratch. + */ + strbuf_reset(tmp); + strbuf_add(tmp, filename, dirlen - 1); + if (mkdir(tmp->buf, 0777) && errno != EEXIST) + return -1; + if (adjust_shared_perm(tmp->buf)) + return -1; + + /* Try again */ + strbuf_addstr(tmp, "/tmp_obj_XXXXXX"); + fd = git_mkstemp_mode(tmp->buf, 0444); + } + return fd; +} + +static int write_loose_object(const struct object_id *oid, char *hdr, + int hdrlen, const void *buf, unsigned long len, + time_t mtime) +{ + int fd, ret; + unsigned char compressed[4096]; + git_zstream stream; + git_hash_ctx c; + struct object_id parano_oid; + static struct strbuf tmp_file = STRBUF_INIT; + static struct strbuf filename = STRBUF_INIT; + + strbuf_reset(&filename); + sha1_file_name(the_repository, &filename, oid->hash); + + fd = create_tmpfile(&tmp_file, filename.buf); + if (fd < 0) { + if (errno == EACCES) + return error("insufficient permission for adding an object to repository database %s", get_object_directory()); + else + return error_errno("unable to create temporary file"); + } + + /* Set it up */ + git_deflate_init(&stream, zlib_compression_level); + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + the_hash_algo->init_fn(&c); + + /* First header.. */ + stream.next_in = (unsigned char *)hdr; + stream.avail_in = hdrlen; + while (git_deflate(&stream, 0) == Z_OK) + ; /* nothing */ + the_hash_algo->update_fn(&c, hdr, hdrlen); + + /* Then the data itself.. 
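create_tmpfile() and finalize_object_file() above follow the usual pattern of writing into a temporary file in the destination's own directory and only then linking or renaming it into place, so a half-written object is never visible under its final name and the rename never crosses filesystems. A condensed standalone sketch; the link()-based collision handling and shared-permission fixups are left out:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Write buf to `dest` via a temp file in the same directory, then rename. */
static int write_then_rename(const char *dest, const void *buf, size_t len)
{
	char tmp[4096];
	int fd;

	snprintf(tmp, sizeof(tmp), "%s.tmp_XXXXXX", dest);
	fd = mkstemp(tmp);
	if (fd < 0)
		return -1;
	if (write(fd, buf, len) != (ssize_t)len || close(fd)) {
		unlink(tmp);
		return -1;
	}
	if (rename(tmp, dest)) {
		unlink(tmp);
		return -1;
	}
	return 0;
}

int main(void)
{
	const char msg[] = "hello\n";
	return write_then_rename("demo.txt", msg, sizeof(msg) - 1) ? 1 : 0;
}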
*/ + stream.next_in = (void *)buf; + stream.avail_in = len; + do { + unsigned char *in0 = stream.next_in; + ret = git_deflate(&stream, Z_FINISH); + the_hash_algo->update_fn(&c, in0, stream.next_in - in0); + if (write_buffer(fd, compressed, stream.next_out - compressed) < 0) + die("unable to write sha1 file"); + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + } while (ret == Z_OK); + + if (ret != Z_STREAM_END) + die("unable to deflate new object %s (%d)", oid_to_hex(oid), + ret); + ret = git_deflate_end_gently(&stream); + if (ret != Z_OK) + die("deflateEnd on object %s failed (%d)", oid_to_hex(oid), + ret); + the_hash_algo->final_fn(parano_oid.hash, &c); + if (oidcmp(oid, ¶no_oid) != 0) + die("confused by unstable object source data for %s", + oid_to_hex(oid)); + + close_sha1_file(fd); + + if (mtime) { + struct utimbuf utb; + utb.actime = mtime; + utb.modtime = mtime; + if (utime(tmp_file.buf, &utb) < 0) + warning_errno("failed utime() on %s", tmp_file.buf); + } + + return finalize_object_file(tmp_file.buf, filename.buf); +} + +static int freshen_loose_object(const unsigned char *sha1) +{ + return check_and_freshen(sha1, 1); +} + +static int freshen_packed_object(const unsigned char *sha1) +{ + struct pack_entry e; + if (!find_pack_entry(the_repository, sha1, &e)) + return 0; + if (e.p->freshened) + return 1; + if (!freshen_file(e.p->pack_name)) + return 0; + e.p->freshened = 1; + return 1; +} + +int write_object_file(const void *buf, unsigned long len, const char *type, + struct object_id *oid) +{ + char hdr[MAX_HEADER_LEN]; + int hdrlen = sizeof(hdr); + + /* Normally if we have it in the pack then we do not bother writing + * it out into .git/objects/??/?{38} file. + */ + write_object_file_prepare(buf, len, type, oid, hdr, &hdrlen); + if (freshen_packed_object(oid->hash) || freshen_loose_object(oid->hash)) + return 0; + return write_loose_object(oid, hdr, hdrlen, buf, len, 0); +} + +int hash_object_file_literally(const void *buf, unsigned long len, + const char *type, struct object_id *oid, + unsigned flags) +{ + char *header; + int hdrlen, status = 0; + + /* type string, SP, %lu of the length plus NUL must fit this */ + hdrlen = strlen(type) + MAX_HEADER_LEN; + header = xmalloc(hdrlen); + write_object_file_prepare(buf, len, type, oid, header, &hdrlen); + + if (!(flags & HASH_WRITE_OBJECT)) + goto cleanup; + if (freshen_packed_object(oid->hash) || freshen_loose_object(oid->hash)) + goto cleanup; + status = write_loose_object(oid, header, hdrlen, buf, len, 0); + +cleanup: + free(header); + return status; +} + +int force_object_loose(const struct object_id *oid, time_t mtime) +{ + void *buf; + unsigned long len; + enum object_type type; + char hdr[MAX_HEADER_LEN]; + int hdrlen; + int ret; + + if (has_loose_object(oid->hash)) + return 0; + buf = read_object(oid->hash, &type, &len); + if (!buf) + return error("cannot read sha1_file for %s", oid_to_hex(oid)); + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", type_name(type), len) + 1; + ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime); + free(buf); + + return ret; +} + +int has_sha1_file_with_flags(const unsigned char *sha1, int flags) +{ + struct object_id oid; + if (!startup_info->have_repository) + return 0; + hashcpy(oid.hash, sha1); + return oid_object_info_extended(the_repository, &oid, NULL, + flags | OBJECT_INFO_SKIP_CACHED) >= 0; +} + +int has_object_file(const struct object_id *oid) +{ + return has_sha1_file(oid->hash); +} + +int has_object_file_with_flags(const struct object_id *oid, int flags) +{ 
+ return has_sha1_file_with_flags(oid->hash, flags); +} + +static void check_tree(const void *buf, size_t size) +{ + struct tree_desc desc; + struct name_entry entry; + + init_tree_desc(&desc, buf, size); + while (tree_entry(&desc, &entry)) + /* do nothing + * tree_entry() will die() on malformed entries */ + ; +} + +static void check_commit(const void *buf, size_t size) +{ + struct commit c; + memset(&c, 0, sizeof(c)); + if (parse_commit_buffer(&c, buf, size)) + die("corrupt commit"); +} + +static void check_tag(const void *buf, size_t size) +{ + struct tag t; + memset(&t, 0, sizeof(t)); + if (parse_tag_buffer(&t, buf, size)) + die("corrupt tag"); +} + +static int index_mem(struct object_id *oid, void *buf, size_t size, + enum object_type type, + const char *path, unsigned flags) +{ + int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; + + if (!type) + type = OBJ_BLOB; + + /* + * Convert blobs to git internal format + */ + if ((type == OBJ_BLOB) && path) { + struct strbuf nbuf = STRBUF_INIT; + if (convert_to_git(&the_index, path, buf, size, &nbuf, + get_conv_flags(flags))) { + buf = strbuf_detach(&nbuf, &size); + re_allocated = 1; + } + } + if (flags & HASH_FORMAT_CHECK) { + if (type == OBJ_TREE) + check_tree(buf, size); + if (type == OBJ_COMMIT) + check_commit(buf, size); + if (type == OBJ_TAG) + check_tag(buf, size); + } + + if (write_object) + ret = write_object_file(buf, size, type_name(type), oid); + else + ret = hash_object_file(buf, size, type_name(type), oid); + if (re_allocated) + free(buf); + return ret; +} + +static int index_stream_convert_blob(struct object_id *oid, int fd, + const char *path, unsigned flags) +{ + int ret; + const int write_object = flags & HASH_WRITE_OBJECT; + struct strbuf sbuf = STRBUF_INIT; + + assert(path); + assert(would_convert_to_git_filter_fd(path)); + + convert_to_git_filter_fd(&the_index, path, fd, &sbuf, + get_conv_flags(flags)); + + if (write_object) + ret = write_object_file(sbuf.buf, sbuf.len, type_name(OBJ_BLOB), + oid); + else + ret = hash_object_file(sbuf.buf, sbuf.len, type_name(OBJ_BLOB), + oid); + strbuf_release(&sbuf); + return ret; +} + +static int index_pipe(struct object_id *oid, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(oid, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + +#define SMALL_FILE_SIZE (32*1024) + +static int index_core(struct object_id *oid, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + int ret; + + if (!size) { + ret = index_mem(oid, "", size, type, path, flags); + } else if (size <= SMALL_FILE_SIZE) { + char *buf = xmalloc(size); + ssize_t read_result = read_in_full(fd, buf, size); + if (read_result < 0) + ret = error_errno("read error while indexing %s", + path ? path : ""); + else if (read_result != size) + ret = error("short read while indexing %s", + path ? path : ""); + else + ret = index_mem(oid, buf, size, type, path, flags); + free(buf); + } else { + void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + ret = index_mem(oid, buf, size, type, path, flags); + munmap(buf, size); + } + return ret; +} + +/* + * This creates one packfile per large blob unless bulk-checkin + * machinery is "plugged". + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. 
We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. + */ +static int index_stream(struct object_id *oid, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + return index_bulk_checkin(oid, fd, size, type, path, flags); +} + +int index_fd(struct object_id *oid, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) +{ + int ret; + + /* + * Call xsize_t() only when needed to avoid potentially unnecessary + * die() for large files. + */ + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path)) + ret = index_stream_convert_blob(oid, fd, path, flags); + else if (!S_ISREG(st->st_mode)) + ret = index_pipe(oid, fd, type, path, flags); + else if (st->st_size <= big_file_threshold || type != OBJ_BLOB || + (path && would_convert_to_git(&the_index, path))) + ret = index_core(oid, fd, xsize_t(st->st_size), type, path, + flags); + else + ret = index_stream(oid, fd, xsize_t(st->st_size), type, path, + flags); + close(fd); + return ret; +} + +int index_path(struct object_id *oid, const char *path, struct stat *st, unsigned flags) +{ + int fd; + struct strbuf sb = STRBUF_INIT; + int rc = 0; + + switch (st->st_mode & S_IFMT) { + case S_IFREG: + fd = open(path, O_RDONLY); + if (fd < 0) + return error_errno("open(\"%s\")", path); + if (index_fd(oid, fd, st, OBJ_BLOB, path, flags) < 0) + return error("%s: failed to insert into database", + path); + break; + case S_IFLNK: + if (strbuf_readlink(&sb, path, st->st_size)) + return error_errno("readlink(\"%s\")", path); + if (!(flags & HASH_WRITE_OBJECT)) + hash_object_file(sb.buf, sb.len, blob_type, oid); + else if (write_object_file(sb.buf, sb.len, blob_type, oid)) + rc = error("%s: failed to insert into database", path); + strbuf_release(&sb); + break; + case S_IFDIR: + return resolve_gitlink_ref(path, "HEAD", oid); + default: + return error("%s: unsupported file type", path); + } + return rc; +} + +int read_pack_header(int fd, struct pack_header *header) +{ + if (read_in_full(fd, header, sizeof(*header)) != sizeof(*header)) + /* "eof before pack header was fully read" */ + return PH_ERROR_EOF; + + if (header->hdr_signature != htonl(PACK_SIGNATURE)) + /* "protocol error (pack signature mismatch detected)" */ + return PH_ERROR_PACK_SIGNATURE; + if (!pack_version_ok(header->hdr_version)) + /* "protocol error (pack version unsupported)" */ + return PH_ERROR_PROTOCOL; + return 0; +} + +void assert_oid_type(const struct object_id *oid, enum object_type expect) +{ + enum object_type type = oid_object_info(the_repository, oid, NULL); + if (type < 0) + die("%s is not a valid object", oid_to_hex(oid)); + if (type != expect) + die("%s is not a valid '%s' object", oid_to_hex(oid), + type_name(expect)); +} + +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + size_t origlen, baselen; + DIR *dir; + struct dirent 
*de; + int r = 0; + struct object_id oid; + + if (subdir_nr > 0xff) + BUG("invalid loose object subdirectory: %x", subdir_nr); + + origlen = path->len; + strbuf_complete(path, '/'); + strbuf_addf(path, "%02x", subdir_nr); + + dir = opendir(path->buf); + if (!dir) { + if (errno != ENOENT) + r = error_errno("unable to open %s", path->buf); + strbuf_setlen(path, origlen); + return r; + } + + oid.hash[0] = subdir_nr; + strbuf_addch(path, '/'); + baselen = path->len; + + while ((de = readdir(dir))) { + size_t namelen; + if (is_dot_or_dotdot(de->d_name)) + continue; + + namelen = strlen(de->d_name); + strbuf_setlen(path, baselen); + strbuf_add(path, de->d_name, namelen); + if (namelen == GIT_SHA1_HEXSZ - 2 && + !hex_to_bytes(oid.hash + 1, de->d_name, + GIT_SHA1_RAWSZ - 1)) { + if (obj_cb) { + r = obj_cb(&oid, path->buf, data); + if (r) + break; + } + continue; + } + + if (cruft_cb) { + r = cruft_cb(de->d_name, path->buf, data); + if (r) + break; + } + } + closedir(dir); + + strbuf_setlen(path, baselen - 1); + if (!r && subdir_cb) + r = subdir_cb(subdir_nr, path->buf, data); + + strbuf_setlen(path, origlen); + + return r; +} + +int for_each_loose_file_in_objdir_buf(struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + int r = 0; + int i; + + for (i = 0; i < 256; i++) { + r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb, + subdir_cb, data); + if (r) + break; + } + + return r; +} + +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + int r; + + strbuf_addstr(&buf, path); + r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb, + subdir_cb, data); + strbuf_release(&buf); + + return r; +} + +struct loose_alt_odb_data { + each_loose_object_fn *cb; + void *data; +}; + +static int loose_from_alt_odb(struct alternate_object_database *alt, + void *vdata) +{ + struct loose_alt_odb_data *data = vdata; + struct strbuf buf = STRBUF_INIT; + int r; + + strbuf_addstr(&buf, alt->path); + r = for_each_loose_file_in_objdir_buf(&buf, + data->cb, NULL, NULL, + data->data); + strbuf_release(&buf); + return r; +} + +int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags) +{ + struct loose_alt_odb_data alt; + int r; + + r = for_each_loose_file_in_objdir(get_object_directory(), + cb, NULL, NULL, data); + if (r) + return r; + + if (flags & FOR_EACH_OBJECT_LOCAL_ONLY) + return 0; + + alt.cb = cb; + alt.data = data; + return foreach_alt_odb(loose_from_alt_odb, &alt); +} + +static int check_stream_sha1(git_zstream *stream, + const char *hdr, + unsigned long size, + const char *path, + const unsigned char *expected_sha1) +{ + git_hash_ctx c; + unsigned char real_sha1[GIT_MAX_RAWSZ]; + unsigned char buf[4096]; + unsigned long total_read; + int status = Z_OK; + + the_hash_algo->init_fn(&c); + the_hash_algo->update_fn(&c, hdr, stream->total_out); + + /* + * We already read some bytes into hdr, but the ones up to the NUL + * do not count against the object's content size. + */ + total_read = stream->total_out - strlen(hdr) - 1; + + /* + * This size comparison must be "<=" to read the final zlib packets; + * see the comment in unpack_sha1_rest for details. 
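for_each_file_in_obj_subdir() above walks one of the 256 two-hex-digit fan-out directories and reassembles each object name from the directory name plus the 38-character file name, handing anything else to the cruft callback. A standalone sketch of the same traversal that just prints the reassembled names; the fixed-size path buffer is a simplification:

#include <dirent.h>
#include <stdio.h>
#include <string.h>

/* Print the 40-hex names of loose objects under one fan-out subdirectory,
 * e.g. list_subdir(".git/objects", 0xe6). */
static void list_subdir(const char *objdir, unsigned int nr)
{
	char path[4096];
	DIR *dir;
	struct dirent *de;

	snprintf(path, sizeof(path), "%s/%02x", objdir, nr);
	dir = opendir(path);
	if (!dir)
		return;
	while ((de = readdir(dir))) {
		if (strlen(de->d_name) != 38)
			continue;	/* skip ".", "..", and most cruft */
		printf("%02x%s\n", nr, de->d_name);
	}
	closedir(dir);
}

int main(void)
{
	unsigned int nr;

	for (nr = 0; nr < 256; nr++)
		list_subdir(".git/objects", nr);
	return 0;
}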
+ */ + while (total_read <= size && + (status == Z_OK || status == Z_BUF_ERROR)) { + stream->next_out = buf; + stream->avail_out = sizeof(buf); + if (size - total_read < stream->avail_out) + stream->avail_out = size - total_read; + status = git_inflate(stream, Z_FINISH); + the_hash_algo->update_fn(&c, buf, stream->next_out - buf); + total_read += stream->next_out - buf; + } + git_inflate_end(stream); + + if (status != Z_STREAM_END) { + error("corrupt loose object '%s'", sha1_to_hex(expected_sha1)); + return -1; + } + if (stream->avail_in) { + error("garbage at end of loose object '%s'", + sha1_to_hex(expected_sha1)); + return -1; + } + + the_hash_algo->final_fn(real_sha1, &c); + if (hashcmp(expected_sha1, real_sha1)) { + error("sha1 mismatch for %s (expected %s)", path, + sha1_to_hex(expected_sha1)); + return -1; + } + + return 0; +} + +int read_loose_object(const char *path, + const struct object_id *expected_oid, + enum object_type *type, + unsigned long *size, + void **contents) +{ + int ret = -1; + void *map = NULL; + unsigned long mapsize; + git_zstream stream; + char hdr[MAX_HEADER_LEN]; + + *contents = NULL; + + map = map_sha1_file_1(the_repository, path, NULL, &mapsize); + if (!map) { + error_errno("unable to mmap %s", path); + goto out; + } + + if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { + error("unable to unpack header of %s", path); + goto out; + } + + *type = parse_sha1_header(hdr, size); + if (*type < 0) { + error("unable to parse header of %s", path); + git_inflate_end(&stream); + goto out; + } + - if (*type == OBJ_BLOB) { ++ if (*type == OBJ_BLOB && *size > big_file_threshold) { + if (check_stream_sha1(&stream, hdr, *size, path, expected_oid->hash) < 0) + goto out; + } else { + *contents = unpack_sha1_rest(&stream, hdr, *size, expected_oid->hash); + if (!*contents) { + error("unable to unpack contents of %s", path); + git_inflate_end(&stream); + goto out; + } + if (check_object_signature(expected_oid, *contents, + *size, type_name(*type))) { + error("sha1 mismatch for %s (expected %s)", path, + oid_to_hex(expected_oid)); + free(*contents); + goto out; + } + } + + ret = 0; /* everything checks out */ + +out: + if (map) + munmap(map, mapsize); + return ret; +} diff --combined submodule-config.c index d87c3ff63a,e5f4901212..90d29348d3 --- a/submodule-config.c +++ b/submodule-config.c @@@ -190,6 -190,31 +190,31 @@@ static struct submodule *cache_lookup_n return NULL; } + int check_submodule_name(const char *name) + { + /* Disallow empty names */ + if (!*name) + return -1; + + /* + * Look for '..' as a path component. Check both '/' and '\\' as + * separators rather than is_dir_sep(), because we want the name rules + * to be consistent across platforms. + */ + goto in_component; /* always start inside component */ + while (*name) { + char c = *name++; + if (c == '/' || c == '\\') { + in_component: + if (name[0] == '.' && name[1] == '.' 
&& + (!name[2] || name[2] == '/' || name[2] == '\\')) + return -1; + } + } + + return 0; + } + static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { @@@ -201,6 -226,12 +226,12 @@@ return 0; strbuf_add(name, subsection, subsection_len); + if (check_submodule_name(name->buf) < 0) { + warning(_("ignoring suspicious submodule name: %s"), name->buf); + strbuf_release(name); + return 0; + } + strbuf_addstr(item, key); return 1; @@@ -520,7 -551,7 +551,7 @@@ static const struct submodule *config_f if (submodule) goto out; - config = read_sha1_file(oid.hash, &type, &config_size); + config = read_object_file(&oid, &type, &config_size); if (!config || type != OBJ_BLOB) goto out; @@@ -619,24 -650,31 +650,24 @@@ static void gitmodules_read_check(struc repo_read_gitmodules(repo); } -const struct submodule *submodule_from_name(const struct object_id *treeish_name, +const struct submodule *submodule_from_name(struct repository *r, + const struct object_id *treeish_name, const char *name) { - gitmodules_read_check(the_repository); - return config_from(the_repository->submodule_cache, treeish_name, name, lookup_name); + gitmodules_read_check(r); + return config_from(r->submodule_cache, treeish_name, name, lookup_name); } -const struct submodule *submodule_from_path(const struct object_id *treeish_name, +const struct submodule *submodule_from_path(struct repository *r, + const struct object_id *treeish_name, const char *path) { - gitmodules_read_check(the_repository); - return config_from(the_repository->submodule_cache, treeish_name, path, lookup_path); + gitmodules_read_check(r); + return config_from(r->submodule_cache, treeish_name, path, lookup_path); } -const struct submodule *submodule_from_cache(struct repository *repo, - const struct object_id *treeish_name, - const char *key) +void submodule_free(struct repository *r) { - gitmodules_read_check(repo); - return config_from(repo->submodule_cache, treeish_name, - key, lookup_path); -} - -void submodule_free(void) -{ - if (the_repository->submodule_cache) - submodule_cache_clear(the_repository->submodule_cache); + if (r->submodule_cache) + submodule_cache_clear(r->submodule_cache); } diff --combined submodule-config.h index 6f686184e8,17e2970223..21273f56a3 --- a/submodule-config.h +++ b/submodule-config.h @@@ -39,12 -39,20 +39,19 @@@ extern int parse_update_recurse_submodu extern int parse_push_recurse_submodules_arg(const char *opt, const char *arg); extern void repo_read_gitmodules(struct repository *repo); extern void gitmodules_config_oid(const struct object_id *commit_oid); -extern const struct submodule *submodule_from_name( - const struct object_id *commit_or_tree, const char *name); -extern const struct submodule *submodule_from_path( - const struct object_id *commit_or_tree, const char *path); -extern const struct submodule *submodule_from_cache(struct repository *repo, - const struct object_id *treeish_name, - const char *key); -extern void submodule_free(void); +const struct submodule *submodule_from_name(struct repository *r, + const struct object_id *commit_or_tree, + const char *name); +const struct submodule *submodule_from_path(struct repository *r, + const struct object_id *commit_or_tree, + const char *path); +void submodule_free(struct repository *r); + /* + * Returns 0 if the name is syntactically acceptable as a submodule "name" + * (e.g., that may be found in the subsection of a .gitmodules file) and -1 + * otherwise. 
+ */ + int check_submodule_name(const char *name); + #endif /* SUBMODULE_CONFIG_H */ diff --combined t/helper/test-path-utils.c index e115d44ac2,94846550f7..ae091d9b3e --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@@ -1,6 -1,6 +1,7 @@@ +#include "test-tool.h" #include "cache.h" #include "string-list.h" + #include "utf8.h" /* * A "string_list_each_func_t" function that normalizes an entry from @@@ -171,7 -171,12 +172,12 @@@ static struct test_data dirname_data[] { NULL, NULL } }; + static int is_dotgitmodules(const char *path) + { + return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); + } + -int cmd_main(int argc, const char **argv) +int cmd__path_utils(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { char *buf = xmallocz(strlen(argv[2])); @@@ -271,6 -276,20 +277,20 @@@ if (argc == 2 && !strcmp(argv[1], "dirname")) return test_function(dirname_data, posix_dirname, argv[1]); + if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) { + int res = 0, expect = 1, i; + for (i = 2; i < argc; i++) + if (!strcmp("--not", argv[i])) + expect = !expect; + else if (expect != is_dotgitmodules(argv[i])) + res = error("'%s' is %s.gitmodules", argv[i], + expect ? "not " : ""); + else + fprintf(stderr, "ok: '%s' is %s.gitmodules\n", + argv[i], expect ? "" : "not "); + return !!res; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --combined t/lib-pack.sh index 501078249d,4674899b30..c4d907a450 --- a/t/lib-pack.sh +++ b/t/lib-pack.sh @@@ -79,13 -79,25 +79,25 @@@ pack_obj () ;; esac + # If it's not a delta, we can convince pack-objects to generate a pack + # with just our entry, and then strip off the header (12 bytes) and + # trailer (20 bytes). + if test -z "$2" + then + echo "$1" | git pack-objects --stdout >pack_obj.tmp && + size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" return 1 } # Compute and append pack trailer to "$1" pack_trailer () { - test-sha1 -b <"$1" >trailer.tmp && + test-tool sha1 -b <"$1" >trailer.tmp && cat trailer.tmp >>"$1" && rm -f trailer.tmp } diff --combined t/t0060-path-utils.sh index f46e3c4995,3f3357ed9f..21a8b53132 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@@ -8,15 -8,15 +8,15 @@@ test_description='Test various path uti . ./test-lib.sh norm_path() { - expected=$(test-path-utils print_path "$2") + expected=$(test-tool path-utils print_path "$2") test_expect_success $3 "normalize path: $1 => $2" \ - "test \"\$(test-path-utils normalize_path_copy '$1')\" = '$expected'" + "test \"\$(test-tool path-utils normalize_path_copy '$1')\" = '$expected'" } relative_path() { - expected=$(test-path-utils print_path "$3") + expected=$(test-tool path-utils print_path "$3") test_expect_success $4 "relative path: $1 $2 => $3" \ - "test \"\$(test-path-utils relative_path '$1' '$2')\" = '$expected'" + "test \"\$(test-tool path-utils relative_path '$1' '$2')\" = '$expected'" } test_submodule_relative_url() { @@@ -37,7 -37,7 +37,7 @@@ test_git_path() # On Windows, we are using MSYS's bash, which mangles the paths. 
# Absolute paths are anchored at the MSYS installation directory, # which means that the path / accounts for this many characters: -rootoff=$(test-path-utils normalize_path_copy / | wc -c) +rootoff=$(test-tool path-utils normalize_path_copy / | wc -c) # Account for the trailing LF: if test $rootoff = 2; then rootoff= # we are on Unix @@@ -46,7 -46,7 +46,7 @@@ els # In MSYS2, the root directory "/" is translated into a Windows # directory *with* trailing slash. Let's test for that and adjust # our expected longest ancestor length accordingly. - case "$(test-path-utils print_path /)" in + case "$(test-tool path-utils print_path /)" in */) rootslash=1;; *) rootslash=0;; esac @@@ -61,7 -61,7 +61,7 @@@ ancestor() expected=$(($expected+$rootoff)) fi test_expect_success "longest ancestor: $1 $2 => $expected" \ - "actual=\$(test-path-utils longest_ancestor_length '$1' '$2') && + "actual=\$(test-tool path-utils longest_ancestor_length '$1' '$2') && test \"\$actual\" = '$expected'" } @@@ -77,8 -77,8 +77,8 @@@ case $(uname -s) i ;; esac -test_expect_success basename 'test-path-utils basename' -test_expect_success dirname 'test-path-utils dirname' +test_expect_success basename 'test-tool path-utils basename' +test_expect_success dirname 'test-tool path-utils dirname' norm_path "" "" norm_path . "" @@@ -157,48 -157,48 +157,48 @@@ ancestor /foo/bar /foo:/bar ancestor /foo/bar /bar -1 test_expect_success 'strip_path_suffix' ' - test c:/msysgit = $(test-path-utils strip_path_suffix \ + test c:/msysgit = $(test-tool path-utils strip_path_suffix \ c:/msysgit/libexec//git-core libexec/git-core) ' test_expect_success 'absolute path rejects the empty string' ' - test_must_fail test-path-utils absolute_path "" + test_must_fail test-tool path-utils absolute_path "" ' test_expect_success 'real path rejects the empty string' ' - test_must_fail test-path-utils real_path "" + test_must_fail test-tool path-utils real_path "" ' test_expect_success POSIX 'real path works on absolute paths 1' ' nopath="hopefully-absent-path" && - test "/" = "$(test-path-utils real_path "/")" && - test "/$nopath" = "$(test-path-utils real_path "/$nopath")" + test "/" = "$(test-tool path-utils real_path "/")" && + test "/$nopath" = "$(test-tool path-utils real_path "/$nopath")" ' test_expect_success 'real path works on absolute paths 2' ' nopath="hopefully-absent-path" && # Find an existing top-level directory for the remaining tests: d=$(pwd -P | sed -e "s|^\([^/]*/[^/]*\)/.*|\1|") && - test "$d" = "$(test-path-utils real_path "$d")" && - test "$d/$nopath" = "$(test-path-utils real_path "$d/$nopath")" + test "$d" = "$(test-tool path-utils real_path "$d")" && + test "$d/$nopath" = "$(test-tool path-utils real_path "$d/$nopath")" ' test_expect_success POSIX 'real path removes extra leading slashes' ' nopath="hopefully-absent-path" && - test "/" = "$(test-path-utils real_path "///")" && - test "/$nopath" = "$(test-path-utils real_path "///$nopath")" && + test "/" = "$(test-tool path-utils real_path "///")" && + test "/$nopath" = "$(test-tool path-utils real_path "///$nopath")" && # Find an existing top-level directory for the remaining tests: d=$(pwd -P | sed -e "s|^\([^/]*/[^/]*\)/.*|\1|") && - test "$d" = "$(test-path-utils real_path "//$d")" && - test "$d/$nopath" = "$(test-path-utils real_path "//$d/$nopath")" + test "$d" = "$(test-tool path-utils real_path "//$d")" && + test "$d/$nopath" = "$(test-tool path-utils real_path "//$d/$nopath")" ' test_expect_success 'real path removes other extra slashes' ' nopath="hopefully-absent-path" && 
# Find an existing top-level directory for the remaining tests: d=$(pwd -P | sed -e "s|^\([^/]*/[^/]*\)/.*|\1|") && - test "$d" = "$(test-path-utils real_path "$d///")" && - test "$d/$nopath" = "$(test-path-utils real_path "$d///$nopath")" + test "$d" = "$(test-tool path-utils real_path "$d///")" && + test "$d/$nopath" = "$(test-tool path-utils real_path "$d///$nopath")" ' test_expect_success SYMLINKS 'real path works on symlinks' ' @@@ -209,35 -209,35 +209,35 @@@ mkdir third && dir="$(cd .git; pwd -P)" && dir2=third/../second/other/.git && - test "$dir" = "$(test-path-utils real_path $dir2)" && + test "$dir" = "$(test-tool path-utils real_path $dir2)" && file="$dir"/index && - test "$file" = "$(test-path-utils real_path $dir2/index)" && + test "$file" = "$(test-tool path-utils real_path $dir2/index)" && basename=blub && - test "$dir/$basename" = "$(cd .git && test-path-utils real_path "$basename")" && + test "$dir/$basename" = "$(cd .git && test-tool path-utils real_path "$basename")" && ln -s ../first/file .git/syml && sym="$(cd first; pwd -P)"/file && - test "$sym" = "$(test-path-utils real_path "$dir2/syml")" + test "$sym" = "$(test-tool path-utils real_path "$dir2/syml")" ' test_expect_success SYMLINKS 'prefix_path works with absolute paths to work tree symlinks' ' ln -s target symlink && - test "$(test-path-utils prefix_path prefix "$(pwd)/symlink")" = "symlink" + test "$(test-tool path-utils prefix_path prefix "$(pwd)/symlink")" = "symlink" ' test_expect_success 'prefix_path works with only absolute path to work tree' ' echo "" >expected && - test-path-utils prefix_path prefix "$(pwd)" >actual && + test-tool path-utils prefix_path prefix "$(pwd)" >actual && test_cmp expected actual ' test_expect_success 'prefix_path rejects absolute path to dir with same beginning as work tree' ' - test_must_fail test-path-utils prefix_path prefix "$(pwd)a" + test_must_fail test-tool path-utils prefix_path prefix "$(pwd)a" ' test_expect_success SYMLINKS 'prefix_path works with absolute path to a symlink to work tree having same beginning as work tree' ' git init repo && ln -s repo repolink && - test "a" = "$(cd repo && test-path-utils prefix_path prefix "$(pwd)/../repolink/a")" + test "a" = "$(cd repo && test-tool path-utils prefix_path prefix "$(pwd)/../repolink/a")" ' relative_path /foo/a/b/c/ /foo/a/b/ c/ @@@ -349,4 -349,90 +349,90 @@@ test_submodule_relative_url "(null)" "s test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo" test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo" + test_expect_success 'match .gitmodules' ' - test-path-utils is_dotgitmodules \ ++ test-tool path-utils is_dotgitmodules \ + .gitmodules \ + \ + .git${u200c}modules \ + \ + .Gitmodules \ + .gitmoduleS \ + \ + ".gitmodules " \ + ".gitmodules." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules ." \ + ".gitmodules.." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules . " \ + ".gitmodules ." \ + \ + ".Gitmodules " \ + ".Gitmodules." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules ." \ + ".Gitmodules.." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules . " \ + ".Gitmodules ." \ + \ + GITMOD~1 \ + gitmod~1 \ + GITMOD~2 \ + gitmod~3 \ + GITMOD~4 \ + \ + "GITMOD~1 " \ + "gitmod~2." \ + "GITMOD~3 " \ + "gitmod~4. " \ + "GITMOD~1 ." \ + "gitmod~2 " \ + "GITMOD~3. " \ + "gitmod~4 . 
" \ + \ + GI7EBA~1 \ + gi7eba~9 \ + \ + GI7EB~10 \ + GI7EB~11 \ + GI7EB~99 \ + GI7EB~10 \ + GI7E~100 \ + GI7E~101 \ + GI7E~999 \ + ~1000000 \ + ~9999999 \ + \ + --not \ + ".gitmodules x" \ + ".gitmodules .x" \ + \ + " .gitmodules" \ + \ + ..gitmodules \ + \ + gitmodules \ + \ + .gitmodule \ + \ + ".gitmodules x " \ + ".gitmodules .x" \ + \ + GI7EBA~ \ + GI7EBA~0 \ + GI7EBA~~1 \ + GI7EBA~X \ + Gx7EBA~1 \ + GI7EBX~1 \ + \ + GI7EB~1 \ + GI7EB~01 \ + GI7EB~1X + ' + test_done diff --combined utf8.c index 0fcc6487e3,f04c24409b..d55e20c641 --- a/utf8.c +++ b/utf8.c @@@ -81,7 -81,7 +81,7 @@@ static int git_wcwidth(ucs_char_t ch /* * Sorted list of non-overlapping intervals of non-spacing characters, */ -#include "unicode_width.h" +#include "unicode-width.h" /* test for 8-bit control characters */ if (ch == 0) @@@ -401,40 -401,18 +401,40 @@@ out strbuf_release(&sb_dst); } +/* + * Returns true (1) if the src encoding name matches the dst encoding + * name directly or one of its alternative names. E.g. UTF-16BE is the + * same as UTF16BE. + */ +static int same_utf_encoding(const char *src, const char *dst) +{ + if (istarts_with(src, "utf") && istarts_with(dst, "utf")) { + /* src[3] or dst[3] might be '\0' */ + int i = (src[3] == '-' ? 4 : 3); + int j = (dst[3] == '-' ? 4 : 3); + return !strcasecmp(src+i, dst+j); + } + return 0; +} + int is_encoding_utf8(const char *name) { if (!name) return 1; - if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8")) + if (same_utf_encoding("utf-8", name)) return 1; return 0; } int same_encoding(const char *src, const char *dst) { - if (is_encoding_utf8(src) && is_encoding_utf8(dst)) + static const char utf8[] = "UTF-8"; + + if (!src) + src = utf8; + if (!dst) + dst = utf8; + if (same_utf_encoding(src, dst)) return 1; return !strcasecmp(src, dst); } @@@ -560,45 -538,6 +560,45 @@@ char *reencode_string_len(const char *i } #endif +static int has_bom_prefix(const char *data, size_t len, + const char *bom, size_t bom_len) +{ + return data && bom && (len >= bom_len) && !memcmp(data, bom, bom_len); +} + +static const char utf16_be_bom[] = {0xFE, 0xFF}; +static const char utf16_le_bom[] = {0xFF, 0xFE}; +static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF}; +static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00}; + +int has_prohibited_utf_bom(const char *enc, const char *data, size_t len) +{ + return ( + (same_utf_encoding("UTF-16BE", enc) || + same_utf_encoding("UTF-16LE", enc)) && + (has_bom_prefix(data, len, utf16_be_bom, sizeof(utf16_be_bom)) || + has_bom_prefix(data, len, utf16_le_bom, sizeof(utf16_le_bom))) + ) || ( + (same_utf_encoding("UTF-32BE", enc) || + same_utf_encoding("UTF-32LE", enc)) && + (has_bom_prefix(data, len, utf32_be_bom, sizeof(utf32_be_bom)) || + has_bom_prefix(data, len, utf32_le_bom, sizeof(utf32_le_bom))) + ); +} + +int is_missing_required_utf_bom(const char *enc, const char *data, size_t len) +{ + return ( + (same_utf_encoding(enc, "UTF-16")) && + !(has_bom_prefix(data, len, utf16_be_bom, sizeof(utf16_be_bom)) || + has_bom_prefix(data, len, utf16_le_bom, sizeof(utf16_le_bom))) + ) || ( + (same_utf_encoding(enc, "UTF-32")) && + !(has_bom_prefix(data, len, utf32_be_bom, sizeof(utf32_be_bom)) || + has_bom_prefix(data, len, utf32_le_bom, sizeof(utf32_le_bom))) + ); +} + /* * Returns first character length in bytes for multi-byte `text` according to * `encoding`. 
@@@ -681,28 -620,33 +681,33 @@@ static ucs_char_t next_hfs_char(const c } } - int is_hfs_dotgit(const char *path) + static int is_hfs_dot_generic(const char *path, + const char *needle, size_t needle_len) { ucs_char_t c; c = next_hfs_char(&path); if (c != '.') return 0; - c = next_hfs_char(&path); /* * there's a great deal of other case-folding that occurs - * in HFS+, but this is enough to catch anything that will - * convert to ".git" + * in HFS+, but this is enough to catch our fairly vanilla + * hard-coded needles. */ - if (c != 'g' && c != 'G') - return 0; - c = next_hfs_char(&path); - if (c != 'i' && c != 'I') - return 0; - c = next_hfs_char(&path); - if (c != 't' && c != 'T') - return 0; + for (; needle_len > 0; needle++, needle_len--) { + c = next_hfs_char(&path); + + /* + * We know our needles contain only ASCII, so we clamp here to + * make the results of tolower() sane. + */ + if (c > 127) + return 0; + if (tolower(c) != *needle) + return 0; + } + c = next_hfs_char(&path); if (c && !is_dir_sep(c)) return 0; @@@ -710,6 -654,35 +715,35 @@@ return 1; } + /* + * Inline wrapper to make sure the compiler resolves strlen() on literals at + * compile time. + */ + static inline int is_hfs_dot_str(const char *path, const char *needle) + { + return is_hfs_dot_generic(path, needle, strlen(needle)); + } + + int is_hfs_dotgit(const char *path) + { + return is_hfs_dot_str(path, "git"); + } + + int is_hfs_dotgitmodules(const char *path) + { + return is_hfs_dot_str(path, "gitmodules"); + } + + int is_hfs_dotgitignore(const char *path) + { + return is_hfs_dot_str(path, "gitignore"); + } + + int is_hfs_dotgitattributes(const char *path) + { + return is_hfs_dot_str(path, "gitattributes"); + } + const char utf8_bom[] = "\357\273\277"; int skip_utf8_bom(char **text, size_t len) diff --combined utf8.h index cce654a64a,da19b43114..db73a2d8d3 --- a/utf8.h +++ b/utf8.h @@@ -52,8 -52,13 +52,13 @@@ int mbs_chrlen(const char **text, size_ * The path should be NUL-terminated, but we will match variants of both ".git\0" * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck * and verify_path(). + * + * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo". */ int is_hfs_dotgit(const char *path); + int is_hfs_dotgitmodules(const char *path); + int is_hfs_dotgitignore(const char *path); + int is_hfs_dotgitattributes(const char *path); typedef enum { ALIGN_LEFT, @@@ -70,32 -75,4 +75,32 @@@ void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, const char *s); +/* + * If a data stream is declared as UTF-16BE or UTF-16LE, then a UTF-16 + * BOM must not be used [1]. The same applies for the UTF-32 equivalents. + * The function returns true if this rule is violated. + * + * [1] http://unicode.org/faq/utf_bom.html#bom10 + */ +int has_prohibited_utf_bom(const char *enc, const char *data, size_t len); + +/* + * If the endianness is not defined in the encoding name, then we + * require a BOM. The function returns true if a required BOM is missing. + * + * The Unicode standard instructs to assume big-endian if there in no + * BOM for UTF-16/32 [1][2]. However, the W3C/WHATWG encoding standard + * used in HTML5 recommends to assume little-endian to "deal with + * deployed content" [3]. + * + * Therefore, strictly requiring a BOM seems to be the safest option for + * content in Git. 
+ * + * [1] http://unicode.org/faq/utf_bom.html#gen6 + * [2] http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf + * Section 3.10, D98, page 132 + * [3] https://encoding.spec.whatwg.org/#utf-16le + */ +int is_missing_required_utf_bom(const char *enc, const char *data, size_t len); + #endif
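Taken together, the submodule-config hunks at the top of this diff move the lookup API from an implicit dependency on the_repository to an explicit struct repository argument: submodule_from_cache() is dropped, and submodule_from_name()/submodule_from_path() gain a leading struct repository *r parameter. A rough sketch of a caller updated for the new signature declared in submodule-config.h above; the wrapper function itself is hypothetical, and &null_oid is the era-appropriate way to say "read .gitmodules from the worktree/index":

    #include "cache.h"
    #include "repository.h"
    #include "submodule-config.h"

    /* Hypothetical helper: fetch the configured URL for a submodule path. */
    static const char *submodule_url_for_path(const char *path)
    {
            const struct submodule *sub;

            /*
             * The repository to consult is now spelled out by the caller
             * instead of being hard-coded to the_repository inside the
             * submodule-config API.
             */
            sub = submodule_from_path(the_repository, &null_oid, path);
            return sub ? sub->url : NULL;
    }

Callers that previously went through submodule_from_cache(repo, ...) — which was keyed by path via lookup_path — pass their repository directly to submodule_from_path() instead.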