From: Junio C Hamano Date: Wed, 26 Apr 2017 06:39:07 +0000 (+0900) Subject: Merge branch 'jh/add-index-entry-optim' X-Git-Tag: v2.13.0-rc1~8 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/6cbc478d83b5773d1925869e50bf6067306f4817?ds=inline;hp=-c Merge branch 'jh/add-index-entry-optim' "git checkout" that handles a lot of paths has been optimized by reducing the number of unnecessary checks of paths in the has_dir_name() function. * jh/add-index-entry-optim: read-cache: speed up has_dir_name (part 2) read-cache: speed up has_dir_name (part 1) read-cache: speed up add_index_entry during checkout p0006-read-tree-checkout: perf test to time read-tree read-cache: add strcmp_offset function --- 6cbc478d83b5773d1925869e50bf6067306f4817 diff --combined Makefile index eb1a1a7cff,0a4fb9de8c..c739023e4e --- a/Makefile +++ b/Makefile @@@ -626,12 -626,10 +626,12 @@@ TEST_PROGRAMS_NEED_X += test-line-buffe TEST_PROGRAMS_NEED_X += test-match-trees TEST_PROGRAMS_NEED_X += test-mergesort TEST_PROGRAMS_NEED_X += test-mktemp +TEST_PROGRAMS_NEED_X += test-online-cpus TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-prio-queue TEST_PROGRAMS_NEED_X += test-read-cache +TEST_PROGRAMS_NEED_X += test-ref-store TEST_PROGRAMS_NEED_X += test-regex TEST_PROGRAMS_NEED_X += test-revision-walking TEST_PROGRAMS_NEED_X += test-run-command @@@ -639,6 -637,7 +639,7 @@@ TEST_PROGRAMS_NEED_X += test-scrap-cach TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sha1-array TEST_PROGRAMS_NEED_X += test-sigchain + TEST_PROGRAMS_NEED_X += test-strcmp-offset TEST_PROGRAMS_NEED_X += test-string-list TEST_PROGRAMS_NEED_X += test-submodule-config TEST_PROGRAMS_NEED_X += test-subprocess @@@ -1853,7 -1852,6 +1854,7 @@@ perl/perl.mak: perl/PM.stam perl/PM.stamp: FORCE @$(FIND) perl -type f -name '*.pm' | sort >$@+ && \ + $(PERL_PATH) -V >>$@+ && \ { cmp $@+ $@ >/dev/null 2>/dev/null || mv $@+ $@; } && \ $(RM) $@+ diff --combined cache.h index 322ae58259,2aea7caf44..e1f0e182ad --- a/cache.h +++ b/cache.h @@@ -66,12 -66,8 +66,12 @@@ unsigned long git_deflate_bound(git_zst #define GIT_SHA1_RAWSZ 20 #define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) +/* The length in byte and in hex digits of the largest possible hash value. */ +#define GIT_MAX_RAWSZ GIT_SHA1_RAWSZ +#define GIT_MAX_HEXSZ GIT_SHA1_HEXSZ + struct object_id { - unsigned char hash[GIT_SHA1_RAWSZ]; + unsigned char hash[GIT_MAX_RAWSZ]; }; #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) @@@ -599,6 -595,7 +599,7 @@@ extern int write_locked_index(struct in extern int discard_index(struct index_state *); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); + extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); extern void adjust_dirname_case(struct index_state *istate, char *name); extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); @@@ -710,8 -707,6 +711,8 @@@ extern void update_index_if_able(struc extern int hold_locked_index(struct lock_file *, int); extern void set_alternate_index_output(const char *); +extern int verify_index_checksum; + /* Environment bits from configuration mechanism */ extern int trust_executable_bit; extern int trust_ctime; @@@ -984,7 -979,7 +985,7 @@@ extern char *sha1_pack_index_name(cons extern const char *find_unique_abbrev(const unsigned char *sha1, int len); extern int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len); -extern const unsigned char null_sha1[GIT_SHA1_RAWSZ]; +extern const unsigned char null_sha1[GIT_MAX_RAWSZ]; extern const struct object_id null_oid; static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) @@@ -1166,7 -1161,7 +1167,7 @@@ typedef int create_file_fn(const char * int raceproof_create_file(const char *path, create_file_fn fn, void *cb); int mkdir_in_gitdir(const char *path); -extern char *expand_user_path(const char *path); +extern char *expand_user_path(const char *path, int real_home); const char *enter_repo(const char *path, int strict); static inline int is_absolute_path(const char *path) { @@@ -1366,7 -1361,7 +1367,7 @@@ extern int get_sha1_with_context(const extern int get_oid(const char *str, struct object_id *oid); -typedef int each_abbrev_fn(const unsigned char *sha1, void *); +typedef int each_abbrev_fn(const struct object_id *oid, void *); extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *); extern int set_disambiguate_hint_config(const char *var, const char *value); @@@ -1681,12 -1676,9 +1682,12 @@@ extern struct packed_git *find_sha1_pac extern void pack_report(void); /* - * Create a temporary file rooted in the object database directory. + * Create a temporary file rooted in the object database directory, or + * die on failure. The filename is taken from "pattern", which should have the + * usual "XXXXXX" trailer, and the resulting filename is written into the + * "template" buffer. Returns the open descriptor. */ -extern int odb_mkstemp(char *template, size_t limit, const char *pattern); +extern int odb_mkstemp(struct strbuf *template, const char *pattern); /* * Generate the filename to be used for a pack file with checksum "sha1" and @@@ -1891,11 -1883,6 +1892,11 @@@ enum config_origin_type CONFIG_ORIGIN_CMDLINE }; +struct config_options { + unsigned int respect_includes : 1; + const char *git_dir; +}; + typedef int (*config_fn_t)(const char *, const char *, void *); extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); @@@ -1909,13 -1896,12 +1910,13 @@@ extern void read_early_config(config_fn extern void git_config(config_fn_t fn, void *); extern int git_config_with_options(config_fn_t fn, void *, struct git_config_source *config_source, - int respect_includes); + const struct config_options *opts); extern int git_parse_ulong(const char *, unsigned long *); extern int git_parse_maybe_bool(const char *); extern int git_config_int(const char *, const char *); extern int64_t git_config_int64(const char *, const char *); extern unsigned long git_config_ulong(const char *, const char *); +extern ssize_t git_config_ssize_t(const char *, const char *); extern int git_config_bool_or_int(const char *, const char *, int *); extern int git_config_bool(const char *, const char *); extern int git_config_maybe_bool(const char *, const char *); @@@ -1962,7 -1948,6 +1963,7 @@@ struct config_include_data int depth; config_fn_t fn; void *data; + const struct config_options *opts; }; #define CONFIG_INCLUDE_INIT { 0 } extern int git_config_include(const char *name, const char *value, void *data); diff --combined read-cache.c index 008b335844,5d47ba8bdd..d8c657d6a7 --- a/read-cache.c +++ b/read-cache.c @@@ -887,9 -887,32 +887,32 @@@ static int has_file_name(struct index_s return retval; } + + /* + * Like strcmp(), but also return the offset of the first change. + * If strings are equal, return the length. + */ + int strcmp_offset(const char *s1, const char *s2, size_t *first_change) + { + size_t k; + + if (!first_change) + return strcmp(s1, s2); + + for (k = 0; s1[k] == s2[k]; k++) + if (s1[k] == '\0') + break; + + *first_change = k; + return (unsigned char)s1[k] - (unsigned char)s2[k]; + } + /* * Do we have another file with a pathname that is a proper * subset of the name we're trying to add? + * + * That is, is there another file in the index with a path + * that matches a sub-directory in the given entry? */ static int has_dir_name(struct index_state *istate, const struct cache_entry *ce, int pos, int ok_to_replace) @@@ -898,9 -921,51 +921,51 @@@ int stage = ce_stage(ce); const char *name = ce->name; const char *slash = name + ce_namelen(ce); + size_t len_eq_last; + int cmp_last = 0; + + /* + * We are frequently called during an iteration on a sorted + * list of pathnames and while building a new index. Therefore, + * there is a high probability that this entry will eventually + * be appended to the index, rather than inserted in the middle. + * If we can confirm that, we can avoid binary searches on the + * components of the pathname. + * + * Compare the entry's full path with the last path in the index. + */ + if (istate->cache_nr > 0) { + cmp_last = strcmp_offset(name, + istate->cache[istate->cache_nr - 1]->name, + &len_eq_last); + if (cmp_last > 0) { + if (len_eq_last == 0) { + /* + * The entry sorts AFTER the last one in the + * index and their paths have no common prefix, + * so there cannot be a F/D conflict. + */ + return retval; + } else { + /* + * The entry sorts AFTER the last one in the + * index, but has a common prefix. Fall through + * to the loop below to disect the entry's path + * and see where the difference is. + */ + } + } else if (cmp_last == 0) { + /* + * The entry exactly matches the last one in the + * index, but because of multiple stage and CE_REMOVE + * items, we fall through and let the regular search + * code handle it. + */ + } + } for (;;) { - int len; + size_t len; for (;;) { if (*--slash == '/') @@@ -910,6 -975,67 +975,67 @@@ } len = slash - name; + if (cmp_last > 0) { + /* + * (len + 1) is a directory boundary (including + * the trailing slash). And since the loop is + * decrementing "slash", the first iteration is + * the longest directory prefix; subsequent + * iterations consider parent directories. + */ + + if (len + 1 <= len_eq_last) { + /* + * The directory prefix (including the trailing + * slash) also appears as a prefix in the last + * entry, so the remainder cannot collide (because + * strcmp said the whole path was greater). + * + * EQ: last: xxx/A + * this: xxx/B + * + * LT: last: xxx/file_A + * this: xxx/file_B + */ + return retval; + } + + if (len > len_eq_last) { + /* + * This part of the directory prefix (excluding + * the trailing slash) is longer than the known + * equal portions, so this sub-directory cannot + * collide with a file. + * + * GT: last: xxxA + * this: xxxB/file + */ + return retval; + } + + if (istate->cache_nr > 0 && + ce_namelen(istate->cache[istate->cache_nr - 1]) > len) { + /* + * The directory prefix lines up with part of + * a longer file or directory name, but sorts + * after it, so this sub-directory cannot + * collide with a file. + * + * last: xxx/yy-file (because '-' sorts before '/') + * this: xxx/yy/abc + */ + return retval; + } + + /* + * This is a possible collision. Fall through and + * let the regular search code handle it. + * + * last: xxx + * this: xxx/file + */ + } + pos = index_name_stage_pos(istate, name, len, stage); if (pos >= 0) { /* @@@ -1001,7 -1127,16 +1127,16 @@@ static int add_index_entry_with_check(s if (!(option & ADD_CACHE_KEEP_CACHE_TREE)) cache_tree_invalidate_path(istate, ce->name); - pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); + + /* + * If this entry's path sorts after the last entry in the index, + * we can avoid searching for it. + */ + if (istate->cache_nr > 0 && + strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0) + pos = -istate->cache_nr - 1; + else + pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); /* existing match? Just replace it. */ if (pos >= 0) { @@@ -1371,9 -1506,6 +1506,9 @@@ struct ondisk_cache_entry_extended ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ ondisk_cache_entry_size(ce_namelen(ce))) +/* Allow fsck to force verification of the index checksum. */ +int verify_index_checksum; + static int verify_hdr(struct cache_header *hdr, unsigned long size) { git_SHA_CTX c; @@@ -1385,10 -1517,6 +1520,10 @@@ hdr_version = ntohl(hdr->hdr_version); if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version) return error("bad index version %d", hdr_version); + + if (!verify_index_checksum) + return 0; + git_SHA1_Init(&c); git_SHA1_Update(&c, hdr, size - 20); git_SHA1_Final(sha1, &c); diff --combined t/helper/.gitignore index acd5db180f,e7fe7ff598..721650256e --- a/t/helper/.gitignore +++ b/t/helper/.gitignore @@@ -16,18 -16,17 +16,19 @@@ /test-match-trees /test-mergesort /test-mktemp +/test-online-cpus /test-parse-options /test-path-utils /test-prio-queue /test-read-cache +/test-ref-store /test-regex /test-revision-walking /test-run-command /test-sha1 /test-sha1-array /test-sigchain + /test-strcmp-offset /test-string-list /test-submodule-config /test-subprocess