From: Junio C Hamano Date: Thu, 18 Jul 2013 19:59:41 +0000 (-0700) Subject: Merge branch 'jk/in-pack-size-measurement' X-Git-Tag: v1.8.4-rc0~44 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/802f878b86332a7841dab89dfe29e3e6c90979ab?ds=inline;hp=-c Merge branch 'jk/in-pack-size-measurement' "git cat-file --batch-check=<format>" is added, primarily to allow on-disk footprint of objects in packfiles (often they are a lot smaller than their true size, when expressed as deltas) to be reported. * jk/in-pack-size-measurement: pack-revindex: radix-sort the revindex pack-revindex: use unsigned to store number of objects cat-file: split --batch input lines on whitespace cat-file: add %(objectsize:disk) format atom cat-file: add --batch-check=<format> cat-file: refactor --batch option parsing cat-file: teach --batch to stream blob objects t1006: modernize output comparisons teach sha1_object_info_extended a "disk_size" query zero-initialize object_info structs --- 802f878b86332a7841dab89dfe29e3e6c90979ab diff --combined cache.h index dd0fb33a15,f2915509a6..2d06169155 --- a/cache.h +++ b/cache.h @@@ -119,19 -119,15 +119,19 @@@ struct cache_time unsigned int nsec; }; +struct stat_data { + struct cache_time sd_ctime; + struct cache_time sd_mtime; + unsigned int sd_dev; + unsigned int sd_ino; + unsigned int sd_uid; + unsigned int sd_gid; + unsigned int sd_size; +}; + struct cache_entry { - struct cache_time ce_ctime; - struct cache_time ce_mtime; - unsigned int ce_dev; - unsigned int ce_ino; + struct stat_data ce_stat_data; unsigned int ce_mode; - unsigned int ce_uid; - unsigned int ce_gid; - unsigned int ce_size; unsigned int ce_flags; unsigned int ce_namelen; unsigned char sha1[20]; @@@ -194,8 -190,7 +194,8 @@@ * another. But we never change the name, or the hash state! 
*/ #define CE_STATE_MASK (CE_HASHED | CE_UNHASHED) -static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry *src) +static inline void copy_cache_entry(struct cache_entry *dst, + const struct cache_entry *src) { unsigned int state = dst->ce_flags & CE_STATE_MASK; @@@ -227,8 -222,7 +227,8 @@@ static inline unsigned int create_ce_mo return S_IFGITLINK; return S_IFREG | ce_permissions(mode); } -static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned int mode) +static inline unsigned int ce_mode_from_stat(const struct cache_entry *ce, + unsigned int mode) { extern int trust_executable_bit, has_symlinks; if (!has_symlinks && S_ISREG(mode) && @@@ -486,8 -480,8 +486,8 @@@ extern void *read_blob_data_from_index( #define CE_MATCH_RACY_IS_DIRTY 02 /* do stat comparison even if CE_SKIP_WORKTREE is true */ #define CE_MATCH_IGNORE_SKIP_WORKTREE 04 -extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int); -extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int); +extern int ie_match_stat(const struct index_state *, const struct cache_entry *, struct stat *, unsigned int); +extern int ie_modified(const struct index_state *, const struct cache_entry *, struct stat *, unsigned int); #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ @@@ -515,21 -509,6 +515,21 @@@ extern int limit_pathspec_to_literal(vo #define HASH_FORMAT_CHECK 2 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); + +/* + * Record to sd the data from st that we use to check whether a file + * might have changed. + */ +extern void fill_stat_data(struct stat_data *sd, struct stat *st); + +/* + * Return 0 if st is consistent with a file not having been changed + * since sd was filled. 
If there are differences, return a + * combination of MTIME_CHANGED, CTIME_CHANGED, OWNER_CHANGED, + * INODE_CHANGED, and DATA_CHANGED. + */ +extern int match_stat_data(const struct stat_data *sd, struct stat *st); + extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ @@@ -793,6 -772,9 +793,6 @@@ extern int parse_sha1_header(const cha /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; -/* for development: log offset of pack access */ -extern const char *log_pack_access; - extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); extern int move_temp_to_file(const char *tmpfile, const char *filename); @@@ -928,7 -910,6 +928,7 @@@ void show_date_relative(unsigned long t struct strbuf *timebuf); int parse_date(const char *date, char *buf, int bufsize); int parse_date_basic(const char *date, unsigned long *timestamp, int *offset); +int parse_expiry_date(const char *date, unsigned long *timestamp); void datestamp(char *buf, int bufsize); #define approxidate(s) approxidate_careful((s), NULL) unsigned long approxidate_careful(const char *, int *); @@@ -1043,21 -1024,9 +1043,21 @@@ struct ref unsigned int force:1, forced_update:1, - merge:1, deletion:1, matched:1; + + /* + * Order is important here, as we write to FETCH_HEAD + * in numeric order. And the default NOT_FOR_MERGE + * should be 0, so that xcalloc'd structures get it + * by default. 
+ */ + enum { + FETCH_HEAD_MERGE = -1, + FETCH_HEAD_NOT_FOR_MERGE = 0, + FETCH_HEAD_IGNORE = 1 + } fetch_head_status; + enum { REF_STATUS_NONE = 0, REF_STATUS_OK, @@@ -1130,6 -1099,7 +1130,7 @@@ extern int unpack_object_header(struct struct object_info { /* Request */ unsigned long *sizep; + unsigned long *disk_sizep; /* Response */ enum { @@@ -1357,31 -1327,4 +1358,31 @@@ int checkout_fast_forward(const unsigne int sane_execvp(const char *file, char *const argv[]); +/* + * A struct to encapsulate the concept of whether a file has changed + * since we last checked it. This uses criteria similar to those used + * for the index. + */ +struct stat_validity { + struct stat_data *sd; +}; + +void stat_validity_clear(struct stat_validity *sv); + +/* + * Returns 1 if the path is a regular file (or a symlink to a regular + * file) and matches the saved stat_validity, 0 otherwise. A missing + * or inaccessible file is considered a match if the struct was just + * initialized, or if the previous update found an inaccessible file. + */ +int stat_validity_check(struct stat_validity *sv, const char *path); + +/* + * Update the stat_validity from a file opened at descriptor fd. If + * the file is missing, inaccessible, or not a regular file, then + * future calls to stat_validity_check will match iff one of those + * conditions continues to be true. 
+ */ +void stat_validity_update(struct stat_validity *sv, int fd); + #endif /* CACHE_H */ diff --combined sha1_file.c index 0af19c00f1,6baed676dc..4c2365f48f --- a/sha1_file.c +++ b/sha1_file.c @@@ -36,9 -36,6 +36,9 @@@ static inline uintmax_t sz_fmt(size_t s const unsigned char null_sha1[20]; +static const char *no_log_pack_access = "no_log_pack_access"; +static const char *log_pack_access; + /* * This is meant to hold a *small* number of objects that you would * want read_sha1_file() to be able to return, but yet you do not want @@@ -1697,7 -1694,8 +1697,8 @@@ static int retry_bad_packed_offset(stru #define POI_STACK_PREALLOC 64 static int packed_object_info(struct packed_git *p, off_t obj_offset, - unsigned long *sizep, int *rtype) + unsigned long *sizep, int *rtype, + unsigned long *disk_sizep) { struct pack_window *w_curs = NULL; unsigned long size; @@@ -1731,6 -1729,11 +1732,11 @@@ } } + if (disk_sizep) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + *disk_sizep = revidx[1].offset - obj_offset; + } + while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { off_t base_offset; /* Push the object we're going to leave behind */ @@@ -1959,19 -1962,12 +1965,19 @@@ static void write_pack_access_log(struc { static FILE *log_file; + if (!log_pack_access) + log_pack_access = getenv("GIT_TRACE_PACK_ACCESS"); + if (!log_pack_access) + log_pack_access = no_log_pack_access; + if (log_pack_access == no_log_pack_access) + return; + if (!log_file) { log_file = fopen(log_pack_access, "w"); if (!log_file) { error("cannot open pack access log '%s' for writing: %s", log_pack_access, strerror(errno)); - log_pack_access = NULL; + log_pack_access = no_log_pack_access; return; } } @@@ -2002,7 -1998,7 +2008,7 @@@ void *unpack_entry(struct packed_git *p int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; int base_from_cache = 0; - if (log_pack_access) + if (log_pack_access != no_log_pack_access) write_pack_access_log(p, obj_offset); /* 
PHASE 1: drill down to the innermost base object */ @@@ -2145,19 -2141,10 +2151,19 @@@ data = patch_delta(base, base_size, delta_data, delta_size, &size); + + /* + * We could not apply the delta; warn the user, but keep going. + * Our failure will be noticed either in the next iteration of + * the loop, or if this is the final delta, in the caller when + * we return NULL. Those code paths will take care of making + * a more explicit warning and retrying with another copy of + * the object. + */ if (!data) - die("failed to apply delta"); + error("failed to apply delta"); - free (delta_data); + free(delta_data); } *final_type = type; @@@ -2357,7 -2344,8 +2363,8 @@@ struct packed_git *find_sha1_pack(cons } - static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep) + static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep, + unsigned long *disk_sizep) { int status; unsigned long mapsize, size; @@@ -2367,7 -2355,9 +2374,9 @@@ map = map_sha1_file(sha1, &mapsize); if (!map) - return error("unable to find %s", sha1_to_hex(sha1)); + return -1; + if (disk_sizep) + *disk_sizep = mapsize; if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header", sha1_to_hex(sha1)); @@@ -2391,13 -2381,15 +2400,15 @@@ int sha1_object_info_extended(const uns if (co) { if (oi->sizep) *(oi->sizep) = co->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; oi->whence = OI_CACHED; return co->type; } if (!find_pack_entry(sha1, &e)) { /* Most likely it's a loose object. 
*/ - status = sha1_loose_object_info(sha1, oi->sizep); + status = sha1_loose_object_info(sha1, oi->sizep, oi->disk_sizep); if (status >= 0) { oi->whence = OI_LOOSE; return status; @@@ -2409,7 -2401,8 +2420,8 @@@ return status; } - status = packed_object_info(e.p, e.offset, oi->sizep, &rtype); + status = packed_object_info(e.p, e.offset, oi->sizep, &rtype, + oi->disk_sizep); if (status < 0) { mark_bad_packed_object(e.p, sha1); status = sha1_object_info_extended(sha1, oi); @@@ -2428,7 -2421,7 +2440,7 @@@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) { - struct object_info oi; + struct object_info oi = {0}; oi.sizep = sizep; return sha1_object_info_extended(sha1, &oi);