From: Junio C Hamano Date: Tue, 19 Jul 2011 16:54:51 +0000 (-0700) Subject: Merge branch 'jc/index-pack' X-Git-Tag: v1.7.7-rc0~85 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/d907bf8ef327cd47433d4a4bb0a1bb4e96b6e340?ds=inline;hp=-c Merge branch 'jc/index-pack' * jc/index-pack: verify-pack: use index-pack --verify index-pack: show histogram when emulating "verify-pack -v" index-pack: start learning to emulate "verify-pack -v" index-pack: a miniscule refactor index-pack --verify: read anomalous offsets from v2 idx file write_idx_file: need_large_offset() helper function index-pack: --verify write_idx_file: introduce a struct to hold idx customization options index-pack: group the delta-base array entries also by type Conflicts: builtin/verify-pack.c cache.h sha1_file.c --- d907bf8ef327cd47433d4a4bb0a1bb4e96b6e340 diff --combined builtin/index-pack.c index 81cdc28b30,ed4c3bb13b..0945adbb3b --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@@ -1,4 -1,4 +1,4 @@@ -#include "cache.h" +#include "builtin.h" #include "delta.h" #include "pack.h" #include "csum-file.h" @@@ -11,14 -11,17 +11,16 @@@ #include "exec_cmd.h" static const char index_pack_usage[] = - "git index-pack [-v] [-o ] [ --keep | --keep= ] [--strict] ( | --stdin [--fix-thin] [])"; + "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; -struct object_entry -{ +struct object_entry { struct pack_idx_entry idx; unsigned long size; unsigned int hdr_size; enum object_type type; enum object_type real_type; + unsigned delta_depth; + int base_object_no; }; union delta_base { @@@ -43,7 -46,8 +45,7 @@@ struct base_data #define FLAG_LINK (1u<<20) #define FLAG_CHECKED (1u<<21) -struct delta_entry -{ +struct delta_entry { union delta_base base; int obj_no; }; @@@ -66,6 -70,7 +68,7 @@@ static struct progress *progress static unsigned char input_buffer[4096]; static unsigned int input_offset, input_len; static off_t consumed_bytes; + static unsigned deepest_delta; static git_SHA_CTX input_ctx; static uint32_t input_crc32; static int input_fd, output_fd, pack_fd; @@@ -207,7 -212,7 +210,7 @@@ static void parse_pack_header(void static NORETURN void bad_object(unsigned long offset, const char *format, ...) __attribute__((format (printf, 2, 3))); -static void bad_object(unsigned long offset, const char *format, ...) +static NORETURN void bad_object(unsigned long offset, const char *format, ...) { va_list params; char buf[1024]; @@@ -265,7 -270,7 +268,7 @@@ static void unlink_base_data(struct bas static void *unpack_entry_data(unsigned long offset, unsigned long size) { int status; - z_stream stream; + git_zstream stream; void *buf = xmalloc(size); memset(&stream, 0, sizeof(stream)); @@@ -294,7 -299,7 +297,7 @@@ static void *unpack_raw_entry(struct ob void *data; obj->idx.offset = consumed_bytes; - input_crc32 = crc32(0, Z_NULL, 0); + input_crc32 = crc32(0, NULL, 0); p = fill(1); c = *p; @@@ -355,7 -360,7 +358,7 @@@ static void *get_data_from_pack(struct off_t from = obj[0].idx.offset + obj[0].hdr_size; unsigned long len = obj[1].idx.offset - from; unsigned char *data, *inbuf; - z_stream stream; + git_zstream stream; int status; data = xmalloc(obj->size); @@@ -389,7 -394,18 +392,18 @@@ return data; } - static int find_delta(const union delta_base *base) + static int compare_delta_bases(const union delta_base *base1, + const union delta_base *base2, + enum object_type type1, + enum object_type type2) + { + int cmp = type1 - type2; + if (cmp) + return cmp; + return memcmp(base1, base2, UNION_BASE_SZ); + } + + static int find_delta(const union delta_base *base, enum object_type type) { int first = 0, last = nr_deltas; @@@ -398,7 -414,8 +412,8 @@@ struct delta_entry *delta = &deltas[next]; int cmp; - cmp = memcmp(base, &delta->base, UNION_BASE_SZ); + cmp = compare_delta_bases(base, &delta->base, + type, objects[delta->obj_no].type); if (!cmp) return next; if (cmp < 0) { @@@ -411,9 -428,10 +426,10 @@@ } static void find_delta_children(const union delta_base *base, - int *first_index, int *last_index) + int *first_index, int *last_index, + enum object_type type) { - int first = find_delta(base); + int first = find_delta(base, type); int last = first; int end = nr_deltas - 1; @@@ -483,12 -501,17 +499,17 @@@ static void sha1_object(const void *dat } } + static int is_delta_type(enum object_type type) + { + return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); + } + static void *get_base_data(struct base_data *c) { if (!c->data) { struct object_entry *obj = c->obj; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + if (is_delta_type(obj->type)) { void *base = get_base_data(c->base); void *raw = get_data_from_pack(obj); c->data = patch_delta( @@@ -515,6 -538,10 +536,10 @@@ static void resolve_delta(struct object void *base_data, *delta_data; delta_obj->real_type = base->obj->real_type; + delta_obj->delta_depth = base->obj->delta_depth + 1; + if (deepest_delta < delta_obj->delta_depth) + deepest_delta = delta_obj->delta_depth; + delta_obj->base_object_no = base->obj - objects; delta_data = get_data_from_pack(delta_obj); base_data = get_base_data(base); result->obj = delta_obj; @@@ -541,11 -568,13 +566,13 @@@ static void find_unresolved_deltas(stru union delta_base base_spec; hashcpy(base_spec.sha1, base->obj->idx.sha1); - find_delta_children(&base_spec, &ref_first, &ref_last); + find_delta_children(&base_spec, + &ref_first, &ref_last, OBJ_REF_DELTA); memset(&base_spec, 0, sizeof(base_spec)); base_spec.offset = base->obj->idx.offset; - find_delta_children(&base_spec, &ofs_first, &ofs_last); + find_delta_children(&base_spec, + &ofs_first, &ofs_last, OBJ_OFS_DELTA); } if (ref_last == -1 && ofs_last == -1) { @@@ -557,24 -586,24 +584,24 @@@ for (i = ref_first; i <= ref_last; i++) { struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_REF_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - if (i == ref_last && ofs_last == -1) - free_base_data(base); - find_unresolved_deltas(&result, base); - } + struct base_data result; + + assert(child->real_type == OBJ_REF_DELTA); + resolve_delta(child, base, &result); + if (i == ref_last && ofs_last == -1) + free_base_data(base); + find_unresolved_deltas(&result, base); } for (i = ofs_first; i <= ofs_last; i++) { struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_OFS_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - if (i == ofs_last) - free_base_data(base); - find_unresolved_deltas(&result, base); - } + struct base_data result; + + assert(child->real_type == OBJ_OFS_DELTA); + resolve_delta(child, base, &result); + if (i == ofs_last) + free_base_data(base); + find_unresolved_deltas(&result, base); } unlink_base_data(base); @@@ -584,7 -613,11 +611,11 @@@ static int compare_delta_entry(const vo { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); + + /* group by type (ref vs ofs) and then by value (sha-1 or offset) */ + return compare_delta_bases(&delta_a->base, &delta_b->base, + objects[delta_a->obj_no].type, + objects[delta_b->obj_no].type); } /* Parse all objects and return the pack content SHA1 hash */ @@@ -608,7 -641,7 +639,7 @@@ static void parse_pack_objects(unsigne struct object_entry *obj = &objects[i]; void *data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + if (is_delta_type(obj->type)) { nr_deltas++; delta->obj_no = i; delta++; @@@ -655,7 -688,7 +686,7 @@@ struct object_entry *obj = &objects[i]; struct base_data base_obj; - if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) + if (is_delta_type(obj->type)) continue; base_obj.obj = obj; base_obj.data = NULL; @@@ -666,26 -699,26 +697,26 @@@ static int write_compressed(struct sha1file *f, void *in, unsigned int size) { - z_stream stream; + git_zstream stream; int status; unsigned char outbuf[4096]; memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, zlib_compression_level); + git_deflate_init(&stream, zlib_compression_level); stream.next_in = in; stream.avail_in = size; do { stream.next_out = outbuf; stream.avail_out = sizeof(outbuf); - status = deflate(&stream, Z_FINISH); + status = git_deflate(&stream, Z_FINISH); sha1write(f, outbuf, sizeof(outbuf) - stream.avail_out); } while (status == Z_OK); if (status != Z_STREAM_END) die("unable to deflate appended object (%d)", status); size = stream.total_out; - deflateEnd(&stream); + git_deflate_end(&stream); return size; } @@@ -859,24 -892,137 +890,137 @@@ static void final(const char *final_pac static int git_index_pack_config(const char *k, const char *v, void *cb) { + struct pack_idx_option *opts = cb; + if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) - die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + opts->version = git_config_int(k, v); + if (opts->version > 2) + die("bad pack.indexversion=%"PRIu32, opts->version); return 0; } return git_default_config(k, v, cb); } + static int cmp_uint32(const void *a_, const void *b_) + { + uint32_t a = *((uint32_t *)a_); + uint32_t b = *((uint32_t *)b_); + + return (a < b) ? -1 : (a != b); + } + + static void read_v2_anomalous_offsets(struct packed_git *p, + struct pack_idx_option *opts) + { + const uint32_t *idx1, *idx2; + uint32_t i; + + /* The address of the 4-byte offset table */ + idx1 = (((const uint32_t *)p->index_data) + + 2 /* 8-byte header */ + + 256 /* fan out */ + + 5 * p->num_objects /* 20-byte SHA-1 table */ + + p->num_objects /* CRC32 table */ + ); + + /* The address of the 8-byte offset table */ + idx2 = idx1 + p->num_objects; + + for (i = 0; i < p->num_objects; i++) { + uint32_t off = ntohl(idx1[i]); + if (!(off & 0x80000000)) + continue; + off = off & 0x7fffffff; + if (idx2[off * 2]) + continue; + /* + * The real offset is ntohl(idx2[off * 2]) in high 4 + * octets, and ntohl(idx2[off * 2 + 1]) in low 4 + * octets. But idx2[off * 2] is Zero!!! + */ + ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc); + opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]); + } + + if (1 < opts->anomaly_nr) + qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32); + } + + static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) + { + struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); + + if (!p) + die("Cannot open existing pack file '%s'", pack_name); + if (open_pack_index(p)) + die("Cannot open existing pack idx file for '%s'", pack_name); + + /* Read the attributes from the existing idx file */ + opts->version = p->index_version; + + if (opts->version == 2) + read_v2_anomalous_offsets(p, opts); + + /* + * Get rid of the idx file as we do not need it anymore. + * NEEDSWORK: extract this bit from free_pack_by_name() in + * sha1_file.c, perhaps? It shouldn't matter very much as we + * know we haven't installed this pack (hence we never have + * read anything from it). + */ + close_pack_index(p); + free(p); + } + + static void show_pack_info(int stat_only) + { + int i, baseobjects = nr_objects - nr_deltas; + unsigned long *chain_histogram = NULL; + + if (deepest_delta) + chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long)); + + for (i = 0; i < nr_objects; i++) { + struct object_entry *obj = &objects[i]; + + if (is_delta_type(obj->type)) + chain_histogram[obj->delta_depth - 1]++; + if (stat_only) + continue; + printf("%s %-6s %lu %lu %"PRIuMAX, + sha1_to_hex(obj->idx.sha1), + typename(obj->real_type), obj->size, + (unsigned long)(obj[1].idx.offset - obj->idx.offset), + (uintmax_t)obj->idx.offset); + if (is_delta_type(obj->type)) { + struct object_entry *bobj = &objects[obj->base_object_no]; + printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1)); + } + putchar('\n'); + } + + if (baseobjects) + printf("non delta: %d object%s\n", + baseobjects, baseobjects > 1 ? "s" : ""); + for (i = 0; i < deepest_delta; i++) { + if (!chain_histogram[i]) + continue; + printf("chain length = %d: %lu object%s\n", + i + 1, + chain_histogram[i], + chain_histogram[i] > 1 ? "s" : ""); + } + } + int cmd_index_pack(int argc, const char **argv, const char *prefix) { - int i, fix_thin_pack = 0; + int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0; const char *curr_pack, *curr_index; const char *index_name = NULL, *pack_name = NULL; const char *keep_name = NULL, *keep_msg = NULL; char *index_name_buf = NULL, *keep_name_buf = NULL; struct pack_idx_entry **idx_objects; + struct pack_idx_option opts; unsigned char pack_sha1[20]; if (argc == 2 && !strcmp(argv[1], "-h")) @@@ -884,7 -1030,8 +1028,8 @@@ read_replace_refs = 0; - git_config(git_index_pack_config, NULL); + reset_pack_idx_option(&opts); + git_config(git_index_pack_config, &opts); if (prefix && chdir(prefix)) die("Cannot come back to cwd"); @@@ -898,6 -1045,15 +1043,15 @@@ fix_thin_pack = 1; } else if (!strcmp(arg, "--strict")) { strict = 1; + } else if (!strcmp(arg, "--verify")) { + verify = 1; + } else if (!strcmp(arg, "--verify-stat")) { + verify = 1; + stat = 1; + } else if (!strcmp(arg, "--verify-stat-only")) { + verify = 1; + stat = 1; + stat_only = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@@ -923,12 -1079,12 +1077,12 @@@ index_name = argv[++i]; } else if (!prefixcmp(arg, "--index-version=")) { char *c; - pack_idx_default_version = strtoul(arg + 16, &c, 10); - if (pack_idx_default_version > 2) + opts.version = strtoul(arg + 16, &c, 10); + if (opts.version > 2) die("bad %s", arg); if (*c == ',') - pack_idx_off32_limit = strtoul(c+1, &c, 0); - if (*c || pack_idx_off32_limit & 0x80000000) + opts.off32_limit = strtoul(c+1, &c, 0); + if (*c || opts.off32_limit & 0x80000000) die("bad %s", arg); } else usage(index_pack_usage); @@@ -964,11 -1120,17 +1118,17 @@@ strcpy(keep_name_buf + len - 5, ".keep"); keep_name = keep_name_buf; } + if (verify) { + if (!index_name) + die("--verify with no packfile name given"); + read_idx_option(&opts, index_name); + opts.flags |= WRITE_IDX_VERIFY; + } curr_pack = open_pack_file(pack_name); parse_pack_header(); - objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); - deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); + objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); + deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); parse_pack_objects(pack_sha1); if (nr_deltas == nr_resolved_deltas) { stop_progress(&progress); @@@ -1008,16 -1170,22 +1168,22 @@@ if (strict) check_objects(); + if (stat) + show_pack_info(stat_only); + idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; - curr_index = write_idx_file(index_name, idx_objects, nr_objects, pack_sha1); + curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1); free(idx_objects); - final(pack_name, curr_pack, - index_name, curr_index, - keep_name, keep_msg, - pack_sha1); + if (!verify) + final(pack_name, curr_pack, + index_name, curr_index, + keep_name, keep_msg, + pack_sha1); + else + close(input_fd); free(objects); free(index_name_buf); free(keep_name_buf); diff --combined builtin/pack-objects.c index c6e2d8766b,dc471b78c4..84e6dafb12 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -70,6 -70,7 +70,7 @@@ static int local static int incremental; static int ignore_packed_keep; static int allow_ofs_delta; + static struct pack_idx_option pack_idx_opts; static const char *base_name; static int progress = 1; static int window = 10; @@@ -126,13 -127,13 +127,13 @@@ static void *get_delta(struct object_en static unsigned long do_compress(void **pptr, unsigned long size) { - z_stream stream; + git_zstream stream; void *in, *out; unsigned long maxsize; memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, pack_compression_level); - maxsize = deflateBound(&stream, size); + git_deflate_init(&stream, pack_compression_level); + maxsize = git_deflate_bound(&stream, size); in = *pptr; out = xmalloc(maxsize); @@@ -142,9 -143,9 +143,9 @@@ stream.avail_in = size; stream.next_out = out; stream.avail_out = maxsize; - while (deflate(&stream, Z_FINISH) == Z_OK) + while (git_deflate(&stream, Z_FINISH) == Z_OK) ; /* nothing */ - deflateEnd(&stream); + git_deflate_end(&stream); free(in); return stream.total_out; @@@ -160,7 -161,7 +161,7 @@@ static int check_pack_inflate(struct pa off_t len, unsigned long expect) { - z_stream stream; + git_zstream stream; unsigned char fakebuf[4096], *in; int st; @@@ -187,12 -188,12 +188,12 @@@ static void copy_pack_data(struct sha1f off_t len) { unsigned char *in; - unsigned int avail; + unsigned long avail; while (len) { in = use_pack(p, w_curs, offset, &avail); if (avail > len) - avail = (unsigned int)len; + avail = (unsigned long)len; sha1write(f, in, avail); offset += avail; len -= avail; @@@ -493,8 -494,8 +494,8 @@@ static void write_pack_file(void const char *idx_tmp_name; char tmpname[PATH_MAX]; - idx_tmp_name = write_idx_file(NULL, written_list, - nr_written, sha1); + idx_tmp_name = write_idx_file(NULL, written_list, nr_written, + &pack_idx_opts, sha1); snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", base_name, sha1_to_hex(sha1)); @@@ -994,7 -995,7 +995,7 @@@ static void check_object(struct object_ const unsigned char *base_ref = NULL; struct object_entry *base_entry; unsigned long used, used_0; - unsigned int avail; + unsigned long avail; off_t ofs; unsigned char *buf, c; @@@ -1142,12 -1143,8 +1143,12 @@@ static void get_object_details(void sorted_by_offset[i] = objects + i; qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort); - for (i = 0; i < nr_objects; i++) - check_object(sorted_by_offset[i]); + for (i = 0; i < nr_objects; i++) { + struct object_entry *entry = sorted_by_offset[i]; + check_object(entry); + if (big_file_threshold <= entry->size) + entry->no_try_delta = 1; + } free(sorted_by_offset); } @@@ -1884,10 -1881,10 +1885,10 @@@ static int git_pack_config(const char * return 0; } if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) + pack_idx_opts.version = git_config_int(k, v); + if (pack_idx_opts.version > 2) die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + pack_idx_opts.version); return 0; } if (!strcmp(k, "pack.packsizelimit")) { @@@ -2134,6 -2131,7 +2135,7 @@@ int cmd_pack_objects(int argc, const ch rp_av[1] = "--objects"; /* --thin will make it --objects-edge */ rp_ac = 2; + reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); if (!pack_compression_seen && core_compression_seen) pack_compression_level = core_compression_level; @@@ -2278,12 -2276,12 +2280,12 @@@ } if (!prefixcmp(arg, "--index-version=")) { char *c; - pack_idx_default_version = strtoul(arg + 16, &c, 10); - if (pack_idx_default_version > 2) + pack_idx_opts.version = strtoul(arg + 16, &c, 10); + if (pack_idx_opts.version > 2) die("bad %s", arg); if (*c == ',') - pack_idx_off32_limit = strtoul(c+1, &c, 0); - if (*c || pack_idx_off32_limit & 0x80000000) + pack_idx_opts.off32_limit = strtoul(c+1, &c, 0); + if (*c || pack_idx_opts.off32_limit & 0x80000000) die("bad %s", arg); continue; } diff --combined cache.h index 5e80113ee9,edea69e6a6..9e12d55470 --- a/cache.h +++ b/cache.h @@@ -5,8 -5,6 +5,8 @@@ #include "strbuf.h" #include "hash.h" #include "advice.h" +#include "gettext.h" +#include "convert.h" #include SHA1_HEADER #ifndef git_SHA_CTX @@@ -17,27 -15,13 +17,27 @@@ #endif #include -#if defined(NO_DEFLATE_BOUND) || ZLIB_VERNUM < 0x1200 -#define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11) -#endif - -void git_inflate_init(z_streamp strm); -void git_inflate_end(z_streamp strm); -int git_inflate(z_streamp strm, int flush); +typedef struct git_zstream { + z_stream z; + unsigned long avail_in; + unsigned long avail_out; + unsigned long total_in; + unsigned long total_out; + unsigned char *next_in; + unsigned char *next_out; +} git_zstream; + +void git_inflate_init(git_zstream *); +void git_inflate_init_gzip_only(git_zstream *); +void git_inflate_end(git_zstream *); +int git_inflate(git_zstream *, int flush); + +void git_deflate_init(git_zstream *, int level); +void git_deflate_init_gzip(git_zstream *, int level); +void git_deflate_end(git_zstream *); +int git_deflate_end_gently(git_zstream *); +int git_deflate(git_zstream *, int flush); +unsigned long git_deflate_bound(git_zstream *, unsigned long); #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) #define DTYPE(de) ((de)->d_type) @@@ -452,7 -436,6 +452,7 @@@ extern void verify_non_filename(const c #define INIT_DB_QUIET 0x0001 +extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int); extern int init_db(const char *template_dir, unsigned int flags); #define alloc_nr(x) (((x)+16)*3/2) @@@ -520,24 -503,21 +520,24 @@@ extern int ie_modified(const struct ind struct pathspec { const char **raw; /* get_pathspec() result, not freed by free_pathspec() */ int nr; - int has_wildcard:1; - int recursive:1; + unsigned int has_wildcard:1; + unsigned int recursive:1; int max_depth; struct pathspec_item { const char *match; int len; - int has_wildcard:1; + unsigned int use_wildcard:1; } *items; }; extern int init_pathspec(struct pathspec *, const char **); extern void free_pathspec(struct pathspec *); extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec); -extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check); -extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object); + +#define HASH_WRITE_OBJECT 1 +#define HASH_FORMAT_CHECK 2 +extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); +extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ @@@ -562,7 -542,6 +562,7 @@@ extern NORETURN void unable_to_lock_ind extern int hold_lock_file_for_update(struct lock_file *, const char *path, int); extern int hold_lock_file_for_append(struct lock_file *, const char *path, int); extern int commit_lock_file(struct lock_file *); +extern void update_index_if_able(struct index_state *, struct lock_file *); extern int hold_locked_index(struct lock_file *, int); extern int commit_locked_index(struct lock_file *); @@@ -576,12 -555,12 +576,12 @@@ extern int trust_executable_bit extern int trust_ctime; extern int quote_path_fully; extern int has_symlinks; +extern int minimum_abbrev, default_abbrev; extern int ignore_case; extern int assume_unchanged; extern int prefer_symlink_refs; extern int log_all_ref_updates; extern int warn_ambiguous_refs; -extern int unique_abbrev_extra_length; extern int shared_repository; extern const char *apply_default_whitespace; extern const char *apply_default_ignorewhitespace; @@@ -591,12 -570,40 +591,12 @@@ extern int core_compression_seen extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; +extern unsigned long big_file_threshold; extern int read_replace_refs; extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; -enum safe_crlf { - SAFE_CRLF_FALSE = 0, - SAFE_CRLF_FAIL = 1, - SAFE_CRLF_WARN = 2 -}; - -extern enum safe_crlf safe_crlf; - -enum auto_crlf { - AUTO_CRLF_FALSE = 0, - AUTO_CRLF_TRUE = 1, - AUTO_CRLF_INPUT = -1, -}; - -extern enum auto_crlf auto_crlf; - -enum eol { - EOL_UNSET, - EOL_CRLF, - EOL_LF, -#ifdef NATIVE_CRLF - EOL_NATIVE = EOL_CRLF -#else - EOL_NATIVE = EOL_LF -#endif -}; - -extern enum eol eol; - enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, BRANCH_TRACK_NEVER = 0, @@@ -665,24 -672,14 +665,24 @@@ extern char *sha1_pack_name(const unsig extern char *sha1_pack_index_name(const unsigned char *sha1); extern const char *find_unique_abbrev(const unsigned char *sha1, int); extern const unsigned char null_sha1[20]; -static inline int is_null_sha1(const unsigned char *sha1) + +static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) { - return !memcmp(sha1, null_sha1, 20); + int i; + + for (i = 0; i < 20; i++, sha1++, sha2++) { + if (*sha1 != *sha2) + return *sha1 - *sha2; + } + + return 0; } -static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) + +static inline int is_null_sha1(const unsigned char *sha1) { - return memcmp(sha1, sha2, 20); + return !hashcmp(sha1, null_sha1); } + static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src) { memcpy(sha_dst, sha_src, 20); @@@ -728,47 -725,33 +728,47 @@@ int set_shared_perm(const char *path, i #define adjust_shared_perm(path) set_shared_perm((path), 0) int safe_create_leading_directories(char *path); int safe_create_leading_directories_const(const char *path); +int mkdir_in_gitdir(const char *path); extern char *expand_user_path(const char *path); char *enter_repo(char *path, int strict); static inline int is_absolute_path(const char *path) { - return path[0] == '/' || has_dos_drive_prefix(path); + return is_dir_sep(path[0]) || has_dos_drive_prefix(path); } int is_directory(const char *); -const char *make_absolute_path(const char *path); -const char *make_nonrelative_path(const char *path); -const char *make_relative_path(const char *abs, const char *base); +const char *real_path(const char *path); +const char *absolute_path(const char *path); +const char *relative_path(const char *abs, const char *base); int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, const char *prefix_list); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); -/* Read and unpack a sha1 file into memory, write memory to a sha1 file */ -extern int sha1_object_info(const unsigned char *, unsigned long *); -extern void *read_sha1_file_repl(const unsigned char *sha1, enum object_type *type, unsigned long *size, const unsigned char **replacement); +/* object replacement */ +#define READ_SHA1_FILE_REPLACE 1 +extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { - return read_sha1_file_repl(sha1, type, size, NULL); + return read_sha1_file_extended(sha1, type, size, READ_SHA1_FILE_REPLACE); +} +extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); +static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) +{ + if (!read_replace_refs) + return sha1; + return do_lookup_replace_object(sha1); } + +/* Read and unpack a sha1 file into memory, write memory to a sha1 file */ +extern int sha1_object_info(const unsigned char *, unsigned long *); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); +extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size); +extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); +extern int parse_sha1_header(const char *hdr, unsigned long *sizep); /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; @@@ -792,8 -775,8 +792,8 @@@ static inline unsigned int hexval(unsig } /* Convert to/from hex/sha1 representation */ -#define MINIMUM_ABBREV 4 -#define DEFAULT_ABBREV 7 +#define MINIMUM_ABBREV minimum_abbrev +#define DEFAULT_ABBREV default_abbrev struct object_context { unsigned char tree[20]; @@@ -802,15 -785,15 +802,15 @@@ }; extern int get_sha1(const char *str, unsigned char *sha1); -extern int get_sha1_with_mode_1(const char *str, unsigned char *sha1, unsigned *mode, int gently, const char *prefix); +extern int get_sha1_with_mode_1(const char *str, unsigned char *sha1, unsigned *mode, int only_to_die, const char *prefix); static inline int get_sha1_with_mode(const char *str, unsigned char *sha1, unsigned *mode) { - return get_sha1_with_mode_1(str, sha1, mode, 1, NULL); + return get_sha1_with_mode_1(str, sha1, mode, 0, NULL); } -extern int get_sha1_with_context_1(const char *name, unsigned char *sha1, struct object_context *orc, int gently, const char *prefix); +extern int get_sha1_with_context_1(const char *name, unsigned char *sha1, struct object_context *orc, int only_to_die, const char *prefix); static inline int get_sha1_with_context(const char *str, unsigned char *sha1, struct object_context *orc) { - return get_sha1_with_context_1(str, sha1, orc, 1, NULL); + return get_sha1_with_context_1(str, sha1, orc, 0, NULL); } extern int get_sha1_hex(const char *hex, unsigned char *sha1); extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */ @@@ -931,8 -914,7 +931,8 @@@ extern struct packed_git time_t mtime; int pack_fd; unsigned pack_local:1, - pack_keep:1; + pack_keep:1, + do_not_close:1; unsigned char sha1[20]; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[FLEX_ARRAY]; /* more */ @@@ -977,7 -959,6 +977,7 @@@ extern struct ref *find_ref_by_name(con extern char *git_getpass(const char *prompt); extern struct child_process *git_connect(int fd[2], const char *url, const char *prog, int flags); extern int finish_connect(struct child_process *conn); +extern int git_connection_is_socket(struct child_process *conn); extern int path_match(const char *path, int nr, char **match); struct extra_have_objects { int nr, alloc; @@@ -998,7 -979,7 +998,7 @@@ extern struct packed_git *find_sha1_pac extern void pack_report(void); extern int open_pack_index(struct packed_git *); extern void close_pack_index(struct packed_git *); -extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *); +extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); extern void unuse_pack(struct pack_window **); extern void free_pack_by_name(const char *); @@@ -1010,55 -991,16 +1010,54 @@@ extern off_t find_pack_entry_one(const extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); - extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); + +struct object_info { + /* Request */ + unsigned long *sizep; + + /* Response */ + enum { + OI_CACHED, + OI_LOOSE, + OI_PACKED, + OI_DBCACHED + } whence; + union { + /* + * struct { + * ... Nothing to expose in this case + * } cached; + * struct { + * ... Nothing to expose in this case + * } loose; + */ + struct { + struct packed_git *pack; + off_t offset; + unsigned int is_delta; + } packed; + } u; +}; +extern int sha1_object_info_extended(const unsigned char *, struct object_info *); /* Dumb servers support */ extern int update_server_info(int); +/* git_config_parse_key() returns these negated: */ +#define CONFIG_INVALID_KEY 1 +#define CONFIG_NO_SECTION_OR_NAME 2 +/* git_config_set(), git_config_set_multivar() return the above or these: */ +#define CONFIG_NO_LOCK -1 +#define CONFIG_INVALID_FILE 3 +#define CONFIG_NO_WRITE 4 +#define CONFIG_NOTHING_SET 5 +#define CONFIG_INVALID_PATTERN 6 + typedef int (*config_fn_t)(const char *, const char *, void *); extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); extern void git_config_push_parameter(const char *text); -extern int git_config_parse_parameter(const char *text); -extern int git_config_parse_environment(void); extern int git_config_from_parameters(config_fn_t fn, void *data); extern int git_config(config_fn_t fn, void *); extern int git_config_early(config_fn_t fn, void *, const char *repo_config); @@@ -1078,12 -1020,11 +1077,12 @@@ extern const char *git_etc_gitconfig(vo extern int check_repository_format_version(const char *var, const char *value, void *cb); extern int git_env_bool(const char *, int); extern int git_config_system(void); -extern int git_config_global(void); extern int config_error_nonbool(const char *); extern const char *get_log_output_encoding(void); extern const char *get_commit_output_encoding(void); +extern int git_config_parse_parameter(const char *, config_fn_t fn, void *data); + extern const char *config_exclusive_filename; #define MAX_GITNAME (1000) @@@ -1141,14 -1082,16 +1140,14 @@@ extern void alloc_report(void) /* trace.c */ __attribute__((format (printf, 1, 2))) extern void trace_printf(const char *format, ...); +extern void trace_vprintf(const char *key, const char *format, va_list ap); __attribute__((format (printf, 2, 3))) extern void trace_argv_printf(const char **argv, const char *format, ...); extern void trace_repo_setup(const char *prefix); +extern int trace_want(const char *key); +extern void trace_strbuf(const char *key, const struct strbuf *buf); -/* convert.c */ -/* returns 1 if *dst was used */ -extern int convert_to_git(const char *path, const char *src, size_t len, - struct strbuf *dst, enum safe_crlf checksafe); -extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); -extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); +void packet_trace_identity(const char *prog); /* add */ /* diff --combined csum-file.c index be49d5fcf9,f70e3dd7b5..fc97d6e045 --- a/csum-file.c +++ b/csum-file.c @@@ -11,8 -11,20 +11,20 @@@ #include "progress.h" #include "csum-file.h" - static void flush(struct sha1file *f, void * buf, unsigned int count) + static void flush(struct sha1file *f, void *buf, unsigned int count) { + if (0 <= f->check_fd && count) { + unsigned char check_buffer[8192]; + ssize_t ret = read_in_full(f->check_fd, check_buffer, count); + + if (ret < 0) + die_errno("%s: sha1 file read error", f->name); + if (ret < count) + die("%s: sha1 file truncated", f->name); + if (memcmp(buf, check_buffer, count)) + die("sha1 file '%s' validation error", f->name); + } + for (;;) { int ret = xwrite(f->fd, buf, count); if (ret > 0) { @@@ -59,6 -71,17 +71,17 @@@ int sha1close(struct sha1file *f, unsig fd = 0; } else fd = f->fd; + if (0 <= f->check_fd) { + char discard; + int cnt = read_in_full(f->check_fd, &discard, 1); + if (cnt < 0) + die_errno("%s: error when reading the tail of sha1 file", + f->name); + if (cnt) + die("%s: sha1 file has trailing garbage", f->name); + if (close(f->check_fd)) + die_errno("%s: sha1 file error on close", f->name); + } free(f); return fd; } @@@ -101,10 -124,31 +124,31 @@@ struct sha1file *sha1fd(int fd, const c return sha1fd_throughput(fd, name, NULL); } + struct sha1file *sha1fd_check(const char *name) + { + int sink, check; + struct sha1file *f; + + sink = open("/dev/null", O_WRONLY); + if (sink < 0) + return NULL; + check = open(name, O_RDONLY); + if (check < 0) { + int saved_errno = errno; + close(sink); + errno = saved_errno; + return NULL; + } + f = sha1fd(sink, name); + f->check_fd = check; + return f; + } + struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp) { struct sha1file *f = xmalloc(sizeof(*f)); f->fd = fd; + f->check_fd = -1; f->offset = 0; f->total = 0; f->tp = tp; @@@ -116,7 -160,7 +160,7 @@@ void crc32_begin(struct sha1file *f) { - f->crc32 = crc32(0, Z_NULL, 0); + f->crc32 = crc32(0, NULL, 0); f->do_crc = 1; } diff --combined fast-import.c index 1d5e3336a5,91e936d1b9..9e8d1868aa --- a/fast-import.c +++ b/fast-import.c @@@ -24,12 -24,10 +24,12 @@@ Format of STDIN stream commit_msg ('from' sp committish lf)? ('merge' sp committish lf)* - file_change* + (file_change | ls)* lf?; commit_msg ::= data; + ls ::= 'ls' sp '"' quoted(path) '"' lf; + file_change ::= file_clr | file_del | file_rnm @@@ -134,7 -132,7 +134,7 @@@ ts ::= # time since the epoch in seconds, ascii base10 notation; tz ::= # GIT style timezone; - # note: comments and cat requests may appear anywhere + # note: comments, ls and cat requests may appear anywhere # in the input, except within a data command. Any form # of the data command always escapes the related input # from comment processing. @@@ -143,9 -141,7 +143,9 @@@ # must be the first character on that line (an lf # preceded it). # + cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; + ls_tree ::= 'ls' sp (hexsha1 | idnum) sp path_str lf; comment ::= '#' not_lf* lf; not_lf ::= # Any byte that is not ASCII newline (LF); @@@ -170,7 -166,8 +170,7 @@@ #define DEPTH_BITS 13 #define MAX_DEPTH ((1<pack_name, tmpfile); p->pack_fd = pack_fd; + p->do_not_close = 1; pack_file = sha1fd(pack_fd, p->pack_name); hdr.hdr_signature = htonl(PACK_SIGNATURE); @@@ -896,7 -906,7 +897,7 @@@ static const char *create_index(void if (c != last) die("internal consistency error creating the index"); - tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1); + tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1); free(idx); return tmpfile; } @@@ -1017,7 -1027,7 +1018,7 @@@ static int store_object unsigned char sha1[20]; unsigned long hdrlen, deltalen; git_SHA_CTX c; - z_stream s; + git_zstream s; hdrlen = sprintf((char *)hdr,"%s %lu", typename(type), (unsigned long)dat->len) + 1; @@@ -1050,7 -1060,7 +1051,7 @@@ delta = NULL; memset(&s, 0, sizeof(s)); - deflateInit(&s, pack_compression_level); + git_deflate_init(&s, pack_compression_level); if (delta) { s.next_in = delta; s.avail_in = deltalen; @@@ -1058,11 -1068,11 +1059,11 @@@ s.next_in = (void *)dat->buf; s.avail_in = dat->len; } - s.avail_out = deflateBound(&s, s.avail_in); + s.avail_out = git_deflate_bound(&s, s.avail_in); s.next_out = out = xmalloc(s.avail_out); - while (deflate(&s, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&s); + while (git_deflate(&s, Z_FINISH) == Z_OK) + ; /* nothing */ + git_deflate_end(&s); /* Determine if we should auto-checkpoint. */ if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize) @@@ -1078,14 -1088,14 +1079,14 @@@ delta = NULL; memset(&s, 0, sizeof(s)); - deflateInit(&s, pack_compression_level); + git_deflate_init(&s, pack_compression_level); s.next_in = (void *)dat->buf; s.avail_in = dat->len; - s.avail_out = deflateBound(&s, s.avail_in); + s.avail_out = git_deflate_bound(&s, s.avail_in); s.next_out = out = xrealloc(out, s.avail_out); - while (deflate(&s, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&s); + while (git_deflate(&s, Z_FINISH) == Z_OK) + ; /* nothing */ + git_deflate_end(&s); } } @@@ -1163,7 -1173,7 +1164,7 @@@ static void stream_blob(uintmax_t len, off_t offset; git_SHA_CTX c; git_SHA_CTX pack_file_ctx; - z_stream s; + git_zstream s; int status = Z_OK; /* Determine if we should auto-checkpoint. */ @@@ -1187,7 -1197,7 +1188,7 @@@ crc32_begin(pack_file); memset(&s, 0, sizeof(s)); - deflateInit(&s, pack_compression_level); + git_deflate_init(&s, pack_compression_level); hdrlen = encode_in_pack_object_header(OBJ_BLOB, len, out_buf); if (out_sz <= hdrlen) @@@ -1209,7 -1219,7 +1210,7 @@@ len -= n; } - status = deflate(&s, len ? 0 : Z_FINISH); + status = git_deflate(&s, len ? 0 : Z_FINISH); if (!s.avail_out || status == Z_STREAM_END) { size_t n = s.next_out - out_buf; @@@ -1228,7 -1238,7 +1229,7 @@@ die("unexpected deflate failure: %d", status); } } - deflateEnd(&s); + git_deflate_end(&s); git_SHA1_Final(sha1, &c); if (sha1out) @@@ -2605,8 -2615,6 +2606,8 @@@ static void parse_new_commit(void note_change_n(b, prev_fanout); else if (!strcmp("deleteall", command_buf.buf)) file_change_deleteall(b); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(b); else { unread_command_buf = 1; break; @@@ -2830,153 -2838,6 +2831,153 @@@ static void parse_cat_blob(void cat_blob(oe, sha1); } +static struct object_entry *dereference(struct object_entry *oe, + unsigned char sha1[20]) +{ + unsigned long size; + char *buf = NULL; + if (!oe) { + enum object_type type = sha1_object_info(sha1, NULL); + if (type < 0) + die("object not found: %s", sha1_to_hex(sha1)); + /* cache it! */ + oe = insert_object(sha1); + oe->type = type; + oe->pack_id = MAX_PACK_ID; + oe->idx.offset = 1; + } + switch (oe->type) { + case OBJ_TREE: /* easy case. */ + return oe; + case OBJ_COMMIT: + case OBJ_TAG: + break; + default: + die("Not a treeish: %s", command_buf.buf); + } + + if (oe->pack_id != MAX_PACK_ID) { /* in a pack being written */ + buf = gfi_unpack_entry(oe, &size); + } else { + enum object_type unused; + buf = read_sha1_file(sha1, &unused, &size); + } + if (!buf) + die("Can't load object %s", sha1_to_hex(sha1)); + + /* Peel one layer. */ + switch (oe->type) { + case OBJ_TAG: + if (size < 40 + strlen("object ") || + get_sha1_hex(buf + strlen("object "), sha1)) + die("Invalid SHA1 in tag: %s", command_buf.buf); + break; + case OBJ_COMMIT: + if (size < 40 + strlen("tree ") || + get_sha1_hex(buf + strlen("tree "), sha1)) + die("Invalid SHA1 in commit: %s", command_buf.buf); + } + + free(buf); + return find_object(sha1); +} + +static struct object_entry *parse_treeish_dataref(const char **p) +{ + unsigned char sha1[20]; + struct object_entry *e; + + if (**p == ':') { /* */ + char *endptr; + e = find_mark(strtoumax(*p + 1, &endptr, 10)); + if (endptr == *p + 1) + die("Invalid mark: %s", command_buf.buf); + if (!e) + die("Unknown mark: %s", command_buf.buf); + *p = endptr; + hashcpy(sha1, e->idx.sha1); + } else { /* */ + if (get_sha1_hex(*p, sha1)) + die("Invalid SHA1: %s", command_buf.buf); + e = find_object(sha1); + *p += 40; + } + + while (!e || e->type != OBJ_TREE) + e = dereference(e, sha1); + return e; +} + +static void print_ls(int mode, const unsigned char *sha1, const char *path) +{ + static struct strbuf line = STRBUF_INIT; + + /* See show_tree(). */ + const char *type = + S_ISGITLINK(mode) ? commit_type : + S_ISDIR(mode) ? tree_type : + blob_type; + + if (!mode) { + /* missing SP path LF */ + strbuf_reset(&line); + strbuf_addstr(&line, "missing "); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } else { + /* mode SP type SP object_name TAB path LF */ + strbuf_reset(&line); + strbuf_addf(&line, "%06o %s %s\t", + mode, type, sha1_to_hex(sha1)); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } + cat_blob_write(line.buf, line.len); +} + +static void parse_ls(struct branch *b) +{ + const char *p; + struct tree_entry *root = NULL; + struct tree_entry leaf = {NULL}; + + /* ls SP ( SP)? */ + p = command_buf.buf + strlen("ls "); + if (*p == '"') { + if (!b) + die("Not in a commit: %s", command_buf.buf); + root = &b->branch_tree; + } else { + struct object_entry *e = parse_treeish_dataref(&p); + root = new_tree_entry(); + hashcpy(root->versions[1].sha1, e->idx.sha1); + load_tree(root); + if (*p++ != ' ') + die("Missing space after tree-ish: %s", command_buf.buf); + } + if (*p == '"') { + static struct strbuf uq = STRBUF_INIT; + const char *endp; + strbuf_reset(&uq); + if (unquote_c_style(&uq, p, &endp)) + die("Invalid path: %s", command_buf.buf); + if (*endp) + die("Garbage after path in: %s", command_buf.buf); + p = uq.buf; + } + tree_content_get(root, p, &leaf); + /* + * A directory in preparation would have a sha1 of zero + * until it is saved. Save, for simplicity. + */ + if (S_ISDIR(leaf.versions[1].mode)) + store_tree(&leaf); + + print_ls(leaf.versions[1].mode, leaf.versions[1].sha1, p); + if (!b || root != &b->branch_tree) + release_tree_entry(root); +} + static void checkpoint(void) { checkpoint_requested = 0; @@@ -3135,13 -2996,13 +3136,13 @@@ static int parse_one_feature(const cha option_export_marks(feature + 13); } else if (!strcmp(feature, "cat-blob")) { ; /* Don't die - this feature is supported */ - } else if (!prefixcmp(feature, "relative-marks")) { + } else if (!strcmp(feature, "relative-marks")) { relative_marks_paths = 1; - } else if (!prefixcmp(feature, "no-relative-marks")) { + } else if (!strcmp(feature, "no-relative-marks")) { relative_marks_paths = 0; - } else if (!prefixcmp(feature, "force")) { + } else if (!strcmp(feature, "force")) { force_update = 1; - } else if (!strcmp(feature, "notes")) { + } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) { ; /* do nothing; we have the feature */ } else { return 0; @@@ -3195,16 -3056,20 +3196,16 @@@ static int git_pack_config(const char * return 0; } if (!strcmp(k, "pack.indexversion")) { - pack_idx_default_version = git_config_int(k, v); - if (pack_idx_default_version > 2) + pack_idx_opts.version = git_config_int(k, v); + if (pack_idx_opts.version > 2) die("bad pack.indexversion=%"PRIu32, - pack_idx_default_version); + pack_idx_opts.version); return 0; } if (!strcmp(k, "pack.packsizelimit")) { max_packsize = git_config_ulong(k, v); return 0; } - if (!strcmp(k, "core.bigfilethreshold")) { - long n = git_config_int(k, v); - big_file_threshold = 0 < n ? n : 0; - } return git_default_config(k, v, cb); } @@@ -3252,6 -3117,7 +3253,7 @@@ int main(int argc, const char **argv usage(fast_import_usage); setup_git_directory(); + reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); if (!pack_compression_seen && core_compression_seen) pack_compression_level = core_compression_level; @@@ -3278,8 -3144,6 +3280,8 @@@ while (read_next_command() != EOF) { if (!strcmp("blob", command_buf.buf)) parse_new_blob(); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(NULL); else if (!prefixcmp(command_buf.buf, "commit ")) parse_new_commit(); else if (!prefixcmp(command_buf.buf, "tag ")) diff --combined sha1_file.c index a6aac70923,ca87e3d51b..89d7e5eb57 --- a/sha1_file.c +++ b/sha1_file.c @@@ -11,7 -11,6 +11,7 @@@ #include "pack.h" #include "blob.h" #include "commit.h" +#include "run-command.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@@ -27,11 -26,18 +27,11 @@@ #endif #endif -#ifdef NO_C99_FORMAT -#define SZ_FMT "lu" -static unsigned long sz_fmt(size_t s) { return (unsigned long)s; } -#else -#define SZ_FMT "zu" -static size_t sz_fmt(size_t s) { return s; } -#endif +#define SZ_FMT PRIuMAX +static inline uintmax_t sz_fmt(size_t s) { return s; } const unsigned char null_sha1[20]; -static int git_open_noatime(const char *name, struct packed_git *p); - /* * This is meant to hold a *small* number of objects that you would * want read_sha1_file() to be able to return, but yet you do not want @@@ -67,35 -73,6 +67,35 @@@ static struct cached_object *find_cache return NULL; } +int mkdir_in_gitdir(const char *path) +{ + if (mkdir(path, 0777)) { + int saved_errno = errno; + struct stat st; + struct strbuf sb = STRBUF_INIT; + + if (errno != EEXIST) + return -1; + /* + * Are we looking at a path in a symlinked worktree + * whose original repository does not yet have it? + * e.g. .git/rr-cache pointing at its original + * repository in which the user hasn't performed any + * conflict resolution yet? + */ + if (lstat(path, &st) || !S_ISLNK(st.st_mode) || + strbuf_readlink(&sb, path, st.st_size) || + !is_absolute_path(sb.buf) || + mkdir(sb.buf, 0777)) { + strbuf_release(&sb); + errno = saved_errno; + return -1; + } + strbuf_release(&sb); + } + return adjust_shared_perm(path); +} + int safe_create_leading_directories(char *path) { char *pos = path + offset_1st_component(path); @@@ -226,7 -203,6 +226,7 @@@ struct alternate_object_database *alt_o static struct alternate_object_database **alt_odb_tail; static void read_info_alternates(const char * alternates, int depth); +static int git_open_noatime(const char *name); /* * Prepare alternate object database registry. @@@ -360,7 -336,7 +360,7 @@@ static void read_info_alternates(const int fd; sprintf(path, "%s/%s", relative_base, alt_file_name); - fd = git_open_noatime(path, NULL); + fd = git_open_noatime(path); if (fd < 0) return; if (fstat(fd, &st) || (st.st_size == 0)) { @@@ -442,8 -418,6 +442,8 @@@ static unsigned int pack_used_ctr static unsigned int pack_mmap_calls; static unsigned int peak_pack_open_windows; static unsigned int pack_open_windows; +static unsigned int pack_open_fds; +static unsigned int pack_max_fds; static size_t peak_pack_mapped; static size_t pack_mapped; struct packed_git *packed_git; @@@ -475,7 -449,7 +475,7 @@@ static int check_packed_git_idx(const c struct pack_idx_header *hdr; size_t idx_size; uint32_t version, nr, i, *index; - int fd = git_open_noatime(path, p); + int fd = git_open_noatime(path); struct stat st; if (fd < 0) @@@ -621,10 -595,8 +621,10 @@@ static int unuse_one_window(struct pack lru_l->next = lru_w->next; else { lru_p->windows = lru_w->next; - if (!lru_p->windows && lru_p->pack_fd != keep_fd) { + if (!lru_p->windows && lru_p->pack_fd != -1 + && lru_p->pack_fd != keep_fd) { close(lru_p->pack_fd); + pack_open_fds--; lru_p->pack_fd = -1; } } @@@ -709,10 -681,8 +709,10 @@@ void free_pack_by_name(const char *pack if (strcmp(pack_name, p->pack_name) == 0) { clear_delta_base_cache(); close_pack_windows(p); - if (p->pack_fd != -1) + if (p->pack_fd != -1) { close(p->pack_fd); + pack_open_fds--; + } close_pack_index(p); free(p->bad_object_sha1); *pp = p->next; @@@ -738,29 -708,9 +738,29 @@@ static int open_packed_git_1(struct pac if (!p->index_data && open_pack_index(p)) return error("packfile %s index unavailable", p->pack_name); - p->pack_fd = git_open_noatime(p->pack_name, p); + if (!pack_max_fds) { + struct rlimit lim; + unsigned int max_fds; + + if (getrlimit(RLIMIT_NOFILE, &lim)) + die_errno("cannot get RLIMIT_NOFILE"); + + max_fds = lim.rlim_cur; + + /* Save 3 for stdin/stdout/stderr, 22 for work */ + if (25 < max_fds) + pack_max_fds = max_fds - 25; + else + pack_max_fds = 1; + } + + while (pack_max_fds <= pack_open_fds && unuse_one_window(NULL, -1)) + ; /* nothing */ + + p->pack_fd = git_open_noatime(p->pack_name); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) return -1; + pack_open_fds++; /* If we created the struct before we had the pack we lack size. */ if (!p->pack_size) { @@@ -812,7 -762,6 +812,7 @@@ static int open_packed_git(struct packe return 0; if (p->pack_fd != -1) { close(p->pack_fd); + pack_open_fds--; p->pack_fd = -1; } return -1; @@@ -834,17 -783,18 +834,17 @@@ static int in_window(struct pack_windo unsigned char *use_pack(struct packed_git *p, struct pack_window **w_cursor, off_t offset, - unsigned int *left) + unsigned long *left) { struct pack_window *win = *w_cursor; - if (p->pack_fd == -1 && open_packed_git(p)) - die("packfile %s cannot be accessed", p->pack_name); - /* Since packfiles end in a hash of their content and it's * pointless to ask for an offset into the middle of that * hash, and the in_window function above wouldn't match * don't allow an offset too close to the end of the file. */ + if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) + die("packfile %s cannot be accessed", p->pack_name); if (offset > (p->pack_size - 20)) die("offset beyond end of packfile (truncated pack?)"); @@@ -858,10 -808,6 +858,10 @@@ if (!win) { size_t window_align = packed_git_window_size / 2; off_t len; + + if (p->pack_fd == -1 && open_packed_git(p)) + die("packfile %s cannot be accessed", p->pack_name); + win = xcalloc(1, sizeof(*win)); win->offset = (offset / window_align) * window_align; len = p->pack_size - win->offset; @@@ -879,12 -825,6 +879,12 @@@ die("packfile %s cannot be mapped: %s", p->pack_name, strerror(errno)); + if (!win->offset && win->len == p->pack_size + && !p->do_not_close) { + close(p->pack_fd); + pack_open_fds--; + p->pack_fd = -1; + } pack_mmap_calls++; pack_open_windows++; if (pack_mapped > peak_pack_mapped) @@@ -979,9 -919,6 +979,9 @@@ struct packed_git *parse_pack_index(uns void install_packed_git(struct packed_git *pack) { + if (pack->pack_fd != -1) + pack_open_fds++; + pack->next = packed_git; packed_git = pack; } @@@ -999,6 -936,8 +999,6 @@@ static void prepare_packed_git_one(cha sprintf(path, "%s/pack", objdir); len = strlen(path); dir = opendir(path); - while (!dir && errno == EMFILE && unuse_one_window(NULL, -1)) - dir = opendir(path); if (!dir) { if (errno != ENOENT) error("unable to open object pack directory: %s: %s", @@@ -1145,7 -1084,7 +1145,7 @@@ int check_sha1_signature(const unsigne return hashcmp(sha1, real_sha1) ? -1 : 0; } -static int git_open_noatime(const char *name, struct packed_git *p) +static int git_open_noatime(const char *name) { static int sha1_file_open_flag = O_NOATIME; @@@ -1154,6 -1093,14 +1154,6 @@@ if (fd >= 0) return fd; - /* Might the failure be insufficient file descriptors? */ - if (errno == EMFILE) { - if (unuse_one_window(p, -1)) - continue; - else - return -1; - } - /* Might the failure be due to O_NOATIME? */ if (errno != ENOENT && sha1_file_open_flag) { sha1_file_open_flag = 0; @@@ -1170,7 -1117,7 +1170,7 @@@ static int open_sha1_file(const unsigne char *name = sha1_file_name(sha1); struct alternate_object_database *alt; - fd = git_open_noatime(name, NULL); + fd = git_open_noatime(name); if (fd >= 0) return fd; @@@ -1179,14 -1126,14 +1179,14 @@@ for (alt = alt_odb_list; alt; alt = alt->next) { name = alt->name; fill_sha1_path(name, sha1); - fd = git_open_noatime(alt->base, NULL); + fd = git_open_noatime(alt->base); if (fd >= 0) return fd; } return -1; } -static void *map_sha1_file(const unsigned char *sha1, unsigned long *size) +void *map_sha1_file(const unsigned char *sha1, unsigned long *size) { void *map; int fd; @@@ -1205,29 -1152,20 +1205,29 @@@ return map; } -static int legacy_loose_object(unsigned char *map) +/* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned up not to be + * really worth it and we no longer write loose objects in that + * format. + */ +static int experimental_loose_object(unsigned char *map) { unsigned int word; /* * Is it a zlib-compressed buffer? If so, the first byte * must be 0x78 (15-bit window size, deflated), and the - * first 16-bit word is evenly divisible by 31 + * first 16-bit word is evenly divisible by 31. If so, + * we are looking at the official format, not the experimental + * one. */ word = (map[0] << 8) + map[1]; if (map[0] == 0x78 && !(word % 31)) - return 1; - else return 0; + else + return 1; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@@ -1254,7 -1192,7 +1254,7 @@@ return used; } -static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) +int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) { unsigned long size, used; static const char valid_loose_object_type[8] = { @@@ -1271,32 -1209,37 +1271,32 @@@ stream->next_out = buffer; stream->avail_out = bufsiz; - if (legacy_loose_object(map)) { - git_inflate_init(stream); - return git_inflate(stream, 0); - } - + if (experimental_loose_object(map)) { + /* + * The old experimental format we no longer produce; + * we can still read it. + */ + used = unpack_object_header_buffer(map, mapsize, &type, &size); + if (!used || !valid_loose_object_type[type]) + return -1; + map += used; + mapsize -= used; - /* - * There used to be a second loose object header format which - * was meant to mimic the in-pack format, allowing for direct - * copy of the object data. This format turned up not to be - * really worth it and we don't write it any longer. But we - * can still read it. - */ - used = unpack_object_header_buffer(map, mapsize, &type, &size); - if (!used || !valid_loose_object_type[type]) - return -1; - map += used; - mapsize -= used; + /* Set up the stream for the rest.. */ + stream->next_in = map; + stream->avail_in = mapsize; + git_inflate_init(stream); - /* Set up the stream for the rest.. */ - stream->next_in = map; - stream->avail_in = mapsize; + /* And generate the fake traditional header */ + stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", + typename(type), size); + return 0; + } git_inflate_init(stream); - - /* And generate the fake traditional header */ - stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", - typename(type), size); - return 0; + return git_inflate(stream, 0); } -static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size, const unsigned char *sha1) +static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) { int bytes = strlen(buffer) + 1; unsigned char *buf = xmallocz(size); @@@ -1312,7 -1255,7 +1312,7 @@@ /* * The above condition must be (bytes <= size), not * (bytes < size). In other words, even though we - * expect no more output and set avail_out to zer0, + * expect no more output and set avail_out to zero, * the input zlib stream may have bytes that express * "this concludes the stream", and we *do* want to * eat that input. @@@ -1346,7 -1289,7 +1346,7 @@@ * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_sha1_header(const char *hdr, unsigned long *sizep) +int parse_sha1_header(const char *hdr, unsigned long *sizep) { char type[10]; int i; @@@ -1395,7 -1338,7 +1395,7 @@@ static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) { int ret; - z_stream stream; + git_zstream stream; char hdr[8192]; ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)); @@@ -1411,7 -1354,7 +1411,7 @@@ unsigned long get_size_from_delta(struc { const unsigned char *data; unsigned char delta_head[20], *in; - z_stream stream; + git_zstream stream; int st; memset(&stream, 0, sizeof(stream)); @@@ -1485,7 -1428,7 +1485,7 @@@ static off_t get_delta_base(struct pack /* forward declaration for a mutually recursive function */ static int packed_object_info(struct packed_git *p, off_t offset, - unsigned long *sizep); + unsigned long *sizep, int *rtype); static int packed_delta_info(struct packed_git *p, struct pack_window **w_curs, @@@ -1499,7 -1442,7 +1499,7 @@@ base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); if (!base_offset) return OBJ_BAD; - type = packed_object_info(p, base_offset, NULL); + type = packed_object_info(p, base_offset, NULL, NULL); if (type <= OBJ_NONE) { struct revindex_entry *revidx; const unsigned char *base_sha1; @@@ -1527,18 -1470,18 +1527,18 @@@ return type; } -static int unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - unsigned long *sizep) +int unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + unsigned long *sizep) { unsigned char *base; - unsigned int left; + unsigned long left; unsigned long used; enum object_type type; /* use_pack() assures us we have [base, base + 20) available - * as a range that we can look at at. (Its actually the hash + * as a range that we can look at. (Its actually the hash * size that is assured.) With our object header encoding * the maximum deflated object size is 2^137, which is just * insane, so we know won't exceed what we have been given. @@@ -1553,63 -1496,8 +1553,8 @@@ return type; } - int packed_object_info_detail(struct packed_git *p, - off_t obj_offset, - unsigned long *size, - unsigned long *store_size, - unsigned int *delta_chain_length, - unsigned char *base_sha1) - { - struct pack_window *w_curs = NULL; - off_t curpos; - unsigned long dummy; - unsigned char *next_sha1; - enum object_type type; - struct revindex_entry *revidx; - - *delta_chain_length = 0; - curpos = obj_offset; - type = unpack_object_header(p, &w_curs, &curpos, size); - - revidx = find_pack_revindex(p, obj_offset); - *store_size = revidx[1].offset - obj_offset; - - for (;;) { - switch (type) { - default: - die("pack %s contains unknown object type %d", - p->pack_name, type); - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - unuse_pack(&w_curs); - return type; - case OBJ_OFS_DELTA: - obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); - if (!obj_offset) - die("pack %s contains bad delta base reference of type %s", - p->pack_name, typename(type)); - if (*delta_chain_length == 0) { - revidx = find_pack_revindex(p, obj_offset); - hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr)); - } - break; - case OBJ_REF_DELTA: - next_sha1 = use_pack(p, &w_curs, curpos, NULL); - if (*delta_chain_length == 0) - hashcpy(base_sha1, next_sha1); - obj_offset = find_pack_entry_one(next_sha1, p); - break; - } - (*delta_chain_length)++; - curpos = obj_offset; - type = unpack_object_header(p, &w_curs, &curpos, &dummy); - } - } - static int packed_object_info(struct packed_git *p, off_t obj_offset, - unsigned long *sizep) + unsigned long *sizep, int *rtype) { struct pack_window *w_curs = NULL; unsigned long size; @@@ -1617,8 -1505,6 +1562,8 @@@ enum object_type type; type = unpack_object_header(p, &w_curs, &curpos, &size); + if (rtype) + *rtype = type; /* representation type */ switch (type) { case OBJ_OFS_DELTA: @@@ -1648,7 -1534,7 +1593,7 @@@ static void *unpack_compressed_entry(st unsigned long size) { int st; - z_stream stream; + git_zstream stream; unsigned char *buffer, *in; buffer = xmallocz(size); @@@ -1701,13 -1587,6 +1646,13 @@@ static unsigned long pack_entry_hash(st return hash % MAX_DELTA_CACHE; } +static int in_delta_base_cache(struct packed_git *p, off_t base_offset) +{ + unsigned long hash = pack_entry_hash(p, base_offset); + struct delta_base_cache_entry *ent = delta_base_cache + hash; + return (ent->data && ent->p == p && ent->base_offset == base_offset); +} + static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, unsigned long *base_size, enum object_type *type, int keep_cache) { @@@ -1998,27 -1877,6 +1943,27 @@@ off_t find_pack_entry_one(const unsigne return 0; } +static int is_pack_valid(struct packed_git *p) +{ + /* An already open pack is known to be valid. */ + if (p->pack_fd != -1) + return 1; + + /* If the pack has one window completely covering the + * file size, the pack is known to be valid even if + * the descriptor is not currently open. + */ + if (p->windows) { + struct pack_window *w = p->windows; + + if (!w->offset && w->len == p->pack_size) + return 1; + } + + /* Force the pack to open to prove its valid. */ + return !open_packed_git(p); +} + static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { static struct packed_git *last_found = (void *)1; @@@ -2048,7 -1906,7 +1993,7 @@@ * it may have been deleted since the index * was loaded! */ - if (p->pack_fd == -1 && open_packed_git(p)) { + if (!is_pack_valid(p)) { error("packfile %s cannot be accessed", p->pack_name); goto next; } @@@ -2088,7 -1946,7 +2033,7 @@@ static int sha1_loose_object_info(cons int status; unsigned long mapsize, size; void *map; - z_stream stream; + git_zstream stream; char hdr[32]; map = map_sha1_file(sha1, &mapsize); @@@ -2106,28 -1964,24 +2051,28 @@@ return status; } -int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +/* returns enum object_type or negative */ +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi) { struct cached_object *co; struct pack_entry e; - int status; + int status, rtype; co = find_cached_object(sha1); if (co) { - if (sizep) - *sizep = co->size; + if (oi->sizep) + *(oi->sizep) = co->size; + oi->whence = OI_CACHED; return co->type; } if (!find_pack_entry(sha1, &e)) { /* Most likely it's a loose object. */ - status = sha1_loose_object_info(sha1, sizep); - if (status >= 0) + status = sha1_loose_object_info(sha1, oi->sizep); + if (status >= 0) { + oi->whence = OI_LOOSE; return status; + } /* Not a loose object; someone else may have just packed it. */ reprepare_packed_git(); @@@ -2135,31 -1989,15 +2080,31 @@@ return status; } - status = packed_object_info(e.p, e.offset, sizep); + status = packed_object_info(e.p, e.offset, oi->sizep, &rtype); if (status < 0) { mark_bad_packed_object(e.p, sha1); - status = sha1_object_info(sha1, sizep); + status = sha1_object_info_extended(sha1, oi); + } else if (in_delta_base_cache(e.p, e.offset)) { + oi->whence = OI_DBCACHED; + } else { + oi->whence = OI_PACKED; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); } return status; } +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +{ + struct object_info oi; + + oi.sizep = sizep; + return sha1_object_info_extended(sha1, &oi); +} + static void *read_packed_sha1(const unsigned char *sha1, enum object_type *type, unsigned long *size) { @@@ -2239,21 -2077,23 +2184,21 @@@ static void *read_object(const unsigne * deal with them should arrange to call read_object() and give error * messages themselves. */ -void *read_sha1_file_repl(const unsigned char *sha1, - enum object_type *type, - unsigned long *size, - const unsigned char **replacement) +void *read_sha1_file_extended(const unsigned char *sha1, + enum object_type *type, + unsigned long *size, + unsigned flag) { - const unsigned char *repl = lookup_replace_object(sha1); void *data; char *path; const struct packed_git *p; + const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE) + ? lookup_replace_object(sha1) : sha1; errno = 0; data = read_object(repl, type, size); - if (data) { - if (replacement) - *replacement = repl; + if (data) return data; - } if (errno && errno != ENOENT) die_errno("failed to read object %s", sha1_to_hex(sha1)); @@@ -2457,7 -2297,7 +2402,7 @@@ static int write_loose_object(const uns { int fd, ret; unsigned char compressed[4096]; - z_stream stream; + git_zstream stream; git_SHA_CTX c; unsigned char parano_sha1[20]; char *filename; @@@ -2465,6 -2305,8 +2410,6 @@@ filename = sha1_file_name(sha1); fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename); - while (fd < 0 && errno == EMFILE && unuse_one_window(NULL, -1)) - fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename); if (fd < 0) { if (errno == EACCES) return error("insufficient permission for adding an object to repository database %s\n", get_object_directory()); @@@ -2474,7 -2316,7 +2419,7 @@@ /* Set it up */ memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, zlib_compression_level); + git_deflate_init(&stream, zlib_compression_level); stream.next_out = compressed; stream.avail_out = sizeof(compressed); git_SHA1_Init(&c); @@@ -2482,8 -2324,8 +2427,8 @@@ /* First header.. */ stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; - while (deflate(&stream, 0) == Z_OK) - /* nothing */; + while (git_deflate(&stream, 0) == Z_OK) + ; /* nothing */ git_SHA1_Update(&c, hdr, hdrlen); /* Then the data itself.. */ @@@ -2491,7 -2333,7 +2436,7 @@@ stream.avail_in = len; do { unsigned char *in0 = stream.next_in; - ret = deflate(&stream, Z_FINISH); + ret = git_deflate(&stream, Z_FINISH); git_SHA1_Update(&c, in0, stream.next_in - in0); if (write_buffer(fd, compressed, stream.next_out - compressed) < 0) die("unable to write sha1 file"); @@@ -2501,7 -2343,7 +2446,7 @@@ if (ret != Z_STREAM_END) die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret); - ret = deflateEnd(&stream); + ret = git_deflate_end_gently(&stream); if (ret != Z_OK) die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret); git_SHA1_Final(parano_sha1, &c); @@@ -2612,11 -2454,10 +2557,11 @@@ static void check_tag(const void *buf, } static int index_mem(unsigned char *sha1, void *buf, size_t size, - int write_object, enum object_type type, - const char *path, int format_check) + enum object_type type, + const char *path, unsigned flags) { int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; if (!type) type = OBJ_BLOB; @@@ -2632,7 -2473,7 +2577,7 @@@ re_allocated = 1; } } - if (format_check) { + if (flags & HASH_FORMAT_CHECK) { if (type == OBJ_TREE) check_tree(buf, size); if (type == OBJ_COMMIT) @@@ -2650,141 -2491,44 +2595,141 @@@ return ret; } +static int index_pipe(unsigned char *sha1, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path, int format_check) +static int index_core(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) { - struct strbuf sbuf = STRBUF_INIT; - if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, - type, path, format_check); - else - ret = -1; - strbuf_release(&sbuf); - } else if (!size) { - ret = index_mem(sha1, NULL, size, write_object, type, path, - format_check); + if (!size) { + ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, write_object, type, - path, format_check); + ret = index_mem(sha1, buf, size, type, path, flags); else ret = error("short read %s", strerror(errno)); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = index_mem(sha1, buf, size, write_object, type, path, - format_check); + ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } + return ret; +} + +/* + * This creates one packfile per large blob, because the caller + * immediately wants the result sha1, and fast-import can report the + * object name via marks mechanism only by closing the created + * packfile. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above), but the primary + * motivation for trying to stream from the working tree file and to + * avoid mmaping it in core is to deal with large binary blobs, and + * by definition they do _not_ want to get any conversion. + */ +static int index_stream(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + struct child_process fast_import; + char export_marks[512]; + const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; + char tmpfile[512]; + char fast_import_cmd[512]; + char buf[512]; + int len, tmpfd; + + strcpy(tmpfile, git_path("hashstream_XXXXXX")); + tmpfd = git_mkstemp_mode(tmpfile, 0600); + if (tmpfd < 0) + die_errno("cannot create tempfile: %s", tmpfile); + if (close(tmpfd)) + die_errno("cannot close tempfile: %s", tmpfile); + sprintf(export_marks, "--export-marks=%s", tmpfile); + + memset(&fast_import, 0, sizeof(fast_import)); + fast_import.in = -1; + fast_import.argv = argv; + fast_import.git_cmd = 1; + if (start_command(&fast_import)) + die_errno("index-stream: git fast-import failed"); + + len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", + (unsigned long) size); + write_or_whine(fast_import.in, fast_import_cmd, len, + "index-stream: feeding fast-import"); + while (size) { + char buf[10240]; + size_t sz = size < sizeof(buf) ? size : sizeof(buf); + ssize_t actual; + + actual = read_in_full(fd, buf, sz); + if (actual < 0) + die_errno("index-stream: reading input"); + if (write_in_full(fast_import.in, buf, actual) != actual) + die_errno("index-stream: feeding fast-import"); + size -= actual; + } + if (close(fast_import.in)) + die_errno("index-stream: closing fast-import"); + if (finish_command(&fast_import)) + die_errno("index-stream: finishing fast-import"); + + tmpfd = open(tmpfile, O_RDONLY); + if (tmpfd < 0) + die_errno("index-stream: cannot open fast-import mark"); + len = read(tmpfd, buf, sizeof(buf)); + if (len < 0) + die_errno("index-stream: reading fast-import mark"); + if (close(tmpfd) < 0) + die_errno("index-stream: closing fast-import mark"); + if (unlink(tmpfile)) + die_errno("index-stream: unlinking fast-import mark"); + if (len != 44 || + memcmp(":1 ", buf, 3) || + get_sha1_hex(buf + 3, sha1)) + die_errno("index-stream: unexpected fast-import mark: <%s>", buf); + return 0; +} + +int index_fd(unsigned char *sha1, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) +{ + int ret; + size_t size = xsize_t(st->st_size); + + if (!S_ISREG(st->st_mode)) + ret = index_pipe(sha1, fd, type, path, flags); + else if (size <= big_file_threshold || type != OBJ_BLOB) + ret = index_core(sha1, fd, size, type, path, flags); + else + ret = index_stream(sha1, fd, size, type, path, flags); close(fd); return ret; } -int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object) +int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags) { int fd; struct strbuf sb = STRBUF_INIT; @@@ -2795,7 -2539,7 +2740,7 @@@ if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0) + if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0) return error("%s: failed to insert into database", path); break; @@@ -2805,7 -2549,7 +2750,7 @@@ return error("readlink(\"%s\"): %s", path, errstr); } - if (!write_object) + if (!(flags & HASH_WRITE_OBJECT)) hash_sha1_file(sb.buf, sb.len, blob_type, sha1); else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database",