From: Junio C Hamano Date: Thu, 9 Apr 2009 06:21:10 +0000 (-0700) Subject: Merge branch 'jc/maint-1.6.0-keep-pack' into maint X-Git-Tag: v1.6.2.3~6 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/c3067cbfb3fbab32177d5f61ea73127f08ab43cb?hp=-c Merge branch 'jc/maint-1.6.0-keep-pack' into maint * jc/maint-1.6.0-keep-pack: pack-objects: don't loosen objects available in alternate or kept packs t7700: demonstrate repack flaw which may loosen objects unnecessarily Remove --kept-pack-only option and associated infrastructure pack-objects: only repack or loosen objects residing in "local" packs git-repack.sh: don't use --kept-pack-only option to pack-objects t7700-repack: add two new tests demonstrating repacking flaws is_kept_pack(): final clean-up Simplify is_kept_pack() Consolidate ignore_packed logic more has_sha1_kept_pack(): take "struct rev_info" has_sha1_pack(): refactor "pretend these packs do not exist" interface git-repack: resist stray environment variable Conflicts: t/t7700-repack.sh --- c3067cbfb3fbab32177d5f61ea73127f08ab43cb diff --combined builtin-count-objects.c index 62fd1f0961,c095e8dd2b..b814fe5070 --- a/builtin-count-objects.c +++ b/builtin-count-objects.c @@@ -5,7 -5,6 +5,7 @@@ */ #include "cache.h" +#include "dir.h" #include "builtin.h" #include "parse-options.h" @@@ -22,7 -21,9 +22,7 @@@ static void count_objects(DIR *d, char const char *cp; int bad = 0; - if ((ent->d_name[0] == '.') && - (ent->d_name[1] == 0 || - ((ent->d_name[1] == '.') && (ent->d_name[2] == 0)))) + if (is_dot_or_dotdot(ent->d_name)) continue; for (cp = ent->d_name; *cp; cp++) { int ch = *cp; @@@ -42,7 -43,7 +42,7 @@@ if (lstat(path, &st) || !S_ISREG(st.st_mode)) bad = 1; else - (*loose_size) += xsize_t(st.st_blocks); + (*loose_size) += xsize_t(on_disk_bytes(st)); } if (bad) { if (verbose) { @@@ -60,7 -61,7 +60,7 @@@ hex[40] = 0; if (get_sha1_hex(hex, sha1)) die("internal error"); - if (has_sha1_pack(sha1, NULL)) + if (has_sha1_pack(sha1)) (*packed_loose)++; } } @@@ -103,7 -104,6 +103,7 @@@ int cmd_count_objects(int argc, const c if (verbose) { struct packed_git *p; unsigned long num_pack = 0; + unsigned long size_pack = 0; if (!packed_git) prepare_packed_git(); for (p = packed_git; p; p = p->next) { @@@ -112,19 -112,17 +112,19 @@@ if (open_pack_index(p)) continue; packed += p->num_objects; + size_pack += p->pack_size + p->index_size; num_pack++; } printf("count: %lu\n", loose); - printf("size: %lu\n", loose_size / 2); + printf("size: %lu\n", loose_size / 1024); printf("in-pack: %lu\n", packed); printf("packs: %lu\n", num_pack); + printf("size-pack: %lu\n", size_pack / 1024); printf("prune-packable: %lu\n", packed_loose); printf("garbage: %lu\n", garbage); } else printf("%lu objects, %lu kilobytes\n", - loose, loose_size / 2); + loose, loose_size / 1024); return 0; } diff --combined builtin-fsck.c index 64dffa5421,491375dc59..6436bc2248 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@@ -10,7 -10,6 +10,7 @@@ #include "tree-walk.h" #include "fsck.h" #include "parse-options.h" +#include "dir.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@@ -23,7 -22,6 +23,7 @@@ static int check_full static int check_strict; static int keep_cache_objects; static unsigned char head_sha1[20]; +static const char *head_points_at; static int errors_found; static int write_lost_and_found; static int verbose; @@@ -160,7 -158,7 +160,7 @@@ static void check_reachable_object(stru * do a full fsck */ if (!obj->parsed) { - if (has_sha1_pack(obj->sha1, NULL)) + if (has_sha1_pack(obj->sha1)) return; /* it is in pack - forget about it */ printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1)); errors_found |= ERROR_REACHABLE; @@@ -224,16 -222,12 +224,16 @@@ static void check_unreachable_object(st char *buf = read_sha1_file(obj->sha1, &type, &size); if (buf) { - fwrite(buf, size, 1, f); + if (fwrite(buf, size, 1, f) != 1) + die("Could not write %s: %s", + filename, strerror(errno)); free(buf); } } else fprintf(f, "%s\n", sha1_to_hex(obj->sha1)); - fclose(f); + if (fclose(f)) + die("Could not finish %s: %s", + filename, strerror(errno)); } return; } @@@ -397,12 -391,19 +397,12 @@@ static void fsck_dir(int i, char *path while ((de = readdir(dir)) != NULL) { char name[100]; unsigned char sha1[20]; - int len = strlen(de->d_name); - switch (len) { - case 2: - if (de->d_name[1] != '.') - break; - case 1: - if (de->d_name[0] != '.') - break; + if (is_dot_or_dotdot(de->d_name)) continue; - case 38: + if (strlen(de->d_name) == 38) { sprintf(name, "%02x", i); - memcpy(name+2, de->d_name, len+1); + memcpy(name+2, de->d_name, 39); if (get_sha1_hex(name, sha1) < 0) break; add_sha1_list(sha1, DIRENT_SORT_HINT(de)); @@@ -474,8 -475,6 +474,8 @@@ static int fsck_handle_ref(const char * static void get_default_heads(void) { + if (head_points_at && !is_null_sha1(head_sha1)) + fsck_handle_ref("HEAD", head_sha1, 0, NULL); for_each_ref(fsck_handle_ref, NULL); if (include_reflogs) for_each_reflog(fsck_handle_reflog, NULL); @@@ -515,13 -514,14 +515,13 @@@ static void fsck_object_dir(const char static int fsck_head_link(void) { - unsigned char sha1[20]; int flag; int null_is_error = 0; - const char *head_points_at = resolve_ref("HEAD", sha1, 0, &flag); if (verbose) fprintf(stderr, "Checking HEAD link\n"); + head_points_at = resolve_ref("HEAD", head_sha1, 0, &flag); if (!head_points_at) return error("Invalid HEAD"); if (!strcmp(head_points_at, "HEAD")) @@@ -530,7 -530,7 +530,7 @@@ else if (prefixcmp(head_points_at, "refs/heads/")) return error("HEAD points to something strange (%s)", head_points_at); - if (is_null_sha1(sha1)) { + if (is_null_sha1(head_sha1)) { if (null_is_error) return error("HEAD: detached HEAD points at nothing"); fprintf(stderr, "notice: HEAD points to an unborn branch (%s)\n", @@@ -586,7 -586,6 +586,7 @@@ static struct option fsck_opts[] = int cmd_fsck(int argc, const char **argv, const char *prefix) { int i, heads; + struct alternate_object_database *alt; errors_found = 0; @@@ -598,19 -597,17 +598,19 @@@ fsck_head_link(); fsck_object_dir(get_object_directory()); + + prepare_alt_odb(); + for (alt = alt_odb_list; alt; alt = alt->next) { + char namebuf[PATH_MAX]; + int namelen = alt->name - alt->base; + memcpy(namebuf, alt->base, namelen); + namebuf[namelen - 1] = 0; + fsck_object_dir(namebuf); + } + if (check_full) { - struct alternate_object_database *alt; struct packed_git *p; - prepare_alt_odb(); - for (alt = alt_odb_list; alt; alt = alt->next) { - char namebuf[PATH_MAX]; - int namelen = alt->name - alt->base; - memcpy(namebuf, alt->base, namelen); - namebuf[namelen - 1] = 0; - fsck_object_dir(namebuf); - } + prepare_packed_git(); for (p = packed_git; p; p = p->next) /* verify gives error messages itself */ @@@ -627,11 -624,10 +627,11 @@@ } heads = 0; - for (i = 1; i < argc; i++) { + for (i = 0; i < argc; i++) { const char *arg = argv[i]; - if (!get_sha1(arg, head_sha1)) { - struct object *obj = lookup_object(head_sha1); + unsigned char sha1[20]; + if (!get_sha1(arg, sha1)) { + struct object *obj = lookup_object(sha1); /* Error is printed by lookup_object(). */ if (!obj) diff --combined builtin-pack-objects.c index 8ca46c8deb,ad3f8e7751..9fc3b35547 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@@ -78,7 -78,7 +78,7 @@@ static int progress = 1 static int window = 10; static uint32_t pack_size_limit, pack_size_limit_cfg; static int depth = 50; -static int delta_search_threads = 1; +static int delta_search_threads; static int pack_to_stdout; static int num_preferred_base; static struct progress *progress_state; @@@ -195,16 -195,16 +195,16 @@@ static int check_pack_inflate(struct pa int st; memset(&stream, 0, sizeof(stream)); - inflateInit(&stream); + git_inflate_init(&stream); do { in = use_pack(p, w_curs, offset, &stream.avail_in); stream.next_in = in; stream.next_out = fakebuf; stream.avail_out = sizeof(fakebuf); - st = inflate(&stream, Z_FINISH); + st = git_inflate(&stream, Z_FINISH); offset += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); - inflateEnd(&stream); + git_inflate_end(&stream); return (st == Z_STREAM_END && stream.total_out == expect && stream.total_in == len) ? 0 : -1; @@@ -286,7 -286,6 +286,7 @@@ static unsigned long write_object(struc */ if (!to_reuse) { + no_reuse: if (!usable_delta) { buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) @@@ -368,60 -367,46 +368,60 @@@ struct revindex_entry *revidx; off_t offset; - if (entry->delta) { + if (entry->delta) type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; - reused_delta++; - } hdrlen = encode_header(type, entry->size, header); + offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; if (!pack_to_stdout && p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) - die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { + error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (!pack_to_stdout && p->index_version == 1 && + check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { + error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); - if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; + reused_delta++; } else if (type == OBJ_REF_DELTA) { - if (limit && hdrlen + 20 + datalen + 20 >= limit) + if (limit && hdrlen + 20 + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, entry->delta->idx.sha1, 20); hdrlen += 20; + reused_delta++; } else { - if (limit && hdrlen + datalen + 20 >= limit) + if (limit && hdrlen + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); } - - if (!pack_to_stdout && p->index_version == 1 && - check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) - die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); copy_pack_data(f, p, &w_curs, offset, datalen); unuse_pack(&w_curs); reused++; @@@ -488,8 -473,9 +488,8 @@@ static void write_pack_file(void } else { char tmpname[PATH_MAX]; int fd; - snprintf(tmpname, sizeof(tmpname), - "%s/pack/tmp_pack_XXXXXX", get_object_directory()); - fd = xmkstemp(tmpname); + fd = odb_mkstemp(tmpname, sizeof(tmpname), + "pack/tmp_pack_XXXXXX"); pack_tmp_name = xstrdup(tmpname); f = sha1fd(fd, pack_tmp_name); } @@@ -1031,11 -1017,9 +1031,11 @@@ static void check_object(struct object_ * We want in_pack_type even if we do not reuse delta * since non-delta representations could still be reused. */ - used = unpack_object_header_gently(buf, avail, + used = unpack_object_header_buffer(buf, avail, &entry->in_pack_type, &entry->size); + if (used == 0) + goto give_up; /* * Determine if this is a delta and if so whether we can @@@ -1047,8 -1031,6 +1047,8 @@@ /* Not a delta hence we've already got all we need. */ entry->type = entry->in_pack_type; entry->in_pack_header_size = used; + if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB) + goto give_up; unuse_pack(&w_curs); return; case OBJ_REF_DELTA: @@@ -1065,25 -1047,19 +1065,25 @@@ ofs = c & 127; while (c & 128) { ofs += 1; - if (!ofs || MSB(ofs, 7)) - die("delta base offset overflow in pack for %s", - sha1_to_hex(entry->idx.sha1)); + if (!ofs || MSB(ofs, 7)) { + error("delta base offset overflow in pack for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } c = buf[used_0++]; ofs = (ofs << 7) + (c & 127); } - if (ofs >= entry->in_pack_offset) - die("delta base offset out of bound for %s", - sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; + if (ofs <= 0 || ofs >= entry->in_pack_offset) { + error("delta base offset out of bound for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); + if (!revidx) + goto give_up; base_ref = nth_packed_object_sha1(p, revidx->nr); } entry->in_pack_header_size = used + used_0; @@@ -1103,7 -1079,6 +1103,7 @@@ */ entry->type = entry->in_pack_type; entry->delta = base_entry; + entry->delta_size = entry->size; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; unuse_pack(&w_curs); @@@ -1118,8 -1093,6 +1118,8 @@@ */ entry->size = get_size_from_delta(p, &w_curs, entry->in_pack_offset + entry->in_pack_header_size); + if (entry->size == 0) + goto give_up; unuse_pack(&w_curs); return; } @@@ -1129,7 -1102,6 +1129,7 @@@ * with sha1_object_info() to find about the object type * at this point... */ + give_up: unuse_pack(&w_curs); } @@@ -1293,7 -1265,7 +1293,7 @@@ static int try_delta(struct unpacked *t max_size = trg_entry->delta_size; ref_depth = trg->depth; } - max_size = max_size * (max_depth - src->depth) / + max_size = (uint64_t)max_size * (max_depth - src->depth) / (max_depth - ref_depth + 1); if (max_size == 0) return 0; @@@ -1412,10 -1384,12 +1412,10 @@@ static void find_deltas(struct object_e int window, int depth, unsigned *processed) { uint32_t i, idx = 0, count = 0; - unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array; unsigned long mem_usage = 0; - array = xmalloc(array_size); - memset(array, 0, array_size); + array = xcalloc(window, sizeof(struct unpacked)); for (;;) { struct object_entry *entry; @@@ -1611,18 -1585,11 +1611,18 @@@ static void ll_find_deltas(struct objec find_deltas(list, &list_size, window, depth, processed); return; } + if (progress > pack_to_stdout) + fprintf(stderr, "Delta compression using %d threads.\n", + delta_search_threads); /* Partition the work amongst work threads. */ for (i = 0; i < delta_search_threads; i++) { unsigned sub_size = list_size / (delta_search_threads - i); + /* don't use too small segments or no deltas will be found */ + if (sub_size < 2*window && i+1 < delta_search_threads) + sub_size = 0; + p[i].window = window; p[i].depth = depth; p[i].processed = processed; @@@ -1748,16 -1715,6 +1748,16 @@@ static void prepare_pack(int window, in get_object_details(); + /* + * If we're locally repacking then we need to be doubly careful + * from now on in order to make sure no stealth corruption gets + * propagated to the new pack. Clients receiving streamed packs + * should validate everything they get anyway so no need to incur + * the additional cost here in that case. + */ + if (!pack_to_stdout) + do_check_packed_object_crc = 1; + if (!nr_objects || !window || !depth) return; @@@ -1784,14 -1741,6 +1784,14 @@@ if (entry->type < 0) die("unable to get type of object %s", sha1_to_hex(entry->idx.sha1)); + } else { + if (entry->type < 0) { + /* + * This object is not found, but we + * don't have to include it anyway. + */ + continue; + } } delta_list[n++] = entry; @@@ -1966,11 -1915,7 +1966,7 @@@ static void add_objects_in_unpacked_pac const unsigned char *sha1; struct object *o; - for (i = 0; i < revs->num_ignore_packed; i++) { - if (matches_pack_name(p, revs->ignore_packed[i])) - break; - } - if (revs->num_ignore_packed <= i) + if (!p->pack_local || p->pack_keep) continue; if (open_pack_index(p)) die("cannot open pack index"); @@@ -1999,6 -1944,29 +1995,29 @@@ free(in_pack.array); } + static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) + { + static struct packed_git *last_found = (void *)1; + struct packed_git *p; + + p = (last_found != (void *)1) ? last_found : packed_git; + + while (p) { + if ((!p->pack_local || p->pack_keep) && + find_pack_entry_one(sha1, p)) { + last_found = p; + return 1; + } + if (p == last_found) + p = packed_git; + else + p = p->next; + if (p == last_found) + p = p->next; + } + return 0; + } + static void loosen_unused_packed_objects(struct rev_info *revs) { struct packed_git *p; @@@ -2006,11 -1974,7 +2025,7 @@@ const unsigned char *sha1; for (p = packed_git; p; p = p->next) { - for (i = 0; i < revs->num_ignore_packed; i++) { - if (matches_pack_name(p, revs->ignore_packed[i])) - break; - } - if (revs->num_ignore_packed <= i) + if (!p->pack_local || p->pack_keep) continue; if (open_pack_index(p)) @@@ -2018,7 -1982,8 +2033,8 @@@ for (i = 0; i < p->num_objects; i++) { sha1 = nth_packed_object_sha1(p, i); - if (!locate_object_entry(sha1)) + if (!locate_object_entry(sha1) && + !has_sha1_pack_kept_or_nonlocal(sha1)) if (force_object_loose(sha1, p->mtime)) die("unable to force loose object"); } @@@ -2208,7 -2173,6 +2224,6 @@@ int cmd_pack_objects(int argc, const ch continue; } if (!strcmp("--unpacked", arg) || - !prefixcmp(arg, "--unpacked=") || !strcmp("--reflog", arg) || !strcmp("--all", arg)) { use_internal_rev_list = 1; diff --combined cache.h index 189151de25,23c16d0d99..50179687b5 --- a/cache.h +++ b/cache.h @@@ -6,22 -6,12 +6,22 @@@ #include "hash.h" #include SHA1_HEADER -#include +#ifndef git_SHA_CTX +#define git_SHA_CTX SHA_CTX +#define git_SHA1_Init SHA1_Init +#define git_SHA1_Update SHA1_Update +#define git_SHA1_Final SHA1_Final +#endif +#include #if defined(NO_DEFLATE_BOUND) || ZLIB_VERNUM < 0x1200 #define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11) #endif +void git_inflate_init(z_streamp strm); +void git_inflate_end(z_streamp strm); +int git_inflate(z_streamp strm, int flush); + #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) #define DTYPE(de) ((de)->d_type) #else @@@ -119,26 -109,6 +119,26 @@@ struct ondisk_cache_entry char name[FLEX_ARRAY]; /* more */ }; +/* + * This struct is used when CE_EXTENDED bit is 1 + * The struct must match ondisk_cache_entry exactly from + * ctime till flags + */ +struct ondisk_cache_entry_extended { + struct cache_time ctime; + struct cache_time mtime; + unsigned int dev; + unsigned int ino; + unsigned int mode; + unsigned int uid; + unsigned int gid; + unsigned int size; + unsigned char sha1[20]; + unsigned short flags; + unsigned short flags2; + char name[FLEX_ARRAY]; /* more */ +}; + struct cache_entry { unsigned int ce_ctime; unsigned int ce_mtime; @@@ -156,19 -126,10 +156,19 @@@ #define CE_NAMEMASK (0x0fff) #define CE_STAGEMASK (0x3000) +#define CE_EXTENDED (0x4000) #define CE_VALID (0x8000) #define CE_STAGESHIFT 12 -/* In-memory only */ +/* + * Range 0xFFFF0000 in ce_flags is divided into + * two parts: in-memory flags and on-disk ones. + * Flags in CE_EXTENDED_FLAGS will get saved on-disk + * if you want to save a new flag, add it in + * CE_EXTENDED_FLAGS + * + * In-memory only flags + */ #define CE_UPDATE (0x10000) #define CE_REMOVE (0x20000) #define CE_UPTODATE (0x40000) @@@ -177,25 -138,6 +177,25 @@@ #define CE_HASHED (0x100000) #define CE_UNHASHED (0x200000) +/* + * Extended on-disk flags + */ +#define CE_INTENT_TO_ADD 0x20000000 +/* CE_EXTENDED2 is for future extension */ +#define CE_EXTENDED2 0x80000000 + +#define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD) + +/* + * Safeguard to avoid saving wrong flags: + * - CE_EXTENDED2 won't get saved until its semantic is known + * - Bits in 0x0000FFFF have been saved in ce_flags already + * - Bits in 0x003F0000 are currently in-memory flags + */ +#if CE_EXTENDED_FLAGS & 0x803FFFFF +#error "CE_EXTENDED_FLAGS out of range" +#endif + /* * Copy the sha1 and stat state of a cache entry from one to * another. But we never change the name, or the hash state! @@@ -228,9 -170,7 +228,9 @@@ static inline size_t ce_namelen(const s } #define ce_size(ce) cache_entry_size(ce_namelen(ce)) -#define ondisk_ce_size(ce) ondisk_cache_entry_size(ce_namelen(ce)) +#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \ + ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ + ondisk_cache_entry_size(ce_namelen(ce))) #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT) #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE) #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE) @@@ -273,10 -213,8 +273,10 @@@ static inline int ce_to_dtype(const str (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \ S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK) -#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7) -#define ondisk_cache_entry_size(len) ((offsetof(struct ondisk_cache_entry,name) + (len) + 8) & ~7) +#define flexible_size(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7) +#define cache_entry_size(len) flexible_size(cache_entry,len) +#define ondisk_cache_entry_size(len) flexible_size(ondisk_cache_entry,len) +#define ondisk_cache_entry_extended_size(len) flexible_size(ondisk_cache_entry_extended,len) struct index_state { struct cache_entry **cache; @@@ -317,7 -255,6 +317,7 @@@ static inline void remove_name_hash(str #define read_cache() read_index(&the_index) #define read_cache_from(path) read_index_from(&the_index, (path)) +#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec)) #define is_cache_unborn() is_index_unborn(&the_index) #define read_cache_unmerged() read_index_unmerged(&the_index) #define write_cache(newfd, cache, entries) write_index(&the_index, (newfd)) @@@ -377,7 -314,6 +377,7 @@@ extern int is_bare_repository(void) extern int is_inside_git_dir(void); extern char *git_work_tree_cfg; extern int is_inside_work_tree(void); +extern int have_git_dir(void); extern const char *get_git_dir(void); extern char *get_object_directory(void); extern char *get_index_file(void); @@@ -424,7 -360,6 +424,7 @@@ extern int init_db(const char *template /* Initialize and use the cache information */ extern int read_index(struct index_state *); +extern int read_index_preload(struct index_state *, const char **pathspec); extern int read_index_from(struct index_state *, const char *path); extern int is_index_unborn(struct index_state *); extern int read_index_unmerged(struct index_state *); @@@ -438,7 -373,6 +438,7 @@@ extern int index_name_pos(const struct #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ #define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */ #define ADD_CACHE_JUST_APPEND 8 /* Append only; tree.c::read_tree() */ +#define ADD_CACHE_NEW_ONLY 16 /* Do not replace existing ones */ extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option); extern struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really); extern void rename_index_entry_at(struct index_state *, int pos, const char *new_name); @@@ -447,8 -381,6 +447,8 @@@ extern int remove_file_from_index(struc #define ADD_CACHE_VERBOSE 1 #define ADD_CACHE_PRETEND 2 #define ADD_CACHE_IGNORE_ERRORS 4 +#define ADD_CACHE_IGNORE_REMOVAL 8 +#define ADD_CACHE_INTENT 16 extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags); extern int add_file_to_index(struct index_state *, const char *path, int flags); extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, int refresh); @@@ -464,6 -396,7 +464,6 @@@ extern int ie_modified(const struct ind extern int ce_path_match(const struct cache_entry *ce, const char **pathspec); extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path); -extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object); extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); @@@ -484,7 -417,6 +484,7 @@@ struct lock_file }; #define LOCK_DIE_ON_ERROR 1 #define LOCK_NODEREF 2 +extern NORETURN void unable_to_lock_index_die(const char *path, int err); extern int hold_lock_file_for_update(struct lock_file *, const char *path, int); extern int hold_lock_file_for_append(struct lock_file *, const char *path, int); extern int commit_lock_file(struct lock_file *); @@@ -516,7 -448,6 +516,7 @@@ extern size_t packed_git_limit extern size_t delta_base_cache_limit; extern int auto_crlf; extern int fsync_object_files; +extern int core_preload_index; enum safe_crlf { SAFE_CRLF_FALSE = 0, @@@ -527,7 -458,6 +527,7 @@@ extern enum safe_crlf safe_crlf; enum branch_track { + BRANCH_TRACK_UNSPECIFIED = -1, BRANCH_TRACK_NEVER = 0, BRANCH_TRACK_REMOTE, BRANCH_TRACK_ALWAYS, @@@ -587,13 -517,6 +587,13 @@@ static inline void hashclr(unsigned cha { memset(hash, 0, 20); } +extern int is_empty_blob_sha1(const unsigned char *sha1); + +#define EMPTY_TREE_SHA1_HEX \ + "4b825dc642cb6eb9a060e54bf8d69288fbee4904" +#define EMPTY_TREE_SHA1_BIN \ + "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \ + "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04" int git_mkstemp(char *path, size_t n, const char *template); @@@ -621,13 -544,11 +621,13 @@@ static inline int is_absolute_path(cons { return path[0] == '/' || has_dos_drive_prefix(path); } +int is_directory(const char *); const char *make_absolute_path(const char *path); const char *make_nonrelative_path(const char *path); const char *make_relative_path(const char *abs, const char *base); -int normalize_absolute_path(char *buf, const char *path); +int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, const char *prefix_list); +char *strip_path_suffix(const char *path, const char *suffix); /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ extern int sha1_object_info(const unsigned char *, unsigned long *); @@@ -637,14 -558,14 +637,14 @@@ extern int write_sha1_file(void *buf, u extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); -/* just like read_sha1_file(), but non fatal in presence of bad objects */ -extern void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size); +/* global flag to enable extra checks when accessing packed objects */ +extern int do_check_packed_object_crc; extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); extern int move_temp_to_file(const char *tmpfile, const char *filename); - extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); + extern int has_sha1_pack(const unsigned char *sha1); extern int has_sha1_file(const unsigned char *sha1); extern int has_loose_object_nonlocal(const unsigned char *sha1); @@@ -669,7 -590,6 +669,7 @@@ extern int read_ref(const char *filenam extern const char *resolve_ref(const char *path, unsigned char *sha1, int, int *); extern int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref); extern int dwim_log(const char *str, int len, unsigned char *sha1, char **ref); +extern int interpret_nth_last_branch(const char *str, struct strbuf *); extern int refname_match(const char *abbrev_name, const char *full_name, const char **rules); extern const char *ref_rev_parse_rules[]; @@@ -696,8 -616,7 +696,8 @@@ enum date_mode DATE_SHORT, DATE_LOCAL, DATE_ISO8601, - DATE_RFC2822 + DATE_RFC2822, + DATE_RAW }; const char *show_date(unsigned long time, int timezone, enum date_mode mode); @@@ -725,10 -644,6 +725,10 @@@ struct checkout extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); extern int has_symlink_leading_path(int len, const char *name); +extern int has_symlink_or_noent_leading_path(int len, const char *name); +extern int has_dirs_only_path(int len, const char *name, int prefix_len); +extern void invalidate_lstat_cache(int len, const char *name); +extern void clear_lstat_cache(void); extern struct alternate_object_database { struct alternate_object_database *next; @@@ -737,8 -652,6 +737,8 @@@ } *alt_odb_list; extern void prepare_alt_odb(void); extern void add_to_alternates_file(const char *reference); +typedef int alt_odb_fn(struct alternate_object_database *, void *); +extern void foreach_alt_odb(alt_odb_fn, void*); struct pack_window { struct pack_window *next; @@@ -808,11 -721,7 +808,11 @@@ extern struct child_process *git_connec extern int finish_connect(struct child_process *conn); extern int path_match(const char *path, int nr, char **match); extern int get_ack(int fd, unsigned char *result_sha1); -extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags); +struct extra_have_objects { + int nr, alloc; + unsigned char (*array)[20]; +}; +extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags, struct extra_have_objects *); extern int server_supports(const char *feature); extern struct packed_git *parse_pack_index(unsigned char *sha1); @@@ -830,16 -739,14 +830,15 @@@ extern unsigned char* use_pack(struct p extern void close_pack_windows(struct packed_git *); extern void unuse_pack(struct pack_window **); extern void free_pack_by_name(const char *); +extern void clear_delta_base_cache(void); extern struct packed_git *add_packed_git(const char *, int, int); extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t); extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t); extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); -extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); +extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); - extern int matches_pack_name(struct packed_git *p, const char *name); /* Dumb servers support */ extern int update_server_info(int); @@@ -848,6 -755,7 +847,6 @@@ typedef int (*config_fn_t)(const char * extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); extern int git_config(config_fn_t fn, void *); -extern int git_parse_long(const char *, long *); extern int git_parse_ulong(const char *, unsigned long *); extern int git_config_int(const char *, const char *); extern unsigned long git_config_ulong(const char *, const char *); @@@ -871,7 -779,6 +870,7 @@@ extern int user_ident_explicitly_given extern const char *git_commit_encoding; extern const char *git_log_output_encoding; +extern const char *git_mailmap_file; /* IO helper functions */ extern void maybe_flush_or_die(FILE *, const char *); @@@ -947,6 -854,7 +946,6 @@@ extern int ws_fix_copy(char *, const ch extern int ws_blank_line(const char *line, int len, unsigned ws_rule); /* ls-files */ -int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen); int report_path_error(const char *ps_matched, const char **pathspec, int prefix_offset); void overlay_tree_on_cache(const char *tree_name, const char *prefix); diff --combined diff.c index 11798af0c1,a34d26c23f..37f99209a0 --- a/diff.c +++ b/diff.c @@@ -11,8 -11,6 +11,8 @@@ #include "attr.h" #include "run-command.h" #include "utf8.h" +#include "userdiff.h" +#include "sigchain.h" #ifdef NO_FAST_WORKING_DIRECTORY #define FAST_WORKING_DIRECTORY 0 @@@ -22,12 -20,9 +22,12 @@@ static int diff_detect_rename_default; static int diff_rename_limit_default = 200; +static int diff_suppress_blank_empty; int diff_use_color_default = -1; +static const char *diff_word_regex_cfg; static const char *external_diff_cmd_cfg; int diff_auto_refresh_index = 1; +static int diff_mnemonic_prefix; static char diff_colors[][COLOR_MAXLEN] = { "\033[m", /* reset */ @@@ -40,9 -35,6 +40,9 @@@ "\033[41m", /* WHITESPACE (red background) */ }; +static void diff_filespec_load_driver(struct diff_filespec *one); +static char *run_textconv(const char *, struct diff_filespec *, size_t *); + static int parse_diff_color_slot(const char *var, int ofs) { if (!strcasecmp(var+ofs, "plain")) @@@ -62,6 -54,80 +62,6 @@@ die("bad config variable '%s'", var); } -static struct ll_diff_driver { - const char *name; - struct ll_diff_driver *next; - const char *cmd; -} *user_diff, **user_diff_tail; - -/* - * Currently there is only "diff..command" variable; - * because there are "diff.color." variables, we are parsing - * this in a bit convoluted way to allow low level diff driver - * called "color". - */ -static int parse_lldiff_command(const char *var, const char *ep, const char *value) -{ - const char *name; - int namelen; - struct ll_diff_driver *drv; - - name = var + 5; - namelen = ep - name; - for (drv = user_diff; drv; drv = drv->next) - if (!strncmp(drv->name, name, namelen) && !drv->name[namelen]) - break; - if (!drv) { - drv = xcalloc(1, sizeof(struct ll_diff_driver)); - drv->name = xmemdupz(name, namelen); - if (!user_diff_tail) - user_diff_tail = &user_diff; - *user_diff_tail = drv; - user_diff_tail = &(drv->next); - } - - return git_config_string(&(drv->cmd), var, value); -} - -/* - * 'diff..funcname' attribute can be specified in the configuration - * to define a customized regexp to find the beginning of a function to - * be used for hunk header lines of "diff -p" style output. - */ -struct funcname_pattern_entry { - char *name; - char *pattern; - int cflags; -}; -static struct funcname_pattern_list { - struct funcname_pattern_list *next; - struct funcname_pattern_entry e; -} *funcname_pattern_list; - -static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags) -{ - const char *name; - int namelen; - struct funcname_pattern_list *pp; - - name = var + 5; /* "diff." */ - namelen = ep - name; - - for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen]) - break; - if (!pp) { - pp = xcalloc(1, sizeof(*pp)); - pp->e.name = xmemdupz(name, namelen); - pp->next = funcname_pattern_list; - funcname_pattern_list = pp; - } - free(pp->e.pattern); - pp->e.pattern = xstrdup(value); - pp->e.cflags = cflags; - return 0; -} - /* * These are to give UI layer defaults. * The core-level commands such as git-diff-files should @@@ -88,14 -154,14 +88,14 @@@ int git_diff_ui_config(const char *var diff_auto_refresh_index = git_config_bool(var, value); return 0; } + if (!strcmp(var, "diff.mnemonicprefix")) { + diff_mnemonic_prefix = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "diff.external")) return git_config_string(&external_diff_cmd_cfg, var, value); - if (!prefixcmp(var, "diff.")) { - const char *ep = strrchr(var, '.'); - - if (ep != var + 4 && !strcmp(ep, ".command")) - return parse_lldiff_command(var, ep, value); - } + if (!strcmp(var, "diff.wordregex")) + return git_config_string(&diff_word_regex_cfg, var, value); return git_diff_basic_config(var, value, cb); } @@@ -107,12 -173,6 +107,12 @@@ int git_diff_basic_config(const char *v return 0; } + switch (userdiff_config(var, value)) { + case 0: break; + case -1: return -1; + default: return 0; + } + if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) { int slot = parse_diff_color_slot(var, 11); if (!value) @@@ -121,12 -181,21 +121,12 @@@ return 0; } - if (!prefixcmp(var, "diff.")) { - const char *ep = strrchr(var, '.'); - if (ep != var + 4) { - if (!strcmp(ep, ".funcname")) { - if (!value) - return config_error_nonbool(var); - return parse_funcname_pattern(var, ep, value, - 0); - } else if (!strcmp(ep, ".xfuncname")) { - if (!value) - return config_error_nonbool(var); - return parse_funcname_pattern(var, ep, value, - REG_EXTENDED); - } - } + /* like GNU diff's --suppress-blank-empty option */ + if (!strcmp(var, "diff.suppressblankempty") || + /* for backwards compatibility */ + !strcmp(var, "diff.suppress-blank-empty")) { + diff_suppress_blank_empty = git_config_bool(var, value); + return 0; } return git_color_default_config(var, value, cb); @@@ -136,8 -205,9 +136,8 @@@ static char *quote_two(const char *one { int need_one = quote_c_style(one, NULL, NULL, 1); int need_two = quote_c_style(two, NULL, NULL, 1); - struct strbuf res; + struct strbuf res = STRBUF_INIT; - strbuf_init(&res, 0); if (need_one + need_two) { strbuf_addch(&res, '"'); quote_c_style(one, &res, NULL, 1); @@@ -171,33 -241,6 +171,33 @@@ static struct diff_tempfile char tmp_path[PATH_MAX]; } diff_temp[2]; +static struct diff_tempfile *claim_diff_tempfile(void) { + int i; + for (i = 0; i < ARRAY_SIZE(diff_temp); i++) + if (!diff_temp[i].name) + return diff_temp + i; + die("BUG: diff is failing to clean up its tempfiles"); +} + +static int remove_tempfile_installed; + +static void remove_tempfile(void) +{ + int i; + for (i = 0; i < ARRAY_SIZE(diff_temp); i++) { + if (diff_temp[i].name == diff_temp[i].tmp_path) + unlink(diff_temp[i].name); + diff_temp[i].name = NULL; + } +} + +static void remove_tempfile_on_signal(int signo) +{ + remove_tempfile(); + sigchain_pop(signo); + raise(signo); +} + static int count_lines(const char *data, int size) { int count, ch, completely_empty = 1, nl_just_seen = 0; @@@ -262,8 -305,6 +262,8 @@@ static void emit_rewrite_diff(const cha const char *name_b, struct diff_filespec *one, struct diff_filespec *two, + const char *textconv_one, + const char *textconv_two, struct diff_options *o) { int lc_a, lc_b; @@@ -275,17 -316,6 +275,17 @@@ const char *new = diff_get_color(color_diff, DIFF_FILE_NEW); const char *reset = diff_get_color(color_diff, DIFF_RESET); static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT; + const char *a_prefix, *b_prefix; + const char *data_one, *data_two; + size_t size_one, size_two; + + if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) { + a_prefix = o->b_prefix; + b_prefix = o->a_prefix; + } else { + a_prefix = o->a_prefix; + b_prefix = o->b_prefix; + } name_a += (*name_a == '/'); name_b += (*name_b == '/'); @@@ -294,32 -324,13 +294,32 @@@ strbuf_reset(&a_name); strbuf_reset(&b_name); - quote_two_c_style(&a_name, o->a_prefix, name_a, 0); - quote_two_c_style(&b_name, o->b_prefix, name_b, 0); + quote_two_c_style(&a_name, a_prefix, name_a, 0); + quote_two_c_style(&b_name, b_prefix, name_b, 0); diff_populate_filespec(one, 0); diff_populate_filespec(two, 0); - lc_a = count_lines(one->data, one->size); - lc_b = count_lines(two->data, two->size); + if (textconv_one) { + data_one = run_textconv(textconv_one, one, &size_one); + if (!data_one) + die("unable to read files to diff"); + } + else { + data_one = one->data; + size_one = one->size; + } + if (textconv_two) { + data_two = run_textconv(textconv_two, two, &size_two); + if (!data_two) + die("unable to read files to diff"); + } + else { + data_two = two->data; + size_two = two->size; + } + + lc_a = count_lines(data_one, size_one); + lc_b = count_lines(data_two, size_two); fprintf(o->file, "%s--- %s%s%s\n%s+++ %s%s%s\n%s@@ -", metainfo, a_name.buf, name_a_tab, reset, @@@ -329,9 -340,9 +329,9 @@@ print_line_count(o->file, lc_b); fprintf(o->file, " @@%s\n", reset); if (lc_a) - copy_file_with_prefix(o->file, '-', one->data, one->size, old, reset); + copy_file_with_prefix(o->file, '-', data_one, size_one, old, reset); if (lc_b) - copy_file_with_prefix(o->file, '+', two->data, two->size, new, reset); + copy_file_with_prefix(o->file, '+', data_two, size_two, new, reset); } static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) @@@ -343,7 -354,6 +343,7 @@@ } else if (diff_populate_filespec(one, 0)) return -1; + mf->ptr = one->data; mf->size = one->size; return 0; @@@ -352,138 -362,83 +352,138 @@@ struct diff_words_buffer { mmfile_t text; long alloc; - long current; /* output pointer */ - int suppressed_newline; + struct diff_words_orig { + const char *begin, *end; + } *orig; + int orig_nr, orig_alloc; }; static void diff_words_append(char *line, unsigned long len, struct diff_words_buffer *buffer) { - if (buffer->text.size + len > buffer->alloc) { - buffer->alloc = (buffer->text.size + len) * 3 / 2; - buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc); - } + ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc); line++; len--; memcpy(buffer->text.ptr + buffer->text.size, line, len); buffer->text.size += len; + buffer->text.ptr[buffer->text.size] = '\0'; } struct diff_words_data { - struct xdiff_emit_state xm; struct diff_words_buffer minus, plus; + const char *current_plus; FILE *file; + regex_t *word_regex; }; -static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color, - int suppress_newline) +static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) { - const char *ptr; - int eol = 0; + struct diff_words_data *diff_words = priv; + int minus_first, minus_len, plus_first, plus_len; + const char *minus_begin, *minus_end, *plus_begin, *plus_end; - if (len == 0) + if (line[0] != '@' || parse_hunk_header(line, len, + &minus_first, &minus_len, &plus_first, &plus_len)) return; - ptr = buffer->text.ptr + buffer->current; - buffer->current += len; + /* POSIX requires that first be decremented by one if len == 0... */ + if (minus_len) { + minus_begin = diff_words->minus.orig[minus_first].begin; + minus_end = + diff_words->minus.orig[minus_first + minus_len - 1].end; + } else + minus_begin = minus_end = + diff_words->minus.orig[minus_first].end; - if (ptr[len - 1] == '\n') { - eol = 1; - len--; + if (plus_len) { + plus_begin = diff_words->plus.orig[plus_first].begin; + plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end; + } else + plus_begin = plus_end = diff_words->plus.orig[plus_first].end; + + if (diff_words->current_plus != plus_begin) + fwrite(diff_words->current_plus, + plus_begin - diff_words->current_plus, 1, + diff_words->file); + if (minus_begin != minus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + minus_end - minus_begin, minus_begin); + if (plus_begin != plus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_NEW), + plus_end - plus_begin, plus_begin); + + diff_words->current_plus = plus_end; +} + +/* This function starts looking at *begin, and returns 0 iff a word was found. */ +static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, + int *begin, int *end) +{ + if (word_regex && *begin < buffer->size) { + regmatch_t match[1]; + if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + char *p = memchr(buffer->ptr + *begin + match[0].rm_so, + '\n', match[0].rm_eo - match[0].rm_so); + *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; + *begin += match[0].rm_so; + return *begin >= *end; + } + return -1; } - fputs(diff_get_color(1, color), file); - fwrite(ptr, len, 1, file); - fputs(diff_get_color(1, DIFF_RESET), file); + /* find the next word */ + while (*begin < buffer->size && isspace(buffer->ptr[*begin])) + (*begin)++; + if (*begin >= buffer->size) + return -1; - if (eol) { - if (suppress_newline) - buffer->suppressed_newline = 1; - else - putc('\n', file); - } + /* find the end of the word */ + *end = *begin + 1; + while (*end < buffer->size && !isspace(buffer->ptr[*end])) + (*end)++; + + return 0; } -static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) +/* + * This function splits the words in buffer->text, stores the list with + * newline separator into out, and saves the offsets of the original words + * in buffer->orig. + */ +static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out, + regex_t *word_regex) { - struct diff_words_data *diff_words = priv; + int i, j; + long alloc = 0; - if (diff_words->minus.suppressed_newline) { - if (line[0] != '+') - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } + out->size = 0; + out->ptr = NULL; - len--; - switch (line[0]) { - case '-': - print_word(diff_words->file, - &diff_words->minus, len, DIFF_FILE_OLD, 1); - break; - case '+': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_FILE_NEW, 0); - break; - case ' ': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_PLAIN, 0); - diff_words->minus.current += len; - break; + /* fake an empty "0th" word */ + ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc); + buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr; + buffer->orig_nr = 1; + + for (i = 0; i < buffer->text.size; i++) { + if (find_word_boundaries(&buffer->text, word_regex, &i, &j)) + return; + + /* store original boundaries */ + ALLOC_GROW(buffer->orig, buffer->orig_nr + 1, + buffer->orig_alloc); + buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i; + buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j; + buffer->orig_nr++; + + /* store one word */ + ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc); + memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i); + out->ptr[out->size + j - i] = '\n'; + out->size += j - i + 1; + + i = j - 1; } } @@@ -494,41 -449,46 +494,41 @@@ static void diff_words_show(struct diff xdemitconf_t xecfg; xdemitcb_t ecb; mmfile_t minus, plus; - int i; - memset(&xecfg, 0, sizeof(xecfg)); - minus.size = diff_words->minus.text.size; - minus.ptr = xmalloc(minus.size); - memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size); - for (i = 0; i < minus.size; i++) - if (isspace(minus.ptr[i])) - minus.ptr[i] = '\n'; - diff_words->minus.current = 0; - - plus.size = diff_words->plus.text.size; - plus.ptr = xmalloc(plus.size); - memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size); - for (i = 0; i < plus.size; i++) - if (isspace(plus.ptr[i])) - plus.ptr[i] = '\n'; - diff_words->plus.current = 0; + /* special case: only removal */ + if (!diff_words->plus.text.size) { + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + diff_words->minus.text.size, diff_words->minus.text.ptr); + diff_words->minus.text.size = 0; + return; + } - xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc; - ecb.outf = xdiff_outf; - ecb.priv = diff_words; - diff_words->xm.consume = fn_out_diff_words_aux; - xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb); + diff_words->current_plus = diff_words->plus.text.ptr; + memset(&xpp, 0, sizeof(xpp)); + memset(&xecfg, 0, sizeof(xecfg)); + diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex); + diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex); + xpp.flags = XDF_NEED_MINIMAL; + /* as only the hunk header will be parsed, we need a 0-context */ + xecfg.ctxlen = 0; + xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words, + &xpp, &xecfg, &ecb); free(minus.ptr); free(plus.ptr); + if (diff_words->current_plus != diff_words->plus.text.ptr + + diff_words->plus.text.size) + fwrite(diff_words->current_plus, + diff_words->plus.text.ptr + diff_words->plus.text.size + - diff_words->current_plus, 1, + diff_words->file); diff_words->minus.text.size = diff_words->plus.text.size = 0; - - if (diff_words->minus.suppressed_newline) { - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } } typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); struct emit_callback { - struct xdiff_emit_state xm; int nparents, color_diff; unsigned ws_rule; sane_truncate_fn truncate; @@@ -547,10 -507,7 +547,10 @@@ static void free_diff_words_data(struc diff_words_show(ecbdata->diff_words); free (ecbdata->diff_words->minus.text.ptr); + free (ecbdata->diff_words->minus.orig); free (ecbdata->diff_words->plus.text.ptr); + free (ecbdata->diff_words->plus.orig); + free(ecbdata->diff_words->word_regex); free(ecbdata->diff_words); ecbdata->diff_words = NULL; } @@@ -641,12 -598,6 +641,12 @@@ static void fn_out_consume(void *priv, ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } + if (diff_suppress_blank_empty + && len == 2 && line[0] == ' ' && line[1] == '\n') { + line[0] = '\n'; + len = 1; + } + /* This is not really necessary for now because * this codepath only deals with two-way diffs. */ @@@ -710,7 -661,7 +710,7 @@@ static char *pprint_rename(const char * { const char *old = a; const char *new = b; - struct strbuf name; + struct strbuf name = STRBUF_INIT; int pfx_length, sfx_length; int len_a = strlen(a); int len_b = strlen(b); @@@ -718,6 -669,7 +718,6 @@@ int qlen_a = quote_c_style(a, NULL, NULL, 0); int qlen_b = quote_c_style(b, NULL, NULL, 0); - strbuf_init(&name, 0); if (qlen_a || qlen_b) { quote_c_style(a, &name, NULL, 0); strbuf_addstr(&name, " => "); @@@ -774,6 -726,8 +774,6 @@@ } struct diffstat_t { - struct xdiff_emit_state xm; - int nr; int alloc; struct diffstat_file { @@@ -860,7 -814,8 +860,7 @@@ static void fill_print_name(struct diff return; if (!file->is_renamed) { - struct strbuf buf; - strbuf_init(&buf, 0); + struct strbuf buf = STRBUF_INIT; if (quote_c_style(file->name, &buf, NULL, 0)) { pname = strbuf_detach(&buf, NULL); } else { @@@ -1167,13 -1122,9 +1167,13 @@@ static void show_dirstat(struct diff_op /* * Original minus copied is the removed material, * added is the new material. They are both damages - * made to the preimage. + * made to the preimage. In --dirstat-by-file mode, count + * damaged files, not damaged lines. This is done by + * counting only a single damaged line per file. */ damage = (p->one->size - copied) + added; + if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0) + damage = 1; ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc); dir.files[dir.nr].name = name; @@@ -1206,6 -1157,7 +1206,6 @@@ static void free_diffstat_info(struct d } struct checkdiff_t { - struct xdiff_emit_state xm; const char *filename; int lineno; struct diff_options *o; @@@ -1380,61 -1332,123 +1380,61 @@@ static void emit_binary_diff(FILE *file emit_binary_diff_body(file, two, one); } -static void setup_diff_attr_check(struct git_attr_check *check) +static void diff_filespec_load_driver(struct diff_filespec *one) { - static struct git_attr *attr_diff; - - if (!attr_diff) { - attr_diff = git_attr("diff", 4); - } - check[0].attr = attr_diff; + if (!one->driver) + one->driver = userdiff_find_by_path(one->path); + if (!one->driver) + one->driver = userdiff_find_by_name("default"); } -static void diff_filespec_check_attr(struct diff_filespec *one) +int diff_filespec_is_binary(struct diff_filespec *one) { - struct git_attr_check attr_diff_check; - int check_from_data = 0; - - if (one->checked_attr) - return; - - setup_diff_attr_check(&attr_diff_check); - one->is_binary = 0; - one->funcname_pattern_ident = NULL; - - if (!git_checkattr(one->path, 1, &attr_diff_check)) { - const char *value; - - /* binaryness */ - value = attr_diff_check.value; - if (ATTR_TRUE(value)) - ; - else if (ATTR_FALSE(value)) - one->is_binary = 1; - else - check_from_data = 1; - - /* funcname pattern ident */ - if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) - ; - else - one->funcname_pattern_ident = value; - } - - if (check_from_data) { - if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(one, 0); - - if (one->data) - one->is_binary = buffer_is_binary(one->data, one->size); + if (one->is_binary == -1) { + diff_filespec_load_driver(one); + if (one->driver->binary != -1) + one->is_binary = one->driver->binary; + else { + if (!one->data && DIFF_FILE_VALID(one)) + diff_populate_filespec(one, 0); + if (one->data) + one->is_binary = buffer_is_binary(one->data, + one->size); + if (one->is_binary == -1) + one->is_binary = 0; + } } + return one->is_binary; } -int diff_filespec_is_binary(struct diff_filespec *one) +static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one) { - diff_filespec_check_attr(one); - return one->is_binary; + diff_filespec_load_driver(one); + return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } -static const struct funcname_pattern_entry *funcname_pattern(const char *ident) -{ - struct funcname_pattern_list *pp; - - for (pp = funcname_pattern_list; pp; pp = pp->next) - if (!strcmp(ident, pp->e.name)) - return &pp->e; - return NULL; -} - -static const struct funcname_pattern_entry builtin_funcname_pattern[] = { - { "java", - "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", - REG_EXTENDED }, - { "pascal", - "^((procedure|function|constructor|destructor|interface|" - "implementation|initialization|finalization)[ \t]*.*)$" - "|" - "^(.*=[ \t]*(class|record).*)$", - REG_EXTENDED }, - { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", - REG_EXTENDED }, - { "tex", - "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", - REG_EXTENDED }, - { "ruby", "^[ \t]*((class|module|def)[ \t].*)$", - REG_EXTENDED }, -}; - -static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one) +static const char *userdiff_word_regex(struct diff_filespec *one) { - const char *ident; - const struct funcname_pattern_entry *pe; - int i; - - diff_filespec_check_attr(one); - ident = one->funcname_pattern_ident; - - if (!ident) - /* - * If the config file has "funcname.default" defined, that - * regexp is used; otherwise NULL is returned and xemit uses - * the built-in default. - */ - return funcname_pattern("default"); - - /* Look up custom "funcname.$ident" regexp from config. */ - pe = funcname_pattern(ident); - if (pe) - return pe; + diff_filespec_load_driver(one); + return one->driver->word_regex; +} - /* - * And define built-in fallback patterns here. Note that - * these can be overridden by the user's config settings. - */ - for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++) - if (!strcmp(ident, builtin_funcname_pattern[i].name)) - return &builtin_funcname_pattern[i]; +void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) +{ + if (!options->a_prefix) + options->a_prefix = a; + if (!options->b_prefix) + options->b_prefix = b; +} - return NULL; +static const char *get_textconv(struct diff_filespec *one) +{ + if (!DIFF_FILE_VALID(one)) + return NULL; + if (!S_ISREG(one->mode)) + return NULL; + diff_filespec_load_driver(one); + return one->driver->textconv; } static void builtin_diff(const char *name_a, @@@ -1450,29 -1464,13 +1450,29 @@@ char *a_one, *b_two; const char *set = diff_get_color_opt(o, DIFF_METAINFO); const char *reset = diff_get_color_opt(o, DIFF_RESET); + const char *a_prefix, *b_prefix; + const char *textconv_one = NULL, *textconv_two = NULL; + + if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) { + textconv_one = get_textconv(one); + textconv_two = get_textconv(two); + } + + diff_set_mnemonic_prefix(o, "a/", "b/"); + if (DIFF_OPT_TST(o, REVERSE_DIFF)) { + a_prefix = o->b_prefix; + b_prefix = o->a_prefix; + } else { + a_prefix = o->a_prefix; + b_prefix = o->b_prefix; + } /* Never use a non-valid filename anywhere if at all possible */ name_a = DIFF_FILE_VALID(one) ? name_a : name_b; name_b = DIFF_FILE_VALID(two) ? name_b : name_a; - a_one = quote_two(o->a_prefix, name_a + (*name_a == '/')); - b_two = quote_two(o->b_prefix, name_b + (*name_b == '/')); + a_one = quote_two(a_prefix, name_a + (*name_a == '/')); + b_two = quote_two(b_prefix, name_b + (*name_b == '/')); lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; fprintf(o->file, "%sdiff --git %s %s%s\n", set, a_one, b_two, reset); @@@ -1500,11 -1498,8 +1500,11 @@@ */ if ((one->mode ^ two->mode) & S_IFMT) goto free_ab_and_return; - if (complete_rewrite) { - emit_rewrite_diff(name_a, name_b, one, two, o); + if (complete_rewrite && + (textconv_one || !diff_filespec_is_binary(one)) && + (textconv_two || !diff_filespec_is_binary(two))) { + emit_rewrite_diff(name_a, name_b, one, two, + textconv_one, textconv_two, o); o->found_changes = 1; goto free_ab_and_return; } @@@ -1514,8 -1509,7 +1514,8 @@@ die("unable to read files to diff"); if (!DIFF_OPT_TST(o, TEXT) && - (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) { + ( (diff_filespec_is_binary(one) && !textconv_one) || + (diff_filespec_is_binary(two) && !textconv_two) )) { /* Quite common confusing case */ if (mf1.size == mf2.size && !memcmp(mf1.ptr, mf2.ptr, mf1.size)) @@@ -1534,28 -1528,12 +1534,28 @@@ xdemitconf_t xecfg; xdemitcb_t ecb; struct emit_callback ecbdata; - const struct funcname_pattern_entry *pe; + const struct userdiff_funcname *pe; + + if (textconv_one) { + size_t size; + mf1.ptr = run_textconv(textconv_one, one, &size); + if (!mf1.ptr) + die("unable to read files to diff"); + mf1.size = size; + } + if (textconv_two) { + size_t size; + mf2.ptr = run_textconv(textconv_two, two, &size); + if (!mf2.ptr) + die("unable to read files to diff"); + mf2.size = size; + } pe = diff_funcname_pattern(one); if (!pe) pe = diff_funcname_pattern(two); + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); memset(&ecbdata, 0, sizeof(ecbdata)); ecbdata.label_path = lbl; @@@ -1565,7 -1543,6 +1565,7 @@@ ecbdata.file = o->file; xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; xecfg.ctxlen = o->context; + xecfg.interhunkctxlen = o->interhunkcontext; xecfg.flags = XDL_EMIT_FUNCNAMES; if (pe) xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags); @@@ -1575,34 -1552,17 +1575,34 @@@ xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); else if (!prefixcmp(diffopts, "-u")) xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); - ecb.outf = xdiff_outf; - ecb.priv = &ecbdata; - ecbdata.xm.consume = fn_out_consume; if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) { ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); + if (!o->word_regex) + o->word_regex = diff_word_regex_cfg; + if (o->word_regex) { + ecbdata.diff_words->word_regex = (regex_t *) + xmalloc(sizeof(regex_t)); + if (regcomp(ecbdata.diff_words->word_regex, + o->word_regex, + REG_EXTENDED | REG_NEWLINE)) + die ("Invalid regular expression: %s", + o->word_regex); + } } - xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata, + &xpp, &xecfg, &ecb); if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) free_diff_words_data(&ecbdata); + if (textconv_one) + free(mf1.ptr); + if (textconv_two) + free(mf2.ptr); } free_ab_and_return: @@@ -1649,11 -1609,11 +1649,11 @@@ static void builtin_diffstat(const cha xdemitconf_t xecfg; xdemitcb_t ecb; + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; - ecb.outf = xdiff_outf; - ecb.priv = diffstat; - xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat, + &xpp, &xecfg, &ecb); } free_and_return: @@@ -1674,6 -1634,7 +1674,6 @@@ static void builtin_checkdiff(const cha return; memset(&data, 0, sizeof(data)); - data.xm.consume = checkdiff_consume; data.filename = name_b ? name_b : name_a; data.lineno = 0; data.o = o; @@@ -1696,12 -1657,12 +1696,12 @@@ xdemitconf_t xecfg; xdemitcb_t ecb; + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); xecfg.ctxlen = 1; /* at least one context line */ xpp.flags = XDF_NEED_MINIMAL; - ecb.outf = xdiff_outf; - ecb.priv = &data; - xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + xdi_diff_outf(&mf1, &mf2, checkdiff_consume, &data, + &xpp, &xecfg, &ecb); if ((data.ws_rule & WS_TRAILING_SPACE) && data.trailing_blanks_start) { @@@ -1726,7 -1687,6 +1726,7 @@@ struct diff_filespec *alloc_filespec(co spec->path = (char *)(spec + 1); memcpy(spec->path, path, namelen+1); spec->count = 1; + spec->is_binary = -1; return spec; } @@@ -1783,7 -1743,7 +1783,7 @@@ static int reuse_worktree_file(const ch * objects however would tend to be slower as they need * to be individually opened and inflated. */ - if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1, NULL)) + if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1)) return 0; len = strlen(name); @@@ -1811,9 -1771,10 +1811,9 @@@ static int populate_from_stdin(struct diff_filespec *s) { - struct strbuf buf; + struct strbuf buf = STRBUF_INIT; size_t size = 0; - strbuf_init(&buf, 0); if (strbuf_read(&buf, 0, 0) < 0) return error("error while reading from stdin %s", strerror(errno)); @@@ -1865,7 -1826,7 +1865,7 @@@ int diff_populate_filespec(struct diff_ if (!s->sha1_valid || reuse_worktree_file(s->path, s->sha1, 0)) { - struct strbuf buf; + struct strbuf buf = STRBUF_INIT; struct stat st; int fd; @@@ -1885,18 -1846,19 +1885,18 @@@ s->size = xsize_t(st.st_size); if (!s->size) goto empty; - if (size_only) - return 0; if (S_ISLNK(st.st_mode)) { - int ret; - s->data = xmalloc(s->size); - s->should_free = 1; - ret = readlink(s->path, s->data, s->size); - if (ret < 0) { - free(s->data); + struct strbuf sb = STRBUF_INIT; + + if (strbuf_readlink(&sb, s->path, s->size)) goto err_empty; - } + s->size = sb.len; + s->data = strbuf_detach(&sb, NULL); + s->should_free = 1; return 0; } + if (size_only) + return 0; fd = open(s->path, O_RDONLY); if (fd < 0) goto err_empty; @@@ -1907,6 -1869,7 +1907,6 @@@ /* * Convert from working tree format to canonical git format */ - strbuf_init(&buf, 0); if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) { size_t size = 0; munmap(s->data, s->size); @@@ -1948,23 -1911,17 +1948,23 @@@ void diff_free_filespec_data(struct dif s->cnt_data = NULL; } -static void prep_temp_blob(struct diff_tempfile *temp, +static void prep_temp_blob(const char *path, struct diff_tempfile *temp, void *blob, unsigned long size, const unsigned char *sha1, int mode) { int fd; + struct strbuf buf = STRBUF_INIT; fd = git_mkstemp(temp->tmp_path, PATH_MAX, ".diff_XXXXXX"); if (fd < 0) die("unable to create temp-file: %s", strerror(errno)); + if (convert_to_working_tree(path, + (const char *)blob, (size_t)size, &buf)) { + blob = buf.buf; + size = buf.len; + } if (write_in_full(fd, blob, size) != size) die("unable to write temp-file"); close(fd); @@@ -1972,14 -1929,12 +1972,14 @@@ strcpy(temp->hex, sha1_to_hex(sha1)); temp->hex[40] = 0; sprintf(temp->mode, "%06o", mode); + strbuf_release(&buf); } -static void prepare_temp_file(const char *name, - struct diff_tempfile *temp, - struct diff_filespec *one) +static struct diff_tempfile *prepare_temp_file(const char *name, + struct diff_filespec *one) { + struct diff_tempfile *temp = claim_diff_tempfile(); + if (!DIFF_FILE_VALID(one)) { not_a_valid_file: /* A '-' entry produces this for file-2, and @@@ -1988,13 -1943,7 +1988,13 @@@ temp->name = "/dev/null"; strcpy(temp->hex, "."); strcpy(temp->mode, "."); - return; + return temp; + } + + if (!remove_tempfile_installed) { + atexit(remove_tempfile); + sigchain_push_common(remove_tempfile_on_signal); + remove_tempfile_installed = 1; } if (!one->sha1_valid || @@@ -2008,12 -1957,13 +2008,12 @@@ if (S_ISLNK(st.st_mode)) { int ret; char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */ - size_t sz = xsize_t(st.st_size); - if (sizeof(buf) <= st.st_size) - die("symlink too long: %s", name); - ret = readlink(name, buf, sz); + ret = readlink(name, buf, sizeof(buf)); if (ret < 0) die("readlink(%s)", name); - prep_temp_blob(temp, buf, sz, + if (ret == sizeof(buf)) + die("symlink too long: %s", name); + prep_temp_blob(name, temp, buf, ret, (one->sha1_valid ? one->sha1 : null_sha1), (one->sha1_valid ? @@@ -2034,15 -1984,32 +2034,15 @@@ */ sprintf(temp->mode, "%06o", one->mode); } - return; + return temp; } else { if (diff_populate_filespec(one, 0)) die("cannot read data blob for %s", one->path); - prep_temp_blob(temp, one->data, one->size, + prep_temp_blob(name, temp, one->data, one->size, one->sha1, one->mode); } -} - -static void remove_tempfile(void) -{ - int i; - - for (i = 0; i < 2; i++) - if (diff_temp[i].name == diff_temp[i].tmp_path) { - unlink(diff_temp[i].name); - diff_temp[i].name = NULL; - } -} - -static void remove_tempfile_on_signal(int signo) -{ - remove_tempfile(); - signal(SIGINT, SIG_DFL); - raise(signo); + return temp; } /* An external diff command takes: @@@ -2060,22 -2027,34 +2060,22 @@@ static void run_external_diff(const cha int complete_rewrite) { const char *spawn_arg[10]; - struct diff_tempfile *temp = diff_temp; int retval; - static int atexit_asked = 0; - const char *othername; const char **arg = &spawn_arg[0]; - othername = (other? other : name); - if (one && two) { - prepare_temp_file(name, &temp[0], one); - prepare_temp_file(othername, &temp[1], two); - if (! atexit_asked && - (temp[0].name == temp[0].tmp_path || - temp[1].name == temp[1].tmp_path)) { - atexit_asked = 1; - atexit(remove_tempfile); - } - signal(SIGINT, remove_tempfile_on_signal); - } - if (one && two) { + struct diff_tempfile *temp_one, *temp_two; + const char *othername = (other ? other : name); + temp_one = prepare_temp_file(name, one); + temp_two = prepare_temp_file(othername, two); *arg++ = pgm; *arg++ = name; - *arg++ = temp[0].name; - *arg++ = temp[0].hex; - *arg++ = temp[0].mode; - *arg++ = temp[1].name; - *arg++ = temp[1].hex; - *arg++ = temp[1].mode; + *arg++ = temp_one->name; + *arg++ = temp_one->hex; + *arg++ = temp_one->mode; + *arg++ = temp_two->name; + *arg++ = temp_two->hex; + *arg++ = temp_two->mode; if (other) { *arg++ = other; *arg++ = xfrm_msg; @@@ -2094,66 -2073,27 +2094,66 @@@ } } -static const char *external_diff_attr(const char *name) +static int similarity_index(struct diff_filepair *p) { - struct git_attr_check attr_diff_check; + return p->score * 100 / MAX_SCORE; +} - if (!name) - return NULL; +static void fill_metainfo(struct strbuf *msg, + const char *name, + const char *other, + struct diff_filespec *one, + struct diff_filespec *two, + struct diff_options *o, + struct diff_filepair *p) +{ + strbuf_init(msg, PATH_MAX * 2 + 300); + switch (p->status) { + case DIFF_STATUS_COPIED: + strbuf_addf(msg, "similarity index %d%%", similarity_index(p)); + strbuf_addstr(msg, "\ncopy from "); + quote_c_style(name, msg, NULL, 0); + strbuf_addstr(msg, "\ncopy to "); + quote_c_style(other, msg, NULL, 0); + strbuf_addch(msg, '\n'); + break; + case DIFF_STATUS_RENAMED: + strbuf_addf(msg, "similarity index %d%%", similarity_index(p)); + strbuf_addstr(msg, "\nrename from "); + quote_c_style(name, msg, NULL, 0); + strbuf_addstr(msg, "\nrename to "); + quote_c_style(other, msg, NULL, 0); + strbuf_addch(msg, '\n'); + break; + case DIFF_STATUS_MODIFIED: + if (p->score) { + strbuf_addf(msg, "dissimilarity index %d%%\n", + similarity_index(p)); + break; + } + /* fallthru */ + default: + /* nothing */ + ; + } + if (one && two && hashcmp(one->sha1, two->sha1)) { + int abbrev = DIFF_OPT_TST(o, FULL_INDEX) ? 40 : DEFAULT_ABBREV; - setup_diff_attr_check(&attr_diff_check); - if (!git_checkattr(name, 1, &attr_diff_check)) { - const char *value = attr_diff_check.value; - if (!ATTR_TRUE(value) && - !ATTR_FALSE(value) && - !ATTR_UNSET(value)) { - struct ll_diff_driver *drv; - - for (drv = user_diff; drv; drv = drv->next) - if (!strcmp(drv->name, value)) - return drv->cmd; + if (DIFF_OPT_TST(o, BINARY)) { + mmfile_t mf; + if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) || + (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two))) + abbrev = 40; } + strbuf_addf(msg, "index %.*s..%.*s", + abbrev, sha1_to_hex(one->sha1), + abbrev, sha1_to_hex(two->sha1)); + if (one->mode == two->mode) + strbuf_addf(msg, " %06o", one->mode); + strbuf_addch(msg, '\n'); } - return NULL; + if (msg->len) + strbuf_setlen(msg, msg->len - 1); } static void run_diff_cmd(const char *pgm, @@@ -2162,24 -2102,16 +2162,24 @@@ const char *attr_path, struct diff_filespec *one, struct diff_filespec *two, - const char *xfrm_msg, + struct strbuf *msg, struct diff_options *o, - int complete_rewrite) + struct diff_filepair *p) { + const char *xfrm_msg = NULL; + int complete_rewrite = (p->status == DIFF_STATUS_MODIFIED) && p->score; + + if (msg) { + fill_metainfo(msg, name, other, one, two, o, p); + xfrm_msg = msg->len ? msg->buf : NULL; + } + if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL)) pgm = NULL; else { - const char *cmd = external_diff_attr(attr_path); - if (cmd) - pgm = cmd; + struct userdiff_driver *drv = userdiff_find_by_path(attr_path); + if (drv && drv->external) + pgm = drv->external; } if (pgm) { @@@ -2206,13 -2138,18 +2206,13 @@@ static void diff_fill_sha1_info(struct if (lstat(one->path, &st) < 0) die("stat %s", one->path); if (index_path(one->sha1, one->path, &st, 0)) - die("cannot hash %s\n", one->path); + die("cannot hash %s", one->path); } } else hashclr(one->sha1); } -static int similarity_index(struct diff_filepair *p) -{ - return p->score * 100 / MAX_SCORE; -} - static void strip_prefix(int prefix_length, const char **namep, const char **otherp) { /* Strip the prefix but do not molest /dev/null and absolute paths */ @@@ -2226,11 -2163,13 +2226,11 @@@ static void run_diff(struct diff_filepa { const char *pgm = external_diff(); struct strbuf msg; - char *xfrm_msg; struct diff_filespec *one = p->one; struct diff_filespec *two = p->two; const char *name; const char *other; const char *attr_path; - int complete_rewrite = 0; name = p->one->path; other = (strcmp(name, p->two->path) ? p->two->path : NULL); @@@ -2240,34 -2179,83 +2240,34 @@@ if (DIFF_PAIR_UNMERGED(p)) { run_diff_cmd(pgm, name, NULL, attr_path, - NULL, NULL, NULL, o, 0); + NULL, NULL, NULL, o, p); return; } diff_fill_sha1_info(one); diff_fill_sha1_info(two); - strbuf_init(&msg, PATH_MAX * 2 + 300); - switch (p->status) { - case DIFF_STATUS_COPIED: - strbuf_addf(&msg, "similarity index %d%%", similarity_index(p)); - strbuf_addstr(&msg, "\ncopy from "); - quote_c_style(name, &msg, NULL, 0); - strbuf_addstr(&msg, "\ncopy to "); - quote_c_style(other, &msg, NULL, 0); - strbuf_addch(&msg, '\n'); - break; - case DIFF_STATUS_RENAMED: - strbuf_addf(&msg, "similarity index %d%%", similarity_index(p)); - strbuf_addstr(&msg, "\nrename from "); - quote_c_style(name, &msg, NULL, 0); - strbuf_addstr(&msg, "\nrename to "); - quote_c_style(other, &msg, NULL, 0); - strbuf_addch(&msg, '\n'); - break; - case DIFF_STATUS_MODIFIED: - if (p->score) { - strbuf_addf(&msg, "dissimilarity index %d%%\n", - similarity_index(p)); - complete_rewrite = 1; - break; - } - /* fallthru */ - default: - /* nothing */ - ; - } - - if (hashcmp(one->sha1, two->sha1)) { - int abbrev = DIFF_OPT_TST(o, FULL_INDEX) ? 40 : DEFAULT_ABBREV; - - if (DIFF_OPT_TST(o, BINARY)) { - mmfile_t mf; - if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) || - (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two))) - abbrev = 40; - } - strbuf_addf(&msg, "index %.*s..%.*s", - abbrev, sha1_to_hex(one->sha1), - abbrev, sha1_to_hex(two->sha1)); - if (one->mode == two->mode) - strbuf_addf(&msg, " %06o", one->mode); - strbuf_addch(&msg, '\n'); - } - - if (msg.len) - strbuf_setlen(&msg, msg.len - 1); - xfrm_msg = msg.len ? msg.buf : NULL; - if (!pgm && DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) && (S_IFMT & one->mode) != (S_IFMT & two->mode)) { - /* a filepair that changes between file and symlink + /* + * a filepair that changes between file and symlink * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); run_diff_cmd(NULL, name, other, attr_path, - one, null, xfrm_msg, o, 0); + one, null, &msg, o, p); free(null); + strbuf_release(&msg); + null = alloc_filespec(one->path); run_diff_cmd(NULL, name, other, attr_path, - null, two, xfrm_msg, o, 0); + null, two, &msg, o, p); free(null); } else run_diff_cmd(pgm, name, other, attr_path, - one, two, xfrm_msg, o, complete_rewrite); + one, two, &msg, o, p); strbuf_release(&msg); } @@@ -2333,18 -2321,19 +2333,18 @@@ void diff_setup(struct diff_options *op options->break_opt = -1; options->rename_limit = -1; options->dirstat_percent = 3; - DIFF_OPT_CLR(options, DIRSTAT_CUMULATIVE); options->context = 3; options->change = diff_change; options->add_remove = diff_addremove; if (diff_use_color_default > 0) DIFF_OPT_SET(options, COLOR_DIFF); - else - DIFF_OPT_CLR(options, COLOR_DIFF); options->detect_rename = diff_detect_rename_default; - options->a_prefix = "a/"; - options->b_prefix = "b/"; + if (!diff_mnemonic_prefix) { + options->a_prefix = "a/"; + options->b_prefix = "b/"; + } } int diff_setup_done(struct diff_options *options) @@@ -2501,10 -2490,6 +2501,10 @@@ int diff_opt_parse(struct diff_options else if (!strcmp(arg, "--cumulative")) { options->output_format |= DIFF_FORMAT_DIRSTAT; DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE); + } else if (opt_arg(arg, 0, "dirstat-by-file", + &options->dirstat_percent)) { + options->output_format |= DIFF_FORMAT_DIRSTAT; + DIFF_OPT_SET(options, DIRSTAT_BY_FILE); } else if (!strcmp(arg, "--check")) options->output_format |= DIFF_FORMAT_CHECKDIFF; @@@ -2579,8 -2564,6 +2579,8 @@@ options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE; else if (!strcmp(arg, "--ignore-space-at-eol")) options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL; + else if (!strcmp(arg, "--patience")) + options->xdl_opts |= XDF_PATIENCE_DIFF; /* flags options */ else if (!strcmp(arg, "--binary")) { @@@ -2603,10 -2586,6 +2603,10 @@@ DIFF_OPT_CLR(options, COLOR_DIFF); else if (!strcmp(arg, "--color-words")) options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + else if (!prefixcmp(arg, "--color-words=")) { + options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + options->word_regex = arg + 14; + } else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); else if (!strcmp(arg, "--quiet")) @@@ -2615,10 -2594,6 +2615,10 @@@ DIFF_OPT_SET(options, ALLOW_EXTERNAL); else if (!strcmp(arg, "--no-ext-diff")) DIFF_OPT_CLR(options, ALLOW_EXTERNAL); + else if (!strcmp(arg, "--textconv")) + DIFF_OPT_SET(options, ALLOW_TEXTCONV); + else if (!strcmp(arg, "--no-textconv")) + DIFF_OPT_CLR(options, ALLOW_TEXTCONV); else if (!strcmp(arg, "--ignore-submodules")) DIFF_OPT_SET(options, IGNORE_SUBMODULES); @@@ -2652,9 -2627,6 +2652,9 @@@ options->b_prefix = arg + 13; else if (!strcmp(arg, "--no-prefix")) options->a_prefix = options->b_prefix = ""; + else if (opt_arg(arg, '\0', "inter-hunk-context", + &options->interhunkcontext)) + ; else if (!prefixcmp(arg, "--output=")) { options->file = fopen(arg + strlen("--output="), "w"); options->close_file = 1; @@@ -3074,7 -3046,8 +3074,7 @@@ static void diff_summary(FILE *file, st } struct patch_id_t { - struct xdiff_emit_state xm; - SHA_CTX *ctx; + git_SHA_CTX *ctx; int patchlen; }; @@@ -3102,7 -3075,7 +3102,7 @@@ static void patch_id_consume(void *priv new_len = remove_space(line, len); - SHA1_Update(data->ctx, line, new_len); + git_SHA1_Update(data->ctx, line, new_len); data->patchlen += new_len; } @@@ -3111,13 -3084,14 +3111,13 @@@ static int diff_get_patch_id(struct dif { struct diff_queue_struct *q = &diff_queued_diff; int i; - SHA_CTX ctx; + git_SHA_CTX ctx; struct patch_id_t data; char buffer[PATH_MAX * 4 + 20]; - SHA1_Init(&ctx); + git_SHA1_Init(&ctx); memset(&data, 0, sizeof(struct patch_id_t)); data.ctx = &ctx; - data.xm.consume = patch_id_consume; for (i = 0; i < q->nr; i++) { xpparam_t xpp; @@@ -3127,7 -3101,6 +3127,7 @@@ struct diff_filepair *p = q->queue[i]; int len1, len2; + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); if (p->status == 0) return error("internal diff status error"); @@@ -3178,16 -3151,17 +3178,16 @@@ len2, p->two->path, len1, p->one->path, len2, p->two->path); - SHA1_Update(&ctx, buffer, len1); + git_SHA1_Update(&ctx, buffer, len1); xpp.flags = XDF_NEED_MINIMAL; xecfg.ctxlen = 3; xecfg.flags = XDL_EMIT_FUNCNAMES; - ecb.outf = xdiff_outf; - ecb.priv = &data; - xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + xdi_diff_outf(&mf1, &mf2, patch_id_consume, &data, + &xpp, &xecfg, &ecb); } - SHA1_Final(sha1, &ctx); + git_SHA1_Final(sha1, &ctx); return 0; } @@@ -3261,6 -3235,7 +3261,6 @@@ void diff_flush(struct diff_options *op struct diffstat_t diffstat; memset(&diffstat, 0, sizeof(struct diffstat_t)); - diffstat.xm.consume = diffstat_consume; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; if (check_pair_status(p)) @@@ -3557,32 -3532,3 +3557,32 @@@ void diff_unmerge(struct diff_options * fill_filespec(one, sha1, mode); diff_queue(&diff_queued_diff, one, two)->is_unmerged = 1; } + +static char *run_textconv(const char *pgm, struct diff_filespec *spec, + size_t *outsize) +{ + struct diff_tempfile *temp; + const char *argv[3]; + const char **arg = argv; + struct child_process child; + struct strbuf buf = STRBUF_INIT; + + temp = prepare_temp_file(spec->path, spec); + *arg++ = pgm; + *arg++ = temp->name; + *arg = NULL; + + memset(&child, 0, sizeof(child)); + child.argv = argv; + child.out = -1; + if (start_command(&child) != 0 || + strbuf_read(&buf, child.out, 0) < 0 || + finish_command(&child) != 0) { + remove_tempfile(); + error("error running textconv command '%s'", pgm); + return NULL; + } + remove_tempfile(); + + return strbuf_detach(&buf, outsize); +} diff --combined git-repack.sh index 00c597e97c,e02bf27aa6..0868734723 --- a/git-repack.sh +++ b/git-repack.sh @@@ -60,6 -60,7 +60,7 @@@ case ",$all_into_one," i args='--unpacked --incremental' ;; ,t,) + args= existing= if [ -d "$PACKDIR" ]; then for e in `cd "$PACKDIR" && find . -type f -name '*.pack' \ | sed -e 's/^\.\///' -e 's/\.pack$//'` @@@ -67,11 -68,10 +68,10 @@@ if [ -e "$PACKDIR/$e.keep" ]; then : keep else - args="$args --unpacked=$e.pack" existing="$existing $e" fi done - if test -n "$args" -a -n "$unpack_unreachable" -a \ + if test -n "$existing" -a -n "$unpack_unreachable" -a \ -n "$remove_redundant" then args="$args $unpack_unreachable" @@@ -88,79 -88,32 +88,79 @@@ if [ -z "$names" ]; the echo Nothing new to pack. fi fi -for name in $names ; do - fullbases="$fullbases pack-$name" - chmod a-w "$PACKTMP-$name.pack" - chmod a-w "$PACKTMP-$name.idx" - mkdir -p "$PACKDIR" || exit +# Ok we have prepared all new packfiles. +mkdir -p "$PACKDIR" || exit + +# First see if there are packs of the same name and if so +# if we can move them out of the way (this can happen if we +# repacked immediately after packing fully. +rollback= +failed= +for name in $names +do for sfx in pack idx do - if test -f "$PACKDIR/pack-$name.$sfx" - then - mv -f "$PACKDIR/pack-$name.$sfx" \ - "$PACKDIR/old-pack-$name.$sfx" - fi - done && + file=pack-$name.$sfx + test -f "$PACKDIR/$file" || continue + rm -f "$PACKDIR/old-$file" && + mv "$PACKDIR/$file" "$PACKDIR/old-$file" || { + failed=t + break + } + rollback="$rollback $file" + done + test -z "$failed" || break +done + +# If renaming failed for any of them, roll the ones we have +# already renamed back to their original names. +if test -n "$failed" +then + rollback_failure= + for file in $rollback + do + mv "$PACKDIR/old-$file" "$PACKDIR/$file" || + rollback_failure="$rollback_failure $file" + done + if test -n "$rollback_failure" + then + echo >&2 "WARNING: Some packs in use have been renamed by" + echo >&2 "WARNING: prefixing old- to their name, in order to" + echo >&2 "WARNING: replace them with the new version of the" + echo >&2 "WARNING: file. But the operation failed, and" + echo >&2 "WARNING: attempt to rename them back to their" + echo >&2 "WARNING: original names also failed." + echo >&2 "WARNING: Please rename them in $PACKDIR manually:" + for file in $rollback_failure + do + echo >&2 "WARNING: old-$file -> $file" + done + fi + exit 1 +fi + +# Now the ones with the same name are out of the way... +fullbases= +for name in $names +do + fullbases="$fullbases pack-$name" + chmod a-w "$PACKTMP-$name.pack" + chmod a-w "$PACKTMP-$name.idx" mv -f "$PACKTMP-$name.pack" "$PACKDIR/pack-$name.pack" && - mv -f "$PACKTMP-$name.idx" "$PACKDIR/pack-$name.idx" && - test -f "$PACKDIR/pack-$name.pack" && - test -f "$PACKDIR/pack-$name.idx" || { - echo >&2 "Couldn't replace the existing pack with updated one." - echo >&2 "The original set of packs have been saved as" - echo >&2 "old-pack-$name.{pack,idx} in $PACKDIR." - exit 1 - } - rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx" + mv -f "$PACKTMP-$name.idx" "$PACKDIR/pack-$name.idx" || + exit +done + +# Remove the "old-" files +for name in $names +do + rm -f "$PACKDIR/old-pack-$name.idx" + rm -f "$PACKDIR/old-pack-$name.pack" done +# End of pack replacement. + if test "$remove_redundant" = t then # We know $existing are all redundant. @@@ -181,5 -134,5 +181,5 @@@ f case "$no_update_info" in t) : ;; -*) git-update-server-info ;; +*) git update-server-info ;; esac diff --combined revision.c index 286e416b75,50a5b5f394..34ee490ea0 --- a/revision.c +++ b/revision.c @@@ -11,7 -11,6 +11,7 @@@ #include "reflog-walk.h" #include "patch-ids.h" #include "decorate.h" +#include "log-tree.h" volatile show_early_output_fn_t show_early_output; @@@ -183,11 -182,8 +183,11 @@@ static struct commit *handle_commit(str if (!tag->tagged) die("bad tag"); object = parse_object(tag->tagged->sha1); - if (!object) + if (!object) { + if (flags & UNINTERESTING) + return NULL; die("bad object %s", sha1_to_hex(tag->tagged->sha1)); + } } /* @@@ -203,8 -199,6 +203,8 @@@ mark_parents_uninteresting(commit); revs->limited = 1; } + if (revs->show_source && !commit->util) + commit->util = (void *) name; return commit; } @@@ -298,31 -292,10 +298,31 @@@ static void file_change(struct diff_opt DIFF_OPT_SET(options, HAS_CHANGES); } -static int rev_compare_tree(struct rev_info *revs, struct tree *t1, struct tree *t2) +static int rev_compare_tree(struct rev_info *revs, struct commit *parent, struct commit *commit) { + struct tree *t1 = parent->tree; + struct tree *t2 = commit->tree; + if (!t1) return REV_TREE_NEW; + + if (revs->simplify_by_decoration) { + /* + * If we are simplifying by decoration, then the commit + * is worth showing if it has a tag pointing at it. + */ + if (lookup_decoration(&name_decoration, &commit->object)) + return REV_TREE_DIFFERENT; + /* + * A commit that is not pointed by a tag is uninteresting + * if we are not limited by path. This means that you will + * see the usual "commits that touch the paths" plus any + * tagged commit by specifying both --simplify-by-decoration + * and pathspec. + */ + if (!revs->prune_data) + return REV_TREE_SAME; + } if (!t2) return REV_TREE_DIFFERENT; tree_difference = REV_TREE_SAME; @@@ -333,13 -306,12 +333,13 @@@ return tree_difference; } -static int rev_same_tree_as_empty(struct rev_info *revs, struct tree *t1) +static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) { int retval; void *tree; unsigned long size; struct tree_desc empty, real; + struct tree *t1 = commit->tree; if (!t1) return 0; @@@ -373,7 -345,7 +373,7 @@@ static void try_to_simplify_commit(stru return; if (!commit->parents) { - if (rev_same_tree_as_empty(revs, commit->tree)) + if (rev_same_tree_as_empty(revs, commit)) commit->object.flags |= TREESAME; return; } @@@ -393,7 -365,7 +393,7 @@@ die("cannot simplify commit %s (because of %s)", sha1_to_hex(commit->object.sha1), sha1_to_hex(p->object.sha1)); - switch (rev_compare_tree(revs, p->tree, commit->tree)) { + switch (rev_compare_tree(revs, p, commit)) { case REV_TREE_SAME: tree_same = 1; if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) { @@@ -413,7 -385,7 +413,7 @@@ case REV_TREE_NEW: if (revs->remove_empty_trees && - rev_same_tree_as_empty(revs, p->tree)) { + rev_same_tree_as_empty(revs, p)) { /* We are adding all the specified * paths from this parent, so the * history beyond this parent is not @@@ -482,10 -454,9 +482,10 @@@ static int add_parents_to_list(struct r while (parent) { struct commit *p = parent->item; parent = parent->next; + if (p) + p->object.flags |= UNINTERESTING; if (parse_commit(p) < 0) - return -1; - p->object.flags |= UNINTERESTING; + continue; if (p->parents) mark_parents_uninteresting(p); if (p->object.flags & SEEN) @@@ -513,14 -484,12 +513,14 @@@ if (parse_commit(p) < 0) return -1; + if (revs->show_source && !p->util) + p->util = commit->util; p->object.flags |= left_flag; if (!(p->object.flags & SEEN)) { p->object.flags |= SEEN; insert_by_date_cached(p, list, cached_base, cache_ptr); } - if(revs->first_parent_only) + if (revs->first_parent_only) break; } return 0; @@@ -994,16 -963,6 +994,6 @@@ static void add_message_grep(struct rev add_grep(revs, pattern, GREP_PATTERN_BODY); } - static void add_ignore_packed(struct rev_info *revs, const char *name) - { - int num = ++revs->num_ignore_packed; - - revs->ignore_packed = xrealloc(revs->ignore_packed, - sizeof(const char *) * (num + 1)); - revs->ignore_packed[num-1] = name; - revs->ignore_packed[num] = NULL; - } - static int handle_revision_opt(struct rev_info *revs, int argc, const char **argv, int *unkc, const char **unkv) { @@@ -1059,19 -1018,6 +1049,19 @@@ } else if (!strcmp(arg, "--topo-order")) { revs->lifo = 1; revs->topo_order = 1; + } else if (!strcmp(arg, "--simplify-merges")) { + revs->simplify_merges = 1; + revs->rewrite_parents = 1; + revs->simplify_history = 0; + revs->limited = 1; + } else if (!strcmp(arg, "--simplify-by-decoration")) { + revs->simplify_merges = 1; + revs->rewrite_parents = 1; + revs->simplify_history = 0; + revs->simplify_by_decoration = 1; + revs->limited = 1; + revs->prune = 1; + load_ref_decorations(); } else if (!strcmp(arg, "--date-order")) { revs->lifo = 0; revs->topo_order = 1; @@@ -1116,12 -1062,8 +1106,8 @@@ revs->edge_hint = 1; } else if (!strcmp(arg, "--unpacked")) { revs->unpacked = 1; - free(revs->ignore_packed); - revs->ignore_packed = NULL; - revs->num_ignore_packed = 0; } else if (!prefixcmp(arg, "--unpacked=")) { - revs->unpacked = 1; - add_ignore_packed(revs, arg+11); + die("--unpacked= no longer supported."); } else if (!strcmp(arg, "-r")) { revs->diff = 1; DIFF_OPT_SET(&revs->diffopt, RECURSIVE); @@@ -1267,7 -1209,6 +1253,7 @@@ int setup_revisions(int argc, const cha if (!strcmp(arg, "--all")) { handle_refs(revs, flags, for_each_ref); + handle_refs(revs, flags, head_ref); continue; } if (!strcmp(arg, "--branches")) { @@@ -1400,179 -1341,6 +1386,179 @@@ static void add_child(struct rev_info * l->next = add_decoration(&revs->children, &parent->object, l); } +static int remove_duplicate_parents(struct commit *commit) +{ + struct commit_list **pp, *p; + int surviving_parents; + + /* Examine existing parents while marking ones we have seen... */ + pp = &commit->parents; + while ((p = *pp) != NULL) { + struct commit *parent = p->item; + if (parent->object.flags & TMP_MARK) { + *pp = p->next; + continue; + } + parent->object.flags |= TMP_MARK; + pp = &p->next; + } + /* count them while clearing the temporary mark */ + surviving_parents = 0; + for (p = commit->parents; p; p = p->next) { + p->item->object.flags &= ~TMP_MARK; + surviving_parents++; + } + return surviving_parents; +} + +struct merge_simplify_state { + struct commit *simplified; +}; + +static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit) +{ + struct merge_simplify_state *st; + + st = lookup_decoration(&revs->merge_simplification, &commit->object); + if (!st) { + st = xcalloc(1, sizeof(*st)); + add_decoration(&revs->merge_simplification, &commit->object, st); + } + return st; +} + +static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail) +{ + struct commit_list *p; + struct merge_simplify_state *st, *pst; + int cnt; + + st = locate_simplify_state(revs, commit); + + /* + * Have we handled this one? + */ + if (st->simplified) + return tail; + + /* + * An UNINTERESTING commit simplifies to itself, so does a + * root commit. We do not rewrite parents of such commit + * anyway. + */ + if ((commit->object.flags & UNINTERESTING) || !commit->parents) { + st->simplified = commit; + return tail; + } + + /* + * Do we know what commit all of our parents should be rewritten to? + * Otherwise we are not ready to rewrite this one yet. + */ + for (cnt = 0, p = commit->parents; p; p = p->next) { + pst = locate_simplify_state(revs, p->item); + if (!pst->simplified) { + tail = &commit_list_insert(p->item, tail)->next; + cnt++; + } + } + if (cnt) { + tail = &commit_list_insert(commit, tail)->next; + return tail; + } + + /* + * Rewrite our list of parents. + */ + for (p = commit->parents; p; p = p->next) { + pst = locate_simplify_state(revs, p->item); + p->item = pst->simplified; + } + cnt = remove_duplicate_parents(commit); + + /* + * It is possible that we are a merge and one side branch + * does not have any commit that touches the given paths; + * in such a case, the immediate parents will be rewritten + * to different commits. + * + * o----X X: the commit we are looking at; + * / / o: a commit that touches the paths; + * ---o----' + * + * Further reduce the parents by removing redundant parents. + */ + if (1 < cnt) { + struct commit_list *h = reduce_heads(commit->parents); + cnt = commit_list_count(h); + free_commit_list(commit->parents); + commit->parents = h; + } + + /* + * A commit simplifies to itself if it is a root, if it is + * UNINTERESTING, if it touches the given paths, or if it is a + * merge and its parents simplifies to more than one commits + * (the first two cases are already handled at the beginning of + * this function). + * + * Otherwise, it simplifies to what its sole parent simplifies to. + */ + if (!cnt || + (commit->object.flags & UNINTERESTING) || + !(commit->object.flags & TREESAME) || + (1 < cnt)) + st->simplified = commit; + else { + pst = locate_simplify_state(revs, commit->parents->item); + st->simplified = pst->simplified; + } + return tail; +} + +static void simplify_merges(struct rev_info *revs) +{ + struct commit_list *list; + struct commit_list *yet_to_do, **tail; + + if (!revs->topo_order) + sort_in_topological_order(&revs->commits, revs->lifo); + if (!revs->prune) + return; + + /* feed the list reversed */ + yet_to_do = NULL; + for (list = revs->commits; list; list = list->next) + commit_list_insert(list->item, &yet_to_do); + while (yet_to_do) { + list = yet_to_do; + yet_to_do = NULL; + tail = &yet_to_do; + while (list) { + struct commit *commit = list->item; + struct commit_list *next = list->next; + free(list); + list = next; + tail = simplify_one(revs, commit, tail); + } + } + + /* clean up the result, removing the simplified ones */ + list = revs->commits; + revs->commits = NULL; + tail = &revs->commits; + while (list) { + struct commit *commit = list->item; + struct commit_list *next = list->next; + struct merge_simplify_state *st; + free(list); + list = next; + st = locate_simplify_state(revs, commit); + if (st->simplified == commit) + tail = &commit_list_insert(commit, tail)->next; + } +} + static void set_children(struct rev_info *revs) { struct commit_list *l; @@@ -1613,8 -1381,6 +1599,8 @@@ int prepare_revision_walk(struct rev_in return -1; if (revs->topo_order) sort_in_topological_order(&revs->commits, revs->lifo); + if (revs->simplify_merges) + simplify_merges(revs); if (revs->children.name) set_children(revs); return 0; @@@ -1647,6 -1413,26 +1633,6 @@@ static enum rewrite_result rewrite_one( } } -static void remove_duplicate_parents(struct commit *commit) -{ - struct commit_list **pp, *p; - - /* Examine existing parents while marking ones we have seen... */ - pp = &commit->parents; - while ((p = *pp) != NULL) { - struct commit *parent = p->item; - if (parent->object.flags & TMP_MARK) { - *pp = p->next; - continue; - } - parent->object.flags |= TMP_MARK; - pp = &p->next; - } - /* ... and clear the temporary mark */ - for (p = commit->parents; p; p = p->next) - p->item->object.flags &= ~TMP_MARK; -} - static int rewrite_parents(struct rev_info *revs, struct commit *commit) { struct commit_list **pp = &commit->parents; @@@ -1685,7 -1471,7 +1671,7 @@@ enum commit_action simplify_commit(stru { if (commit->object.flags & SHOWN) return commit_ignore; - if (revs->unpacked && has_sha1_pack(commit->object.sha1, revs->ignore_packed)) + if (revs->unpacked && has_sha1_pack(commit->object.sha1)) return commit_ignore; if (revs->show_all) return commit_show; @@@ -1738,16 -1524,14 +1724,16 @@@ static struct commit *get_revision_1(st (commit->date < revs->max_age)) continue; if (add_parents_to_list(revs, commit, &revs->commits, NULL) < 0) - return NULL; + die("Failed to traverse parents of commit %s", + sha1_to_hex(commit->object.sha1)); } switch (simplify_commit(revs, commit)) { case commit_ignore: continue; case commit_error: - return NULL; + die("Failed to simplify parents of commit %s", + sha1_to_hex(commit->object.sha1)); default: return commit; } @@@ -1835,6 -1619,26 +1821,6 @@@ static struct commit *get_revision_inte return c; } - if (revs->reverse) { - int limit = -1; - - if (0 <= revs->max_count) { - limit = revs->max_count; - if (0 < revs->skip_count) - limit += revs->skip_count; - } - l = NULL; - while ((c = get_revision_1(revs))) { - commit_list_insert(c, &l); - if ((0 < limit) && !--limit) - break; - } - revs->commits = l; - revs->reverse = 0; - revs->max_count = -1; - c = NULL; - } - /* * Now pick up what they want to give us */ @@@ -1907,23 -1711,7 +1893,23 @@@ struct commit *get_revision(struct rev_info *revs) { - struct commit *c = get_revision_internal(revs); + struct commit *c; + struct commit_list *reversed; + + if (revs->reverse) { + reversed = NULL; + while ((c = get_revision_internal(revs))) { + commit_list_insert(c, &reversed); + } + revs->commits = reversed; + revs->reverse = 0; + revs->reverse_output_stage = 1; + } + + if (revs->reverse_output_stage) + return pop_commit(&revs->commits); + + c = get_revision_internal(revs); if (c && revs->graph) graph_update(revs->graph, c); return c; diff --combined revision.h index 7cf848771b,1d322759aa..66d211ac2e --- a/revision.h +++ b/revision.h @@@ -42,22 -42,17 +42,22 @@@ struct rev_info simplify_history:1, lifo:1, topo_order:1, + simplify_merges:1, + simplify_by_decoration:1, tag_objects:1, tree_objects:1, blob_objects:1, edge_hint:1, limited:1, - unpacked:1, /* see also ignore_packed below */ + unpacked:1, boundary:2, left_right:1, rewrite_parents:1, print_parents:1, + show_source:1, + show_decorations:1, reverse:1, + reverse_output_stage:1, cherry_pick:1, first_parent_only:1; @@@ -80,9 -75,6 +80,6 @@@ missing_newline:1; enum date_mode date_mode; - const char **ignore_packed; /* pretend objects in these are unpacked */ - int num_ignore_packed; - unsigned int abbrev; enum cmit_fmt commit_format; struct log_info *loginfo; @@@ -115,7 -107,6 +112,7 @@@ struct reflog_walk_info *reflog_info; struct decoration children; + struct decoration merge_simplification; }; #define REV_TREE_SAME 0 diff --combined sha1_file.c index a07aa4e5c4,500fd93127..37e833b77d --- a/sha1_file.c +++ b/sha1_file.c @@@ -99,11 -99,7 +99,11 @@@ int safe_create_leading_directories(cha pos = strchr(pos, '/'); if (!pos) break; - *pos = 0; + while (*++pos == '/') + ; + if (!*pos) + break; + *--pos = '\0'; if (!stat(path, &st)) { /* path exists */ if (!S_ISDIR(st.st_mode)) { @@@ -254,6 -250,7 +254,6 @@@ static void read_info_alternates(const */ static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth) { - struct stat st; const char *objdir = get_object_directory(); struct alternate_object_database *ent; struct alternate_object_database *alt; @@@ -284,7 -281,7 +284,7 @@@ ent->base[pfxlen] = ent->base[entlen-1] = 0; /* Detect cases where alternate disappeared */ - if (stat(ent->base, &st) || !S_ISDIR(st.st_mode)) { + if (!is_directory(ent->base)) { error("object directory %s does not exist; " "check .git/objects/info/alternates.", ent->base); @@@ -397,16 -394,6 +397,16 @@@ void add_to_alternates_file(const char link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0); } +void foreach_alt_odb(alt_odb_fn fn, void *cb) +{ + struct alternate_object_database *ent; + + prepare_alt_odb(); + for (ent = alt_odb_list; ent; ent = ent->next) + if (fn(ent, cb)) + return; +} + void prepare_alt_odb(void) { const char *alt; @@@ -689,7 -676,6 +689,7 @@@ void free_pack_by_name(const char *pack while (*pp) { p = *pp; if (strcmp(pack_name, p->pack_name) == 0) { + clear_delta_base_cache(); close_pack_windows(p); if (p->pack_fd != -1) close(p->pack_fd); @@@ -801,7 -787,7 +801,7 @@@ unsigned char* use_pack(struct packed_g if (p->pack_fd == -1 && open_packed_git(p)) die("packfile %s cannot be accessed", p->pack_name); - /* Since packfiles end in a hash of their content and its + /* Since packfiles end in a hash of their content and it's * pointless to ask for an offset into the middle of that * hash, and the in_window function above wouldn't match * don't allow an offset too close to the end of the file. @@@ -1154,8 -1140,7 +1154,8 @@@ static int legacy_loose_object(unsigne return 0; } -unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) +unsigned long unpack_object_header_buffer(const unsigned char *buf, + unsigned long len, enum object_type *type, unsigned long *sizep) { unsigned shift; unsigned char c; @@@ -1167,10 -1152,10 +1167,10 @@@ size = c & 15; shift = 4; while (c & 0x80) { - if (len <= used) - return 0; - if (sizeof(long) * 8 <= shift) + if (len <= used || sizeof(long) * 8 <= shift) { + error("bad object header"); return 0; + } c = buf[used++]; size += (c & 0x7f) << shift; shift += 7; @@@ -1197,8 -1182,8 +1197,8 @@@ static int unpack_sha1_header(z_stream stream->avail_out = bufsiz; if (legacy_loose_object(map)) { - inflateInit(stream); - return inflate(stream, 0); + git_inflate_init(stream); + return git_inflate(stream, 0); } @@@ -1209,7 -1194,7 +1209,7 @@@ * really worth it and we don't write it any longer. But we * can still read it. */ - used = unpack_object_header_gently(map, mapsize, &type, &size); + used = unpack_object_header_buffer(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; map += used; @@@ -1218,7 -1203,7 +1218,7 @@@ /* Set up the stream for the rest.. */ stream->next_in = map; stream->avail_in = mapsize; - inflateInit(stream); + git_inflate_init(stream); /* And generate the fake traditional header */ stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu", @@@ -1255,11 -1240,11 +1255,11 @@@ static void *unpack_sha1_rest(z_stream stream->next_out = buf + bytes; stream->avail_out = size - bytes; while (status == Z_OK) - status = inflate(stream, Z_FINISH); + status = git_inflate(stream, Z_FINISH); } buf[size] = 0; if (status == Z_STREAM_END && !stream->avail_in) { - inflateEnd(stream); + git_inflate_end(stream); return buf; } @@@ -1349,19 -1334,17 +1349,19 @@@ unsigned long get_size_from_delta(struc stream.next_out = delta_head; stream.avail_out = sizeof(delta_head); - inflateInit(&stream); + git_inflate_init(&stream); do { in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; - st = inflate(&stream, Z_FINISH); + st = git_inflate(&stream, Z_FINISH); curpos += stream.next_in - in; } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); - inflateEnd(&stream); - if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) - die("delta data unpack-initial failed"); + git_inflate_end(&stream); + if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) { + error("delta data unpack-initial failed"); + return 0; + } /* Examine the initial part of the delta to figure out * the result size. @@@ -1402,7 -1385,7 +1402,7 @@@ static off_t get_delta_base(struct pack base_offset = (base_offset << 7) + (c & 127); } base_offset = delta_obj_offset - base_offset; - if (base_offset >= delta_obj_offset) + if (base_offset <= 0 || base_offset >= delta_obj_offset) return 0; /* out of bound */ *curpos += used; } else if (type == OBJ_REF_DELTA) { @@@ -1428,32 -1411,15 +1428,32 @@@ static int packed_delta_info(struct pac off_t base_offset; base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); + if (!base_offset) + return OBJ_BAD; type = packed_object_info(p, base_offset, NULL); + if (type <= OBJ_NONE) { + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return OBJ_BAD; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); + mark_bad_packed_object(p, base_sha1); + type = sha1_object_info(base_sha1, NULL); + if (type <= OBJ_NONE) + return OBJ_BAD; + } /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (sizep) + if (sizep) { *sizep = get_size_from_delta(p, w_curs, curpos); + if (*sizep == 0) + type = OBJ_BAD; + } return type; } @@@ -1475,11 -1441,10 +1475,11 @@@ static int unpack_object_header(struct * insane, so we know won't exceed what we have been given. */ base = use_pack(p, w_curs, *curpos, &left); - used = unpack_object_header_gently(base, left, &type, sizep); - if (!used) - die("object offset outside of pack file"); - *curpos += used; + used = unpack_object_header_buffer(base, left, &type, sizep); + if (!used) { + type = OBJ_BAD; + } else + *curpos += used; return type; } @@@ -1563,9 -1528,8 +1563,9 @@@ static int packed_object_info(struct pa *sizep = size; break; default: - die("pack %s contains unknown object type %d", - p->pack_name, type); + error("unknown object type %i at offset %"PRIuMAX" in %s", + type, (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; } unuse_pack(&w_curs); return type; @@@ -1586,14 -1550,14 +1586,14 @@@ static void *unpack_compressed_entry(st stream.next_out = buffer; stream.avail_out = size; - inflateInit(&stream); + git_inflate_init(&stream); do { in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; - st = inflate(&stream, Z_FINISH); + st = git_inflate(&stream, Z_FINISH); curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); - inflateEnd(&stream); + git_inflate_end(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { free(buffer); return NULL; @@@ -1637,9 -1601,11 +1637,9 @@@ static void *cache_or_unpack_entry(stru struct delta_base_cache_entry *ent = delta_base_cache + hash; ret = ent->data; - if (ret && ent->p == p && ent->base_offset == base_offset) - goto found_cache_entry; - return unpack_entry(p, base_offset, type, base_size); + if (!ret || ent->p != p || ent->base_offset != base_offset) + return unpack_entry(p, base_offset, type, base_size); -found_cache_entry: if (!keep_cache) { ent->data = NULL; ent->lru.next->prev = ent->lru.prev; @@@ -1664,13 -1630,6 +1664,13 @@@ static inline void release_delta_base_c } } +void clear_delta_base_cache(void) +{ + unsigned long p; + for (p = 0; p < MAX_DELTA_CACHE; p++) + release_delta_base_cache(&delta_base_cache[p]); +} + static void add_delta_base_cache(struct packed_git *p, off_t base_offset, void *base, unsigned long base_size, enum object_type type) { @@@ -1708,9 -1667,6 +1708,9 @@@ delta_base_cache_lru.prev = &ent->lru; } +static void *read_object(const unsigned char *sha1, enum object_type *type, + unsigned long *size); + static void *unpack_delta_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, @@@ -1739,12 -1695,9 +1739,12 @@@ * This is costly but should happen only in the presence * of a corrupted pack, and is better than failing outright. */ - struct revindex_entry *revidx = find_pack_revindex(p, base_offset); - const unsigned char *base_sha1 = - nth_packed_object_sha1(p, revidx->nr); + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return NULL; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", sha1_to_hex(base_sha1), (uintmax_t)base_offset, @@@ -1773,8 -1726,6 +1773,8 @@@ return result; } +int do_check_packed_object_crc; + void *unpack_entry(struct packed_git *p, off_t obj_offset, enum object_type *type, unsigned long *sizep) { @@@ -1782,20 -1733,6 +1782,20 @@@ off_t curpos = obj_offset; void *data; + if (do_check_packed_object_crc && p->index_version > 1) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + unsigned long len = revidx[1].offset - obj_offset; + if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + const unsigned char *sha1 = + nth_packed_object_sha1(p, revidx->nr); + error("bad packed object CRC for %s", + sha1_to_hex(sha1)); + mark_bad_packed_object(p, sha1); + unuse_pack(&w_curs); + return NULL; + } + } + *type = unpack_object_header(p, &w_curs, &curpos, sizep); switch (*type) { case OBJ_OFS_DELTA: @@@ -1919,25 -1856,7 +1919,7 @@@ off_t find_pack_entry_one(const unsigne return 0; } - int matches_pack_name(struct packed_git *p, const char *name) - { - const char *last_c, *c; - - if (!strcmp(p->pack_name, name)) - return 1; - - for (c = p->pack_name, last_c = c; *c;) - if (*c == '/') - last_c = ++c; - else - ++c; - if (!strcmp(last_c, name)) - return 1; - - return 0; - } - - static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) + static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { static struct packed_git *last_found = (void *)1; struct packed_git *p; @@@ -1949,15 -1868,6 +1931,6 @@@ p = (last_found == (void *)1) ? packed_git : last_found; do { - if (ignore_packed) { - const char **ig; - for (ig = ignore_packed; *ig; ig++) - if (matches_pack_name(p, *ig)) - break; - if (*ig) - goto next; - } - if (p->num_bad_objects) { unsigned i; for (i = 0; i < p->num_bad_objects; i++) @@@ -2028,7 -1938,7 +2001,7 @@@ static int sha1_loose_object_info(cons status = error("unable to parse %s header", sha1_to_hex(sha1)); else if (sizep) *sizep = size; - inflateEnd(&stream); + git_inflate_end(&stream); munmap(map, mapsize); return status; } @@@ -2038,7 -1948,7 +2011,7 @@@ int sha1_object_info(const unsigned cha struct pack_entry e; int status; - if (!find_pack_entry(sha1, &e, NULL)) { + if (!find_pack_entry(sha1, &e)) { /* Most likely it's a loose object. */ status = sha1_loose_object_info(sha1, sizep); if (status >= 0) @@@ -2046,17 -1956,10 +2019,17 @@@ /* Not a loose object; someone else may have just packed it. */ reprepare_packed_git(); - if (!find_pack_entry(sha1, &e, NULL)) + if (!find_pack_entry(sha1, &e)) return status; } - return packed_object_info(e.p, e.offset, sizep); + + status = packed_object_info(e.p, e.offset, sizep); + if (status < 0) { + mark_bad_packed_object(e.p, sha1); + status = sha1_object_info(sha1, sizep); + } + + return status; } static void *read_packed_sha1(const unsigned char *sha1, @@@ -2065,7 -1968,7 +2038,7 @@@ struct pack_entry e; void *data; - if (!find_pack_entry(sha1, &e, NULL)) + if (!find_pack_entry(sha1, &e)) return NULL; data = cache_or_unpack_entry(e.p, e.offset, size, type, 1); if (!data) { @@@ -2098,7 -2001,9 +2071,7 @@@ static struct cached_object static int cached_object_nr, cached_object_alloc; static struct cached_object empty_tree = { - /* empty tree sha1: 4b825dc642cb6eb9a060e54bf8d69288fbee4904 */ - "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" - "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04", + EMPTY_TREE_SHA1_BIN, OBJ_TREE, "", 0 @@@ -2141,8 -2046,8 +2114,8 @@@ int pretend_sha1_file(void *buf, unsign return 0; } -void *read_object(const unsigned char *sha1, enum object_type *type, - unsigned long *size) +static void *read_object(const unsigned char *sha1, enum object_type *type, + unsigned long *size) { unsigned long mapsize; void *map, *buf; @@@ -2230,16 -2135,16 +2203,16 @@@ static void write_sha1_file_prepare(con const char *type, unsigned char *sha1, char *hdr, int *hdrlen) { - SHA_CTX c; + git_SHA_CTX c; /* Generate the header */ *hdrlen = sprintf(hdr, "%s %lu", type, len)+1; /* Sha1.. */ - SHA1_Init(&c); - SHA1_Update(&c, hdr, *hdrlen); - SHA1_Update(&c, buf, len); - SHA1_Final(sha1, &c); + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, *hdrlen); + git_SHA1_Update(&c, buf, len); + git_SHA1_Final(sha1, &c); } /* @@@ -2301,7 -2206,7 +2274,7 @@@ static void close_sha1_file(int fd fsync_or_die(fd, "sha1 file"); fchmod(fd, 0444); if (close(fd) != 0) - die("unable to write sha1 file"); + die("error when closing sha1 file (%s)", strerror(errno)); } /* Size of directory component, including the ending '/' */ @@@ -2348,8 -2253,7 +2321,8 @@@ static int create_tmpfile(char *buffer static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, void *buf, unsigned long len, time_t mtime) { - int fd, size, ret; + int fd, ret; + size_t size; unsigned char *compressed; z_stream stream; char *filename; @@@ -2464,36 -2368,66 +2437,36 @@@ int has_pack_file(const unsigned char * return 1; } - int has_sha1_pack(const unsigned char *sha1, const char **ignore_packed) + int has_sha1_pack(const unsigned char *sha1) { struct pack_entry e; - return find_pack_entry(sha1, &e, ignore_packed); + return find_pack_entry(sha1, &e); } int has_sha1_file(const unsigned char *sha1) { struct pack_entry e; - if (find_pack_entry(sha1, &e, NULL)) + if (find_pack_entry(sha1, &e)) return 1; return has_loose_object(sha1); } -int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object) -{ - struct strbuf buf; - int ret; - - strbuf_init(&buf, 0); - if (strbuf_read(&buf, fd, 4096) < 0) { - strbuf_release(&buf); - return -1; - } - - if (!type) - type = blob_type; - if (write_object) - ret = write_sha1_file(buf.buf, buf.len, type, sha1); - else - ret = hash_sha1_file(buf.buf, buf.len, type, sha1); - strbuf_release(&buf); - - return ret; -} - -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path) +static int index_mem(unsigned char *sha1, void *buf, size_t size, + int write_object, enum object_type type, const char *path) { - size_t size = xsize_t(st->st_size); - void *buf = NULL; int ret, re_allocated = 0; - if (size) - buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (!type) type = OBJ_BLOB; /* * Convert blobs to git internal format */ - if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { - struct strbuf nbuf; - strbuf_init(&nbuf, 0); + if ((type == OBJ_BLOB) && path) { + struct strbuf nbuf = STRBUF_INIT; if (convert_to_git(path, buf, size, &nbuf, write_object ? safe_crlf : 0)) { - munmap(buf, size); buf = strbuf_detach(&nbuf, &size); re_allocated = 1; } @@@ -2503,39 -2437,20 +2476,39 @@@ ret = write_sha1_file(buf, size, typename(type), sha1); else ret = hash_sha1_file(buf, size, typename(type), sha1); - if (re_allocated) { + if (re_allocated) free(buf); - return ret; - } - if (size) + return ret; +} + +int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, + enum object_type type, const char *path) +{ + int ret; + size_t size = xsize_t(st->st_size); + + if (!S_ISREG(st->st_mode)) { + struct strbuf sbuf = STRBUF_INIT; + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, + type, path); + else + ret = -1; + strbuf_release(&sbuf); + } else if (size) { + void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + ret = index_mem(sha1, buf, size, write_object, type, path); munmap(buf, size); + } else + ret = index_mem(sha1, NULL, size, write_object, type, path); + close(fd); return ret; } int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object) { int fd; - char *target; - size_t len; + struct strbuf sb = STRBUF_INIT; switch (st->st_mode & S_IFMT) { case S_IFREG: @@@ -2548,17 -2463,20 +2521,17 @@@ path); break; case S_IFLNK: - len = xsize_t(st->st_size); - target = xmalloc(len + 1); - if (readlink(path, target, len + 1) != st->st_size) { + if (strbuf_readlink(&sb, path, st->st_size)) { char *errstr = strerror(errno); - free(target); return error("readlink(\"%s\"): %s", path, errstr); } if (!write_object) - hash_sha1_file(target, len, blob_type, sha1); - else if (write_sha1_file(target, len, blob_type, sha1)) + hash_sha1_file(sb.buf, sb.len, blob_type, sha1); + else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database", path); - free(target); + strbuf_release(&sb); break; case S_IFDIR: return resolve_gitlink_ref(path, "HEAD", sha1); diff --combined t/t7700-repack.sh index f5682d66db,9ce546e3b2..6b29bff782 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@@ -69,24 -69,66 +69,85 @@@ test_expect_success 'packed obs in alt done ' +test_expect_failure 'packed obs in alt ODB are repacked when local repo has packs' ' + rm -f .git/objects/pack/* && + echo new_content >> file1 && + git add file1 && + git commit -m more_content && + git repack && + git repack -a -d && + myidx=$(ls -1 .git/objects/pack/*.idx) && + test -f "$myidx" && + for p in alt_objects/pack/*.idx; do + git verify-pack -v $p | sed -n -e "/^[0-9a-f]\{40\}/p" + done | while read sha1 rest; do + if ! ( git verify-pack -v $myidx | grep "^$sha1" ); then + echo "Missing object in local pack: $sha1" + return 1 + fi + done +' + + test_expect_success 'packed obs in alternate ODB kept pack are repacked' ' + # swap the .keep so the commit object is in the pack with .keep + for p in alt_objects/pack/*.pack + do + base_name=$(basename $p .pack) + if test -f alt_objects/pack/$base_name.keep + then + rm alt_objects/pack/$base_name.keep + else + touch alt_objects/pack/$base_name.keep + fi + done + git repack -a -d && + myidx=$(ls -1 .git/objects/pack/*.idx) && + test -f "$myidx" && + for p in alt_objects/pack/*.idx; do + git verify-pack -v $p | sed -n -e "/^[0-9a-f]\{40\}/p" + done | while read sha1 rest; do + if ! ( git verify-pack -v $myidx | grep "^$sha1" ); then + echo "Missing object in local pack: $sha1" + return 1 + fi + done + ' + + test_expect_success 'packed unreachable obs in alternate ODB are not loosened' ' + rm -f alt_objects/pack/*.keep && + mv .git/objects/pack/* alt_objects/pack/ && + csha1=$(git rev-parse HEAD^{commit}) && + git reset --hard HEAD^ && + sleep 1 && + git reflog expire --expire=now --expire-unreachable=now --all && + # The pack-objects call on the next line is equivalent to + # git repack -A -d without the call to prune-packed + git pack-objects --honor-pack-keep --non-empty --all --reflog \ + --unpack-unreachable .git/objects/info/alternates && + test_must_fail git show $csha1 + ' + + test_expect_success 'local packed unreachable obs that exist in alternate ODB are not loosened' ' + echo `pwd`/alt_objects > .git/objects/info/alternates && + echo "$csha1" | git pack-objects --non-empty --all --reflog pack && + rm -f .git/objects/pack/* && + mv pack-* .git/objects/pack/ && + # The pack-objects call on the next line is equivalent to + # git repack -A -d without the call to prune-packed + git pack-objects --honor-pack-keep --non-empty --all --reflog \ + --unpack-unreachable .git/objects/info/alternates && + test_must_fail git show $csha1 + ' + test_done