Merge branch 'jc/index-pack'
author Junio C Hamano <gitster@pobox.com>
Tue, 19 Jul 2011 16:54:51 +0000 (09:54 -0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 19 Jul 2011 16:54:51 +0000 (09:54 -0700)
* jc/index-pack:
verify-pack: use index-pack --verify
index-pack: show histogram when emulating "verify-pack -v"
index-pack: start learning to emulate "verify-pack -v"
index-pack: a miniscule refactor
index-pack --verify: read anomalous offsets from v2 idx file
write_idx_file: need_large_offset() helper function
index-pack: --verify
write_idx_file: introduce a struct to hold idx customization options
index-pack: group the delta-base array entries also by type

Conflicts:
builtin/verify-pack.c
cache.h
sha1_file.c

builtin/index-pack.c
builtin/pack-objects.c
builtin/verify-pack.c
cache.h
csum-file.c
csum-file.h
fast-import.c
pack-write.c
pack.h
sha1_file.c
t/t5302-pack-index.sh
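diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 81cdc28b30731e722eaa2af045a3464bc83f0e16..0945adbb3bb188b612341c31c8986fabb491928d 100644 (file)
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c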
@@ -11,7 +11,7 @@
 #include "exec_cmd.h"
 
 static const char index_pack_usage[] =
-"git index-pack [-v] [-o <index-file>] [ --keep | --keep=<msg> ] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
+"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
 
 struct object_entry {
        struct pack_idx_entry idx;
@@ -19,6 +19,8 @@ struct object_entry {
        unsigned int hdr_size;
        enum object_type type;
        enum object_type real_type;
+       unsigned delta_depth;
+       int base_object_no;
 };
 
 union delta_base {
@@ -66,6 +68,7 @@ static struct progress *progress;
 static unsigned char input_buffer[4096];
 static unsigned int input_offset, input_len;
 static off_t consumed_bytes;
+static unsigned deepest_delta;
 static git_SHA_CTX input_ctx;
 static uint32_t input_crc32;
 static int input_fd, output_fd, pack_fd;
@@ -389,7 +392,18 @@ static void *get_data_from_pack(struct object_entry *obj)
        return data;
 }
 
-static int find_delta(const union delta_base *base)
+static int compare_delta_bases(const union delta_base *base1,
+                              const union delta_base *base2,
+                              enum object_type type1,
+                              enum object_type type2)
+{
+       int cmp = type1 - type2;
+       if (cmp)
+               return cmp;
+       return memcmp(base1, base2, UNION_BASE_SZ);
+}
+
+static int find_delta(const union delta_base *base, enum object_type type)
 {
        int first = 0, last = nr_deltas;
 
@@ -398,7 +412,8 @@ static int find_delta(const union delta_base *base)
                 struct delta_entry *delta = &deltas[next];
                 int cmp;
 
-                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
+               cmp = compare_delta_bases(base, &delta->base,
+                                         type, objects[delta->obj_no].type);
                 if (!cmp)
                         return next;
                 if (cmp < 0) {
@@ -411,9 +426,10 @@ static int find_delta(const union delta_base *base)
 }
 
 static void find_delta_children(const union delta_base *base,
-                               int *first_index, int *last_index)
+                               int *first_index, int *last_index,
+                               enum object_type type)
 {
-       int first = find_delta(base);
+       int first = find_delta(base, type);
        int last = first;
        int end = nr_deltas - 1;
 
@@ -483,12 +499,17 @@ static void sha1_object(const void *data, unsigned long size,
        }
 }
 
+static int is_delta_type(enum object_type type)
+{
+       return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
+}
+
 static void *get_base_data(struct base_data *c)
 {
        if (!c->data) {
                struct object_entry *obj = c->obj;
 
-               if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+               if (is_delta_type(obj->type)) {
                        void *base = get_base_data(c->base);
                        void *raw = get_data_from_pack(obj);
                        c->data = patch_delta(
@@ -515,6 +536,10 @@ static void resolve_delta(struct object_entry *delta_obj,
        void *base_data, *delta_data;
 
        delta_obj->real_type = base->obj->real_type;
+       delta_obj->delta_depth = base->obj->delta_depth + 1;
+       if (deepest_delta < delta_obj->delta_depth)
+               deepest_delta = delta_obj->delta_depth;
+       delta_obj->base_object_no = base->obj - objects;
        delta_data = get_data_from_pack(delta_obj);
        base_data = get_base_data(base);
        result->obj = delta_obj;
@@ -541,11 +566,13 @@ static void find_unresolved_deltas(struct base_data *base,
                union delta_base base_spec;
 
                hashcpy(base_spec.sha1, base->obj->idx.sha1);
-               find_delta_children(&base_spec, &ref_first, &ref_last);
+               find_delta_children(&base_spec,
+                                   &ref_first, &ref_last, OBJ_REF_DELTA);
 
                memset(&base_spec, 0, sizeof(base_spec));
                base_spec.offset = base->obj->idx.offset;
-               find_delta_children(&base_spec, &ofs_first, &ofs_last);
+               find_delta_children(&base_spec,
+                                   &ofs_first, &ofs_last, OBJ_OFS_DELTA);
        }
 
        if (ref_last == -1 && ofs_last == -1) {
@@ -557,24 +584,24 @@ static void find_unresolved_deltas(struct base_data *base,
 
        for (i = ref_first; i <= ref_last; i++) {
                struct object_entry *child = objects + deltas[i].obj_no;
-               if (child->real_type == OBJ_REF_DELTA) {
-                       struct base_data result;
-                       resolve_delta(child, base, &result);
-                       if (i == ref_last && ofs_last == -1)
-                               free_base_data(base);
-                       find_unresolved_deltas(&result, base);
-               }
+               struct base_data result;
+
+               assert(child->real_type == OBJ_REF_DELTA);
+               resolve_delta(child, base, &result);
+               if (i == ref_last && ofs_last == -1)
+                       free_base_data(base);
+               find_unresolved_deltas(&result, base);
        }
 
        for (i = ofs_first; i <= ofs_last; i++) {
                struct object_entry *child = objects + deltas[i].obj_no;
-               if (child->real_type == OBJ_OFS_DELTA) {
-                       struct base_data result;
-                       resolve_delta(child, base, &result);
-                       if (i == ofs_last)
-                               free_base_data(base);
-                       find_unresolved_deltas(&result, base);
-               }
+               struct base_data result;
+
+               assert(child->real_type == OBJ_OFS_DELTA);
+               resolve_delta(child, base, &result);
+               if (i == ofs_last)
+                       free_base_data(base);
+               find_unresolved_deltas(&result, base);
        }
 
        unlink_base_data(base);
@@ -584,7 +611,11 @@ static int compare_delta_entry(const void *a, const void *b)
 {
        const struct delta_entry *delta_a = a;
        const struct delta_entry *delta_b = b;
-       return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
+
+       /* group by type (ref vs ofs) and then by value (sha-1 or offset) */
+       return compare_delta_bases(&delta_a->base, &delta_b->base,
+                                  objects[delta_a->obj_no].type,
+                                  objects[delta_b->obj_no].type);
 }
 
 /* Parse all objects and return the pack content SHA1 hash */
@@ -608,7 +639,7 @@ static void parse_pack_objects(unsigned char *sha1)
                struct object_entry *obj = &objects[i];
                void *data = unpack_raw_entry(obj, &delta->base);
                obj->real_type = obj->type;
-               if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+               if (is_delta_type(obj->type)) {
                        nr_deltas++;
                        delta->obj_no = i;
                        delta++;
@@ -655,7 +686,7 @@ static void parse_pack_objects(unsigned char *sha1)
                struct object_entry *obj = &objects[i];
                struct base_data base_obj;
 
-               if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
+               if (is_delta_type(obj->type))
                        continue;
                base_obj.obj = obj;
                base_obj.data = NULL;
@@ -859,24 +890,137 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
 
 static int git_index_pack_config(const char *k, const char *v, void *cb)
 {
+       struct pack_idx_option *opts = cb;
+
        if (!strcmp(k, "pack.indexversion")) {
-               pack_idx_default_version = git_config_int(k, v);
-               if (pack_idx_default_version > 2)
-                       die("bad pack.indexversion=%"PRIu32,
-                               pack_idx_default_version);
+               opts->version = git_config_int(k, v);
+               if (opts->version > 2)
+                       die("bad pack.indexversion=%"PRIu32, opts->version);
                return 0;
        }
        return git_default_config(k, v, cb);
 }
 
+static int cmp_uint32(const void *a_, const void *b_)
+{
+       uint32_t a = *((uint32_t *)a_);
+       uint32_t b = *((uint32_t *)b_);
+
+       return (a < b) ? -1 : (a != b);
+}
+
+static void read_v2_anomalous_offsets(struct packed_git *p,
+                                     struct pack_idx_option *opts)
+{
+       const uint32_t *idx1, *idx2;
+       uint32_t i;
+
+       /* The address of the 4-byte offset table */
+       idx1 = (((const uint32_t *)p->index_data)
+               + 2 /* 8-byte header */
+               + 256 /* fan out */
+               + 5 * p->num_objects /* 20-byte SHA-1 table */
+               + p->num_objects /* CRC32 table */
+               );
+
+       /* The address of the 8-byte offset table */
+       idx2 = idx1 + p->num_objects;
+
+       for (i = 0; i < p->num_objects; i++) {
+               uint32_t off = ntohl(idx1[i]);
+               if (!(off & 0x80000000))
+                       continue;
+               off = off & 0x7fffffff;
+               if (idx2[off * 2])
+                       continue;
+               /*
+                * The real offset is ntohl(idx2[off * 2]) in high 4
+                * octets, and ntohl(idx2[off * 2 + 1]) in low 4
+                * octets.  But idx2[off * 2] is Zero!!!
+                */
+               ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc);
+               opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]);
+       }
+
+       if (1 < opts->anomaly_nr)
+               qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32);
+}
+
+static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
+{
+       struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+
+       if (!p)
+               die("Cannot open existing pack file '%s'", pack_name);
+       if (open_pack_index(p))
+               die("Cannot open existing pack idx file for '%s'", pack_name);
+
+       /* Read the attributes from the existing idx file */
+       opts->version = p->index_version;
+
+       if (opts->version == 2)
+               read_v2_anomalous_offsets(p, opts);
+
+       /*
+        * Get rid of the idx file as we do not need it anymore.
+        * NEEDSWORK: extract this bit from free_pack_by_name() in
+        * sha1_file.c, perhaps?  It shouldn't matter very much as we
+        * know we haven't installed this pack (hence we never have
+        * read anything from it).
+        */
+       close_pack_index(p);
+       free(p);
+}
+
+static void show_pack_info(int stat_only)
+{
+       int i, baseobjects = nr_objects - nr_deltas;
+       unsigned long *chain_histogram = NULL;
+
+       if (deepest_delta)
+               chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long));
+
+       for (i = 0; i < nr_objects; i++) {
+               struct object_entry *obj = &objects[i];
+
+               if (is_delta_type(obj->type))
+                       chain_histogram[obj->delta_depth - 1]++;
+               if (stat_only)
+                       continue;
+               printf("%s %-6s %lu %lu %"PRIuMAX,
+                      sha1_to_hex(obj->idx.sha1),
+                      typename(obj->real_type), obj->size,
+                      (unsigned long)(obj[1].idx.offset - obj->idx.offset),
+                      (uintmax_t)obj->idx.offset);
+               if (is_delta_type(obj->type)) {
+                       struct object_entry *bobj = &objects[obj->base_object_no];
+                       printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1));
+               }
+               putchar('\n');
+       }
+
+       if (baseobjects)
+               printf("non delta: %d object%s\n",
+                      baseobjects, baseobjects > 1 ? "s" : "");
+       for (i = 0; i < deepest_delta; i++) {
+               if (!chain_histogram[i])
+                       continue;
+               printf("chain length = %d: %lu object%s\n",
+                      i + 1,
+                      chain_histogram[i],
+                      chain_histogram[i] > 1 ? "s" : "");
+       }
+}
+
 int cmd_index_pack(int argc, const char **argv, const char *prefix)
 {
-       int i, fix_thin_pack = 0;
+       int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0;
        const char *curr_pack, *curr_index;
        const char *index_name = NULL, *pack_name = NULL;
        const char *keep_name = NULL, *keep_msg = NULL;
        char *index_name_buf = NULL, *keep_name_buf = NULL;
        struct pack_idx_entry **idx_objects;
+       struct pack_idx_option opts;
        unsigned char pack_sha1[20];
 
        if (argc == 2 && !strcmp(argv[1], "-h"))
@@ -884,7 +1028,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 
        read_replace_refs = 0;
 
-       git_config(git_index_pack_config, NULL);
+       reset_pack_idx_option(&opts);
+       git_config(git_index_pack_config, &opts);
        if (prefix && chdir(prefix))
                die("Cannot come back to cwd");
 
@@ -898,6 +1043,15 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                                fix_thin_pack = 1;
                        } else if (!strcmp(arg, "--strict")) {
                                strict = 1;
+                       } else if (!strcmp(arg, "--verify")) {
+                               verify = 1;
+                       } else if (!strcmp(arg, "--verify-stat")) {
+                               verify = 1;
+                               stat = 1;
+                       } else if (!strcmp(arg, "--verify-stat-only")) {
+                               verify = 1;
+                               stat = 1;
+                               stat_only = 1;
                        } else if (!strcmp(arg, "--keep")) {
                                keep_msg = "";
                        } else if (!prefixcmp(arg, "--keep=")) {
@@ -923,12 +1077,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                                index_name = argv[++i];
                        } else if (!prefixcmp(arg, "--index-version=")) {
                                char *c;
-                               pack_idx_default_version = strtoul(arg + 16, &c, 10);
-                               if (pack_idx_default_version > 2)
+                               opts.version = strtoul(arg + 16, &c, 10);
+                               if (opts.version > 2)
                                        die("bad %s", arg);
                                if (*c == ',')
-                                       pack_idx_off32_limit = strtoul(c+1, &c, 0);
-                               if (*c || pack_idx_off32_limit & 0x80000000)
+                                       opts.off32_limit = strtoul(c+1, &c, 0);
+                               if (*c || opts.off32_limit & 0x80000000)
                                        die("bad %s", arg);
                        } else
                                usage(index_pack_usage);
@@ -964,11 +1118,17 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                strcpy(keep_name_buf + len - 5, ".keep");
                keep_name = keep_name_buf;
        }
+       if (verify) {
+               if (!index_name)
+                       die("--verify with no packfile name given");
+               read_idx_option(&opts, index_name);
+               opts.flags |= WRITE_IDX_VERIFY;
+       }
 
        curr_pack = open_pack_file(pack_name);
        parse_pack_header();
-       objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
-       deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
+       objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
+       deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
        parse_pack_objects(pack_sha1);
        if (nr_deltas == nr_resolved_deltas) {
                stop_progress(&progress);
@@ -1008,16 +1168,22 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
        if (strict)
                check_objects();
 
+       if (stat)
+               show_pack_info(stat_only);
+
        idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *));
        for (i = 0; i < nr_objects; i++)
                idx_objects[i] = &objects[i].idx;
-       curr_index = write_idx_file(index_name, idx_objects, nr_objects, pack_sha1);
+       curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1);
        free(idx_objects);
 
-       final(pack_name, curr_pack,
-               index_name, curr_index,
-               keep_name, keep_msg,
-               pack_sha1);
+       if (!verify)
+               final(pack_name, curr_pack,
+                     index_name, curr_index,
+                     keep_name, keep_msg,
+                     pack_sha1);
+       else
+               close(input_fd);
        free(objects);
        free(index_name_buf);
        free(keep_name_buf);
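diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c6e2d8766b0ec15fcfe9dc0a60ee81db6750b527..84e6dafb12657a6f0431f2a287d0cca976269c35 100644 (file)
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c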
@@ -70,6 +70,7 @@ static int local;
 static int incremental;
 static int ignore_packed_keep;
 static int allow_ofs_delta;
+static struct pack_idx_option pack_idx_opts;
 static const char *base_name;
 static int progress = 1;
 static int window = 10;
@@ -493,8 +494,8 @@ static void write_pack_file(void)
                        const char *idx_tmp_name;
                        char tmpname[PATH_MAX];
 
-                       idx_tmp_name = write_idx_file(NULL, written_list,
-                                                     nr_written, sha1);
+                       idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
+                                                     &pack_idx_opts, sha1);
 
                        snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
                                 base_name, sha1_to_hex(sha1));
@@ -1884,10 +1885,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                return 0;
        }
        if (!strcmp(k, "pack.indexversion")) {
-               pack_idx_default_version = git_config_int(k, v);
-               if (pack_idx_default_version > 2)
+               pack_idx_opts.version = git_config_int(k, v);
+               if (pack_idx_opts.version > 2)
                        die("bad pack.indexversion=%"PRIu32,
-                               pack_idx_default_version);
+                           pack_idx_opts.version);
                return 0;
        }
        if (!strcmp(k, "pack.packsizelimit")) {
@@ -2134,6 +2135,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
        rp_ac = 2;
 
+       reset_pack_idx_option(&pack_idx_opts);
        git_config(git_pack_config, NULL);
        if (!pack_compression_seen && core_compression_seen)
                pack_compression_level = core_compression_level;
@@ -2278,12 +2280,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                }
                if (!prefixcmp(arg, "--index-version=")) {
                        char *c;
-                       pack_idx_default_version = strtoul(arg + 16, &c, 10);
-                       if (pack_idx_default_version > 2)
+                       pack_idx_opts.version = strtoul(arg + 16, &c, 10);
+                       if (pack_idx_opts.version > 2)
                                die("bad %s", arg);
                        if (*c == ',')
-                               pack_idx_off32_limit = strtoul(c+1, &c, 0);
-                       if (*c || pack_idx_off32_limit & 0x80000000)
+                               pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
+                       if (*c || pack_idx_opts.off32_limit & 0x80000000)
                                die("bad %s", arg);
                        continue;
                }
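diff --git a/builtin/verify-pack.c b/builtin/verify-pack.c
index 3a919b170726a95b19c16f984ade250d4fc24c07..e841b4a38d2b47c39d95683a5747cbc19da1f48b 100644 (file)
--- a/builtin/verify-pack.c
+++ b/builtin/verify-pack.c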
 #include "builtin.h"
 #include "cache.h"
-#include "pack.h"
-#include "pack-revindex.h"
+#include "run-command.h"
 #include "parse-options.h"
 
-#define MAX_CHAIN 50
-
 #define VERIFY_PACK_VERBOSE 01
 #define VERIFY_PACK_STAT_ONLY 02
 
-static void show_pack_info(struct packed_git *p, unsigned int flags)
-{
-       uint32_t nr_objects, i;
-       int cnt;
-       int stat_only = flags & VERIFY_PACK_STAT_ONLY;
-       unsigned long chain_histogram[MAX_CHAIN+1], baseobjects;
-
-       nr_objects = p->num_objects;
-       memset(chain_histogram, 0, sizeof(chain_histogram));
-       baseobjects = 0;
-
-       for (i = 0; i < nr_objects; i++) {
-               const unsigned char *sha1;
-               unsigned char base_sha1[20];
-               const char *type;
-               unsigned long size;
-               unsigned long store_size;
-               off_t offset;
-               unsigned int delta_chain_length;
-
-               sha1 = nth_packed_object_sha1(p, i);
-               if (!sha1)
-                       die("internal error pack-check nth-packed-object");
-               offset = nth_packed_object_offset(p, i);
-               type = typename(packed_object_info_detail(p, offset, &size, &store_size,
-                                                &delta_chain_length,
-                                                base_sha1));
-               if (!stat_only)
-                       printf("%s ", sha1_to_hex(sha1));
-               if (!delta_chain_length) {
-                       if (!stat_only)
-                               printf("%-6s %lu %lu %"PRIuMAX"\n",
-                                      type, size, store_size, (uintmax_t)offset);
-                       baseobjects++;
-               }
-               else {
-                       if (!stat_only)
-                               printf("%-6s %lu %lu %"PRIuMAX" %u %s\n",
-                                      type, size, store_size, (uintmax_t)offset,
-                                      delta_chain_length, sha1_to_hex(base_sha1));
-                       if (delta_chain_length <= MAX_CHAIN)
-                               chain_histogram[delta_chain_length]++;
-                       else
-                               chain_histogram[0]++;
-               }
-       }
-
-       if (baseobjects)
-               printf("non delta: %lu object%s\n",
-                      baseobjects, baseobjects > 1 ? "s" : "");
-
-       for (cnt = 1; cnt <= MAX_CHAIN; cnt++) {
-               if (!chain_histogram[cnt])
-                       continue;
-               printf("chain length = %d: %lu object%s\n", cnt,
-                      chain_histogram[cnt],
-                      chain_histogram[cnt] > 1 ? "s" : "");
-       }
-       if (chain_histogram[0])
-               printf("chain length > %d: %lu object%s\n", MAX_CHAIN,
-                      chain_histogram[0],
-                      chain_histogram[0] > 1 ? "s" : "");
-}
-
 static int verify_one_pack(const char *path, unsigned int flags)
 {
-       char arg[PATH_MAX];
-       int len;
+       struct child_process index_pack;
+       const char *argv[] = {"index-pack", NULL, NULL, NULL };
+       struct strbuf arg = STRBUF_INIT;
        int verbose = flags & VERIFY_PACK_VERBOSE;
        int stat_only = flags & VERIFY_PACK_STAT_ONLY;
-       struct packed_git *pack;
        int err;
 
-       len = strlcpy(arg, path, PATH_MAX);
-       if (len >= PATH_MAX)
-               return error("name too long: %s", path);
-
-       /*
-        * In addition to "foo.idx" we accept "foo.pack" and "foo";
-        * normalize these forms to "foo.idx" for add_packed_git().
-        */
-       if (has_extension(arg, ".pack")) {
-               strcpy(arg + len - 5, ".idx");
-               len--;
-       } else if (!has_extension(arg, ".idx")) {
-               if (len + 4 >= PATH_MAX)
-                       return error("name too long: %s.idx", arg);
-               strcpy(arg + len, ".idx");
-               len += 4;
-       }
+       if (stat_only)
+               argv[1] = "--verify-stat-only";
+       else if (verbose)
+               argv[1] = "--verify-stat";
+       else
+               argv[1] = "--verify";
 
        /*
-        * add_packed_git() uses our buffer (containing "foo.idx") to
-        * build the pack filename ("foo.pack").  Make sure it fits.
+        * In addition to "foo.pack" we accept "foo.idx" and "foo";
+        * normalize these forms to "foo.pack" for "index-pack --verify".
         */
-       if (len + 1 >= PATH_MAX) {
-               arg[len - 4] = '\0';
-               return error("name too long: %s.pack", arg);
-       }
-
-       pack = add_packed_git(arg, len, 1);
-       if (!pack)
-               return error("packfile %s not found.", arg);
+       strbuf_addstr(&arg, path);
+       if (has_extension(arg.buf, ".idx"))
+               strbuf_splice(&arg, arg.len - 3, 3, "pack", 4);
+       else if (!has_extension(arg.buf, ".pack"))
+               strbuf_add(&arg, ".pack", 5);
+       argv[2] = arg.buf;
 
-       install_packed_git(pack);
+       memset(&index_pack, 0, sizeof(index_pack));
+       index_pack.argv = argv;
+       index_pack.git_cmd = 1;
 
-       if (!stat_only)
-               err = verify_pack(pack);
-       else
-               err = open_pack_index(pack);
+       err = run_command(&index_pack);
 
        if (verbose || stat_only) {
                if (err)
-                       printf("%s: bad\n", pack->pack_name);
+                       printf("%s: bad\n", arg.buf);
                else {
-                       show_pack_info(pack, flags);
                        if (!stat_only)
-                               printf("%s: ok\n", pack->pack_name);
+                               printf("%s: ok\n", arg.buf);
                }
        }
+       strbuf_release(&arg);
 
        return err;
 }
@@ -159,7 +78,6 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix)
        for (i = 0; i < argc; i++) {
                if (verify_one_pack(argv[i], flags))
                        err = 1;
-               discard_revindex();
        }
 
        return err;
diff --git a/cache.h b/cache.h
index 5e80113ee93c5c44a7ada5982faaad9a3fcbdc2e..9e12d55470435102215f2dd99eb3432856612dbd 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -1010,7 +1010,6 @@ extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
 extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
-extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
 
 struct object_info {
diff --git a/csum-file.c b/csum-file.c
index be49d5fcf900cb47cb14d8c85a69112b26532b93..fc97d6e04528b5c5b55fc211a462f3cb828f3d49 100644 (file)
--- a/csum-file.c
+++ b/csum-file.c
 #include "progress.h"
 #include "csum-file.h"
 
-static void flush(struct sha1file *f, void * buf, unsigned int count)
+static void flush(struct sha1file *f, void *buf, unsigned int count)
 {
+       if (0 <= f->check_fd && count)  {
+               unsigned char check_buffer[8192];
+               ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
+
+               if (ret < 0)
+                       die_errno("%s: sha1 file read error", f->name);
+               if (ret < count)
+                       die("%s: sha1 file truncated", f->name);
+               if (memcmp(buf, check_buffer, count))
+                       die("sha1 file '%s' validation error", f->name);
+       }
+
        for (;;) {
                int ret = xwrite(f->fd, buf, count);
                if (ret > 0) {
@@ -59,6 +71,17 @@ int sha1close(struct sha1file *f, unsigned char *result, unsigned int flags)
                fd = 0;
        } else
                fd = f->fd;
+       if (0 <= f->check_fd) {
+               char discard;
+               int cnt = read_in_full(f->check_fd, &discard, 1);
+               if (cnt < 0)
+                       die_errno("%s: error when reading the tail of sha1 file",
+                                 f->name);
+               if (cnt)
+                       die("%s: sha1 file has trailing garbage", f->name);
+               if (close(f->check_fd))
+                       die_errno("%s: sha1 file error on close", f->name);
+       }
        free(f);
        return fd;
 }
@@ -101,10 +124,31 @@ struct sha1file *sha1fd(int fd, const char *name)
        return sha1fd_throughput(fd, name, NULL);
 }
 
+struct sha1file *sha1fd_check(const char *name)
+{
+       int sink, check;
+       struct sha1file *f;
+
+       sink = open("/dev/null", O_WRONLY);
+       if (sink < 0)
+               return NULL;
+       check = open(name, O_RDONLY);
+       if (check < 0) {
+               int saved_errno = errno;
+               close(sink);
+               errno = saved_errno;
+               return NULL;
+       }
+       f = sha1fd(sink, name);
+       f->check_fd = check;
+       return f;
+}
+
 struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp)
 {
        struct sha1file *f = xmalloc(sizeof(*f));
        f->fd = fd;
+       f->check_fd = -1;
        f->offset = 0;
        f->total = 0;
        f->tp = tp;
diff --git a/csum-file.h b/csum-file.h
index 294add2a91496355b42ce02ecfe9c453d21b291a..6a7967c6bf604076c7d68ce139f65f34df3bc30e 100644 (file)
--- a/csum-file.h
+++ b/csum-file.h
@@ -6,6 +6,7 @@ struct progress;
 /* A SHA1-protected file */
 struct sha1file {
        int fd;
+       int check_fd;
        unsigned int offset;
        git_SHA_CTX ctx;
        off_t total;
@@ -21,6 +22,7 @@ struct sha1file {
 #define CSUM_FSYNC     2
 
 extern struct sha1file *sha1fd(int fd, const char *name);
+extern struct sha1file *sha1fd_check(const char *name);
 extern struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp);
 extern int sha1close(struct sha1file *, unsigned char *, unsigned int);
 extern int sha1write(struct sha1file *, void *, unsigned int);
diff --git a/fast-import.c b/fast-import.c
index 1d5e3336a51a4bf19c0ab700565826b438c43266..9e8d1868aa3b178f4b61ef57430883854898b044 100644 (file)
--- a/fast-import.c
+++ b/fast-import.c
@@ -304,6 +304,7 @@ static unsigned int atom_cnt;
 static struct atom_str **atom_table;
 
 /* The .pack file being generated */
+static struct pack_idx_option pack_idx_opts;
 static unsigned int pack_id;
 static struct sha1file *pack_file;
 static struct packed_git *pack_data;
@@ -896,7 +897,7 @@ static const char *create_index(void)
        if (c != last)
                die("internal consistency error creating the index");
 
-       tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
+       tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1);
        free(idx);
        return tmpfile;
 }
@@ -3195,10 +3196,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                return 0;
        }
        if (!strcmp(k, "pack.indexversion")) {
-               pack_idx_default_version = git_config_int(k, v);
-               if (pack_idx_default_version > 2)
+               pack_idx_opts.version = git_config_int(k, v);
+               if (pack_idx_opts.version > 2)
                        die("bad pack.indexversion=%"PRIu32,
-                           pack_idx_default_version);
+                           pack_idx_opts.version);
                return 0;
        }
        if (!strcmp(k, "pack.packsizelimit")) {
@@ -3252,6 +3253,7 @@ int main(int argc, const char **argv)
                usage(fast_import_usage);
 
        setup_git_directory();
+       reset_pack_idx_option(&pack_idx_opts);
        git_config(git_pack_config, NULL);
        if (!pack_compression_seen && core_compression_seen)
                pack_compression_level = core_compression_level;
index a905ca4486754f099a30f90a2fcd22d0c771a070..9cd3bfbb4b3859cbbdc1b9375ea95f511fffc94e 100644 (file)
@@ -2,8 +2,12 @@
 #include "pack.h"
 #include "csum-file.h"
 
-uint32_t pack_idx_default_version = 2;
-uint32_t pack_idx_off32_limit = 0x7fffffff;
+void reset_pack_idx_option(struct pack_idx_option *opts)
+{
+       memset(opts, 0, sizeof(*opts)); /* clears flags and the anomaly counters */
+       opts->version = 2;              /* idx version used unless overridden */
+       opts->off32_limit = 0x7fffffff; /* offsets above this need a 64-bit entry */
+}
 
 static int sha1_compare(const void *_a, const void *_b)
 {
@@ -12,13 +16,35 @@ static int sha1_compare(const void *_a, const void *_b)
        return hashcmp(a->sha1, b->sha1);
 }
 
+static int cmp_uint32(const void *a_, const void *b_)
+{
+       const uint32_t *lhs = a_, *rhs = b_;
+
+       /* -1, 0 or 1; comparing avoids the wraparound a subtraction risks */
+       return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+static int need_large_offset(off_t offset, const struct pack_idx_option *opts)
+{
+       uint32_t key = offset;
+
+       if ((offset >> 31) || (opts->off32_limit < offset))
+               return 1;
+       /* a smaller offset qualifies only when listed in opts->anomaly */
+       if (!opts->anomaly_nr)
+               return 0;
+       return bsearch(&key, opts->anomaly, opts->anomaly_nr,
+                      sizeof(key), cmp_uint32) != NULL;
+}
+
 /*
  * On entry *sha1 contains the pack content SHA1 hash, on exit it is
  * the SHA1 hash of sorted object names. The objects array passed in
  * will be sorted by SHA1 on exit.
  */
 const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects,
-                          int nr_objects, unsigned char *sha1)
+                          int nr_objects, const struct pack_idx_option *opts,
+                          unsigned char *sha1)
 {
        struct sha1file *f;
        struct pack_idx_entry **sorted_by_sha, **list, **last;
@@ -42,20 +68,25 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
        else
                sorted_by_sha = list = last = NULL;
 
-       if (!index_name) {
-               static char tmpfile[PATH_MAX];
-               fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX");
-               index_name = xstrdup(tmpfile);
+       if (opts->flags & WRITE_IDX_VERIFY) {
+               assert(index_name);
+               f = sha1fd_check(index_name);
        } else {
-               unlink(index_name);
-               fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+               if (!index_name) {
+                       static char tmpfile[PATH_MAX];
+                       fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX");
+                       index_name = xstrdup(tmpfile);
+               } else {
+                       unlink(index_name);
+                       fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+               }
+               if (fd < 0)
+                       die_errno("unable to create '%s'", index_name);
+               f = sha1fd(fd, index_name);
        }
-       if (fd < 0)
-               die_errno("unable to create '%s'", index_name);
-       f = sha1fd(fd, index_name);
 
        /* if last object's offset is >= 2^31 we should use index V2 */
-       index_version = (last_obj_offset >> 31) ? 2 : pack_idx_default_version;
+       index_version = need_large_offset(last_obj_offset, opts) ? 2 : opts->version;
 
        /* index versions 2 and above need a header */
        if (index_version >= 2) {
@@ -115,8 +146,11 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
                list = sorted_by_sha;
                for (i = 0; i < nr_objects; i++) {
                        struct pack_idx_entry *obj = *list++;
-                       uint32_t offset = (obj->offset <= pack_idx_off32_limit) ?
-                               obj->offset : (0x80000000 | nr_large_offset++);
+                       uint32_t offset;
+
+                       offset = (need_large_offset(obj->offset, opts)
+                                 ? (0x80000000 | nr_large_offset++)
+                                 : obj->offset);
                        offset = htonl(offset);
                        sha1write(f, &offset, 4);
                }
@@ -126,18 +160,20 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
                while (nr_large_offset) {
                        struct pack_idx_entry *obj = *list++;
                        uint64_t offset = obj->offset;
-                       if (offset > pack_idx_off32_limit) {
-                               uint32_t split[2];
-                               split[0] = htonl(offset >> 32);
-                               split[1] = htonl(offset & 0xffffffff);
-                               sha1write(f, split, 8);
-                               nr_large_offset--;
-                       }
+                       uint32_t split[2];
+
+                       if (!need_large_offset(offset, opts))
+                               continue;
+                       split[0] = htonl(offset >> 32);
+                       split[1] = htonl(offset & 0xffffffff);
+                       sha1write(f, split, 8);
+                       nr_large_offset--;
                }
        }
 
        sha1write(f, sha1, 20);
-       sha1close(f, NULL, CSUM_FSYNC);
+       sha1close(f, NULL, ((opts->flags & WRITE_IDX_VERIFY)
+                           ? CSUM_CLOSE : CSUM_FSYNC));
        git_SHA1_Final(sha1, &ctx);
        return index_name;
 }
diff --git a/pack.h b/pack.h
index bb275762b7eb6f473f333ae40780821e383db20b..722a54e00a2cb7d9514c12f799fb1ec15930cf5d 100644 (file)
--- a/pack.h
+++ b/pack.h
@@ -34,9 +34,24 @@ struct pack_header {
  */
 #define PACK_IDX_SIGNATURE 0xff744f63  /* "\377tOc" */
 
-/* These may be overridden by command-line parameters */
-extern uint32_t pack_idx_default_version;
-extern uint32_t pack_idx_off32_limit;
+struct pack_idx_option {
+       unsigned flags;         /* OR of the WRITE_IDX_* bits below */
+       /* flag bits */
+#define WRITE_IDX_VERIFY 01    /* write_idx_file() opens the idx via sha1fd_check() */
+
+       uint32_t version;       /* idx format version requested by the caller */
+       uint32_t off32_limit;   /* offsets above this get a 64-bit entry */
+
+       /*
+        * Offsets that would fit within off32_limit but need to be
+        * written out as 64-bit entries for byte-for-byte verification.
+        * Searched with bsearch(), so the list must be kept sorted.
+        */
+       int anomaly_alloc, anomaly_nr;  /* anomaly_nr = entries in use */
+       uint32_t *anomaly;
+};
+
+extern void reset_pack_idx_option(struct pack_idx_option *);
 
 /*
  * Packed object index header
@@ -55,7 +70,7 @@ struct pack_idx_entry {
        off_t offset;
 };
 
-extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, unsigned char *sha1);
+extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1);
 extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
 extern int verify_pack(struct packed_git *);
index a6aac70923cad6ed3f2afec631dcc67d5f28a019..89d7e5eb57ea80a7bca3f361530903032fbe2b03 100644 (file)
@@ -1553,61 +1553,6 @@ int unpack_object_header(struct packed_git *p,
        return type;
 }
 
-int packed_object_info_detail(struct packed_git *p,
-                                     off_t obj_offset,
-                                     unsigned long *size,
-                                     unsigned long *store_size,
-                                     unsigned int *delta_chain_length,
-                                     unsigned char *base_sha1)
-{
-       struct pack_window *w_curs = NULL;
-       off_t curpos;
-       unsigned long dummy;
-       unsigned char *next_sha1;
-       enum object_type type;
-       struct revindex_entry *revidx;
-
-       *delta_chain_length = 0;
-       curpos = obj_offset;
-       type = unpack_object_header(p, &w_curs, &curpos, size);
-
-       revidx = find_pack_revindex(p, obj_offset);
-       *store_size = revidx[1].offset - obj_offset;
-
-       for (;;) {
-               switch (type) {
-               default:
-                       die("pack %s contains unknown object type %d",
-                           p->pack_name, type);
-               case OBJ_COMMIT:
-               case OBJ_TREE:
-               case OBJ_BLOB:
-               case OBJ_TAG:
-                       unuse_pack(&w_curs);
-                       return type;
-               case OBJ_OFS_DELTA:
-                       obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
-                       if (!obj_offset)
-                               die("pack %s contains bad delta base reference of type %s",
-                                   p->pack_name, typename(type));
-                       if (*delta_chain_length == 0) {
-                               revidx = find_pack_revindex(p, obj_offset);
-                               hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
-                       }
-                       break;
-               case OBJ_REF_DELTA:
-                       next_sha1 = use_pack(p, &w_curs, curpos, NULL);
-                       if (*delta_chain_length == 0)
-                               hashcpy(base_sha1, next_sha1);
-                       obj_offset = find_pack_entry_one(next_sha1, p);
-                       break;
-               }
-               (*delta_chain_length)++;
-               curpos = obj_offset;
-               type = unpack_object_header(p, &w_curs, &curpos, &dummy);
-       }
-}
-
 static int packed_object_info(struct packed_git *p, off_t obj_offset,
                              unsigned long *sizep, int *rtype)
 {
index b34ea93a8056a7ae0edf111ffe14c99b7c5b33c5..f8fa92446cfc46309468b4ecf142b74b1a812985 100755 (executable)
@@ -65,6 +65,14 @@ test_expect_success \
     'cmp "test-1-${pack1}.idx" "1.idx" &&
      cmp "test-2-${pack2}.idx" "2.idx"'
 
+test_expect_success 'index-pack --verify on index version 1' '
+       git index-pack --verify "test-1-${pack1}.pack"
+'
+
+test_expect_success 'index-pack --verify on index version 2' '
+       git index-pack --verify "test-2-${pack2}.pack"
+'
+
 test_expect_success \
     'index v2: force some 64-bit offsets with pack-objects' \
     'pack3=$(git pack-objects --index-version=2,0x40000 test-3 <obj-list)'
@@ -93,6 +101,16 @@ test_expect_success OFF64_T \
     '64-bit offsets: index-pack result should match pack-objects one' \
     'cmp "test-3-${pack3}.idx" "3.idx"'
 
+test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2 (cheat)' '
+       # This cheats by knowing which lower offset should still be encoded
+       # in 64-bit representation.
+       git index-pack --verify --index-version=2,0x40000 "test-3-${pack3}.pack"
+'
+
+test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2' '
+       git index-pack --verify "test-3-${pack3}.pack"
+'
+
 # returns the object number for given object in given pack index
 index_obj_nr()
 {
@@ -208,9 +226,8 @@ test_expect_success \
      ( while read obj
        do git cat-file -p $obj >/dev/null || exit 1
        done <obj-list ) &&
-     err=$(test_must_fail git verify-pack \
-       ".git/objects/pack/pack-${pack1}.pack" 2>&1) &&
-     echo "$err" | grep "CRC mismatch"'
+     test_must_fail git verify-pack ".git/objects/pack/pack-${pack1}.pack"
+'
 
 test_expect_success 'running index-pack in the object store' '
     rm -f .git/objects/pack/* &&