pack-check.c on commit "simple euristic for further free packing improvements" (4e8da19)
   1#include "cache.h"
   2#include "pack.h"
   3
   4static int verify_packfile(struct packed_git *p)
   5{
   6        unsigned long index_size = p->index_size;
   7        void *index_base = p->index_base;
   8        SHA_CTX ctx;
   9        unsigned char sha1[20];
  10        unsigned long pack_size = p->pack_size;
  11        void *pack_base;
  12        struct pack_header *hdr;
  13        int nr_objects, err, i;
  14
  15        /* Header consistency check */
  16        hdr = p->pack_base;
  17        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
  18                return error("Packfile %s signature mismatch", p->pack_name);
  19        if (!pack_version_ok(hdr->hdr_version))
  20                return error("Packfile version %d unsupported",
  21                             ntohl(hdr->hdr_version));
  22        nr_objects = ntohl(hdr->hdr_entries);
  23        if (num_packed_objects(p) != nr_objects)
  24                return error("Packfile claims to have %d objects, "
  25                             "while idx size expects %d", nr_objects,
  26                             num_packed_objects(p));
  27
  28        SHA1_Init(&ctx);
  29        pack_base = p->pack_base;
  30        SHA1_Update(&ctx, pack_base, pack_size - 20);
  31        SHA1_Final(sha1, &ctx);
  32        if (memcmp(sha1, pack_base + pack_size - 20, 20))
  33                return error("Packfile %s SHA1 mismatch with itself",
  34                             p->pack_name);
  35        if (memcmp(sha1, index_base + index_size - 40, 20))
  36                return error("Packfile %s SHA1 mismatch with idx",
  37                             p->pack_name);
  38
  39        /* Make sure everything reachable from idx is valid.  Since we
  40         * have verified that nr_objects matches between idx and pack,
  41         * we do not do scan-streaming check on the pack file.
  42         */
  43        for (i = err = 0; i < nr_objects; i++) {
  44                unsigned char sha1[20];
  45                struct pack_entry e;
  46                void *data;
  47                char type[20];
  48                unsigned long size;
  49
  50                if (nth_packed_object_sha1(p, i, sha1))
  51                        die("internal error pack-check nth-packed-object");
  52                if (!find_pack_entry_one(sha1, &e, p))
  53                        die("internal error pack-check find-pack-entry-one");
  54                data = unpack_entry_gently(&e, type, &size);
  55                if (!data) {
  56                        err = error("cannot unpack %s from %s",
  57                                    sha1_to_hex(sha1), p->pack_name);
  58                        continue;
  59                }
  60                if (check_sha1_signature(sha1, data, size, type)) {
  61                        err = error("packed %s from %s is corrupt",
  62                                    sha1_to_hex(sha1), p->pack_name);
  63                        free(data);
  64                        continue;
  65                }
  66                free(data);
  67        }
  68
  69        return err;
  70}
  71
  72
  73#define MAX_CHAIN 40
  74
  75static void show_pack_info(struct packed_git *p)
  76{
  77        struct pack_header *hdr;
  78        int nr_objects, i;
  79        unsigned int chain_histogram[MAX_CHAIN];
  80
  81        hdr = p->pack_base;
  82        nr_objects = ntohl(hdr->hdr_entries);
  83        memset(chain_histogram, 0, sizeof(chain_histogram));
  84
  85        for (i = 0; i < nr_objects; i++) {
  86                unsigned char sha1[20], base_sha1[20];
  87                struct pack_entry e;
  88                char type[20];
  89                unsigned long size;
  90                unsigned long store_size;
  91                unsigned int delta_chain_length;
  92
  93                if (nth_packed_object_sha1(p, i, sha1))
  94                        die("internal error pack-check nth-packed-object");
  95                if (!find_pack_entry_one(sha1, &e, p))
  96                        die("internal error pack-check find-pack-entry-one");
  97
  98                packed_object_info_detail(&e, type, &size, &store_size,
  99                                          &delta_chain_length,
 100                                          base_sha1);
 101                printf("%s ", sha1_to_hex(sha1));
 102                if (!delta_chain_length)
 103                        printf("%-6s %lu %u\n", type, size, e.offset);
 104                else {
 105                        printf("%-6s %lu %u %u %s\n", type, size, e.offset,
 106                               delta_chain_length, sha1_to_hex(base_sha1));
 107                        if (delta_chain_length < MAX_CHAIN)
 108                                chain_histogram[delta_chain_length]++;
 109                        else
 110                                chain_histogram[0]++;
 111                }
 112        }
 113
 114        for (i = 0; i < MAX_CHAIN; i++) {
 115                if (!chain_histogram[i])
 116                        continue;
 117                printf("chain length %s %d: %d object%s\n",
 118                       i ? "=" : ">=",
 119                       i ? i : MAX_CHAIN,
 120                       chain_histogram[i],
 121                       1 < chain_histogram[i] ? "s" : "");
 122        }
 123}
 124
 125int verify_pack(struct packed_git *p, int verbose)
 126{
 127        unsigned long index_size = p->index_size;
 128        void *index_base = p->index_base;
 129        SHA_CTX ctx;
 130        unsigned char sha1[20];
 131        int ret;
 132
 133        ret = 0;
 134        /* Verify SHA1 sum of the index file */
 135        SHA1_Init(&ctx);
 136        SHA1_Update(&ctx, index_base, index_size - 20);
 137        SHA1_Final(sha1, &ctx);
 138        if (memcmp(sha1, index_base + index_size - 20, 20))
 139                ret = error("Packfile index for %s SHA1 mismatch",
 140                            p->pack_name);
 141
 142        if (!ret) {
 143                /* Verify pack file */
 144                use_packed_git(p);
 145                ret = verify_packfile(p);
 146                unuse_packed_git(p);
 147        }
 148
 149        if (verbose) {
 150                if (ret)
 151                        printf("%s: bad\n", p->pack_name);
 152                else {
 153                        use_packed_git(p);
 154                        show_pack_info(p);
 155                        unuse_packed_git(p);
 156                        printf("%s: ok\n", p->pack_name);
 157                }
 158        }
 159
 160        return ret;
 161}