index-pack.con commit index-pack: compare only the first 20-bytes of the key. (3c55287)
   1#include "cache.h"
   2#include "delta.h"
   3#include "pack.h"
   4#include "csum-file.h"
   5#include "blob.h"
   6#include "commit.h"
   7#include "tag.h"
   8#include "tree.h"
   9
  10static const char index_pack_usage[] =
  11"git-index-pack [-o index-file] pack-file";
  12
  13struct object_entry
  14{
  15        unsigned long offset;
  16        enum object_type type;
  17        enum object_type real_type;
  18        unsigned char sha1[20];
  19};
  20
  21union delta_base {
  22        unsigned char sha1[20];
  23        unsigned long offset;
  24};
  25
  26/*
  27 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
  28 * to memcmp() only the first 20 bytes.
  29 */
  30#define UNION_BASE_SZ   20
  31
  32struct delta_entry
  33{
  34        struct object_entry *obj;
  35        union delta_base base;
  36};
  37
  38static const char *pack_name;
  39static unsigned char *pack_base;
  40static unsigned long pack_size;
  41static struct object_entry *objects;
  42static struct delta_entry *deltas;
  43static int nr_objects;
  44static int nr_deltas;
  45
  46static void open_pack_file(void)
  47{
  48        int fd;
  49        struct stat st;
  50
  51        fd = open(pack_name, O_RDONLY);
  52        if (fd < 0)
  53                die("cannot open packfile '%s': %s", pack_name,
  54                    strerror(errno));
  55        if (fstat(fd, &st)) {
  56                int err = errno;
  57                close(fd);
  58                die("cannot fstat packfile '%s': %s", pack_name,
  59                    strerror(err));
  60        }
  61        pack_size = st.st_size;
  62        pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
  63        if (pack_base == MAP_FAILED) {
  64                int err = errno;
  65                close(fd);
  66                die("cannot mmap packfile '%s': %s", pack_name,
  67                    strerror(err));
  68        }
  69        close(fd);
  70}
  71
  72static void parse_pack_header(void)
  73{
  74        const struct pack_header *hdr;
  75        unsigned char sha1[20];
  76        SHA_CTX ctx;
  77
  78        /* Ensure there are enough bytes for the header and final SHA1 */
  79        if (pack_size < sizeof(struct pack_header) + 20)
  80                die("packfile '%s' is too small", pack_name);
  81
  82        /* Header consistency check */
  83        hdr = (void *)pack_base;
  84        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
  85                die("packfile '%s' signature mismatch", pack_name);
  86        if (!pack_version_ok(hdr->hdr_version))
  87                die("packfile '%s' version %d unsupported",
  88                    pack_name, ntohl(hdr->hdr_version));
  89
  90        nr_objects = ntohl(hdr->hdr_entries);
  91
  92        /* Check packfile integrity */
  93        SHA1_Init(&ctx);
  94        SHA1_Update(&ctx, pack_base, pack_size - 20);
  95        SHA1_Final(sha1, &ctx);
  96        if (hashcmp(sha1, pack_base + pack_size - 20))
  97                die("packfile '%s' SHA1 mismatch", pack_name);
  98}
  99
 100static void bad_object(unsigned long offset, const char *format,
 101                       ...) NORETURN __attribute__((format (printf, 2, 3)));
 102
 103static void bad_object(unsigned long offset, const char *format, ...)
 104{
 105        va_list params;
 106        char buf[1024];
 107
 108        va_start(params, format);
 109        vsnprintf(buf, sizeof(buf), format, params);
 110        va_end(params);
 111        die("packfile '%s': bad object at offset %lu: %s",
 112            pack_name, offset, buf);
 113}
 114
 115static void *unpack_entry_data(unsigned long offset,
 116                               unsigned long *current_pos, unsigned long size)
 117{
 118        unsigned long pack_limit = pack_size - 20;
 119        unsigned long pos = *current_pos;
 120        z_stream stream;
 121        void *buf = xmalloc(size);
 122
 123        memset(&stream, 0, sizeof(stream));
 124        stream.next_out = buf;
 125        stream.avail_out = size;
 126        stream.next_in = pack_base + pos;
 127        stream.avail_in = pack_limit - pos;
 128        inflateInit(&stream);
 129
 130        for (;;) {
 131                int ret = inflate(&stream, 0);
 132                if (ret == Z_STREAM_END)
 133                        break;
 134                if (ret != Z_OK)
 135                        bad_object(offset, "inflate returned %d", ret);
 136        }
 137        inflateEnd(&stream);
 138        if (stream.total_out != size)
 139                bad_object(offset, "size mismatch (expected %lu, got %lu)",
 140                           size, stream.total_out);
 141        *current_pos = pack_limit - stream.avail_in;
 142        return buf;
 143}
 144
 145static void *unpack_raw_entry(unsigned long offset,
 146                              enum object_type *obj_type,
 147                              unsigned long *obj_size,
 148                              union delta_base *delta_base,
 149                              unsigned long *next_obj_offset)
 150{
 151        unsigned long pack_limit = pack_size - 20;
 152        unsigned long pos = offset;
 153        unsigned char c;
 154        unsigned long size, base_offset;
 155        unsigned shift;
 156        enum object_type type;
 157        void *data;
 158
 159        c = pack_base[pos++];
 160        type = (c >> 4) & 7;
 161        size = (c & 15);
 162        shift = 4;
 163        while (c & 0x80) {
 164                if (pos >= pack_limit)
 165                        bad_object(offset, "object extends past end of pack");
 166                c = pack_base[pos++];
 167                size += (c & 0x7fUL) << shift;
 168                shift += 7;
 169        }
 170
 171        switch (type) {
 172        case OBJ_REF_DELTA:
 173                if (pos + 20 >= pack_limit)
 174                        bad_object(offset, "object extends past end of pack");
 175                hashcpy(delta_base->sha1, pack_base + pos);
 176                pos += 20;
 177                break;
 178        case OBJ_OFS_DELTA:
 179                memset(delta_base, 0, sizeof(*delta_base));
 180                c = pack_base[pos++];
 181                base_offset = c & 127;
 182                while (c & 128) {
 183                        base_offset += 1;
 184                        if (!base_offset || base_offset & ~(~0UL >> 7))
 185                                bad_object(offset, "offset value overflow for delta base object");
 186                        if (pos >= pack_limit)
 187                                bad_object(offset, "object extends past end of pack");
 188                        c = pack_base[pos++];
 189                        base_offset = (base_offset << 7) + (c & 127);
 190                }
 191                delta_base->offset = offset - base_offset;
 192                if (delta_base->offset >= offset)
 193                        bad_object(offset, "delta base offset is out of bound");
 194                break;
 195        case OBJ_COMMIT:
 196        case OBJ_TREE:
 197        case OBJ_BLOB:
 198        case OBJ_TAG:
 199                break;
 200        default:
 201                bad_object(offset, "bad object type %d", type);
 202        }
 203
 204        data = unpack_entry_data(offset, &pos, size);
 205        *obj_type = type;
 206        *obj_size = size;
 207        *next_obj_offset = pos;
 208        return data;
 209}
 210
 211static int find_delta(const union delta_base *base)
 212{
 213        int first = 0, last = nr_deltas;
 214
 215        while (first < last) {
 216                int next = (first + last) / 2;
 217                struct delta_entry *delta = &deltas[next];
 218                int cmp;
 219
 220                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
 221                if (!cmp)
 222                        return next;
 223                if (cmp < 0) {
 224                        last = next;
 225                        continue;
 226                }
 227                first = next+1;
 228        }
 229        return -first-1;
 230}
 231
 232static int find_delta_childs(const union delta_base *base,
 233                             int *first_index, int *last_index)
 234{
 235        int first = find_delta(base);
 236        int last = first;
 237        int end = nr_deltas - 1;
 238
 239        if (first < 0)
 240                return -1;
 241        while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 242                --first;
 243        while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 244                ++last;
 245        *first_index = first;
 246        *last_index = last;
 247        return 0;
 248}
 249
 250static void sha1_object(const void *data, unsigned long size,
 251                        enum object_type type, unsigned char *sha1)
 252{
 253        SHA_CTX ctx;
 254        char header[50];
 255        int header_size;
 256        const char *type_str;
 257
 258        switch (type) {
 259        case OBJ_COMMIT: type_str = commit_type; break;
 260        case OBJ_TREE:   type_str = tree_type; break;
 261        case OBJ_BLOB:   type_str = blob_type; break;
 262        case OBJ_TAG:    type_str = tag_type; break;
 263        default:
 264                die("bad type %d", type);
 265        }
 266
 267        header_size = sprintf(header, "%s %lu", type_str, size) + 1;
 268
 269        SHA1_Init(&ctx);
 270        SHA1_Update(&ctx, header, header_size);
 271        SHA1_Update(&ctx, data, size);
 272        SHA1_Final(sha1, &ctx);
 273}
 274
 275static void resolve_delta(struct delta_entry *delta, void *base_data,
 276                          unsigned long base_size, enum object_type type)
 277{
 278        struct object_entry *obj = delta->obj;
 279        void *delta_data;
 280        unsigned long delta_size;
 281        void *result;
 282        unsigned long result_size;
 283        enum object_type delta_type;
 284        union delta_base delta_base;
 285        unsigned long next_obj_offset;
 286        int j, first, last;
 287
 288        obj->real_type = type;
 289        delta_data = unpack_raw_entry(obj->offset, &delta_type,
 290                                      &delta_size, &delta_base,
 291                                      &next_obj_offset);
 292        result = patch_delta(base_data, base_size, delta_data, delta_size,
 293                             &result_size);
 294        free(delta_data);
 295        if (!result)
 296                bad_object(obj->offset, "failed to apply delta");
 297        sha1_object(result, result_size, type, obj->sha1);
 298
 299        hashcpy(delta_base.sha1, obj->sha1);
 300        if (!find_delta_childs(&delta_base, &first, &last)) {
 301                for (j = first; j <= last; j++)
 302                        if (deltas[j].obj->type == OBJ_REF_DELTA)
 303                                resolve_delta(&deltas[j], result, result_size, type);
 304        }
 305
 306        memset(&delta_base, 0, sizeof(delta_base));
 307        delta_base.offset = obj->offset;
 308        if (!find_delta_childs(&delta_base, &first, &last)) {
 309                for (j = first; j <= last; j++)
 310                        if (deltas[j].obj->type == OBJ_OFS_DELTA)
 311                                resolve_delta(&deltas[j], result, result_size, type);
 312        }
 313
 314        free(result);
 315}
 316
 317static int compare_delta_entry(const void *a, const void *b)
 318{
 319        const struct delta_entry *delta_a = a;
 320        const struct delta_entry *delta_b = b;
 321        return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 322}
 323
 324static void parse_pack_objects(void)
 325{
 326        int i;
 327        unsigned long offset = sizeof(struct pack_header);
 328        struct delta_entry *delta = deltas;
 329        void *data;
 330        unsigned long data_size;
 331
 332        /*
 333         * First pass:
 334         * - find locations of all objects;
 335         * - calculate SHA1 of all non-delta objects;
 336         * - remember base SHA1 for all deltas.
 337         */
 338        for (i = 0; i < nr_objects; i++) {
 339                struct object_entry *obj = &objects[i];
 340                obj->offset = offset;
 341                data = unpack_raw_entry(offset, &obj->type, &data_size,
 342                                        &delta->base, &offset);
 343                obj->real_type = obj->type;
 344                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 345                        nr_deltas++;
 346                        delta->obj = obj;
 347                        delta++;
 348                } else
 349                        sha1_object(data, data_size, obj->type, obj->sha1);
 350                free(data);
 351        }
 352        if (offset != pack_size - 20)
 353                die("packfile '%s' has junk at the end", pack_name);
 354
 355        /* Sort deltas by base SHA1/offset for fast searching */
 356        qsort(deltas, nr_deltas, sizeof(struct delta_entry),
 357              compare_delta_entry);
 358
 359        /*
 360         * Second pass:
 361         * - for all non-delta objects, look if it is used as a base for
 362         *   deltas;
 363         * - if used as a base, uncompress the object and apply all deltas,
 364         *   recursively checking if the resulting object is used as a base
 365         *   for some more deltas.
 366         */
 367        for (i = 0; i < nr_objects; i++) {
 368                struct object_entry *obj = &objects[i];
 369                union delta_base base;
 370                int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
 371
 372                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
 373                        continue;
 374                hashcpy(base.sha1, obj->sha1);
 375                ref = !find_delta_childs(&base, &ref_first, &ref_last);
 376                memset(&base, 0, sizeof(base));
 377                base.offset = obj->offset;
 378                ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
 379                if (!ref && !ofs)
 380                        continue;
 381                data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
 382                                        &base, &offset);
 383                if (ref)
 384                        for (j = ref_first; j <= ref_last; j++)
 385                                if (deltas[j].obj->type == OBJ_REF_DELTA)
 386                                        resolve_delta(&deltas[j], data,
 387                                                      data_size, obj->type);
 388                if (ofs)
 389                        for (j = ofs_first; j <= ofs_last; j++)
 390                                if (deltas[j].obj->type == OBJ_OFS_DELTA)
 391                                        resolve_delta(&deltas[j], data,
 392                                                      data_size, obj->type);
 393                free(data);
 394        }
 395
 396        /* Check for unresolved deltas */
 397        for (i = 0; i < nr_deltas; i++) {
 398                if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
 399                    deltas[i].obj->real_type == OBJ_OFS_DELTA)
 400                        die("packfile '%s' has unresolved deltas",  pack_name);
 401        }
 402}
 403
 404static int sha1_compare(const void *_a, const void *_b)
 405{
 406        struct object_entry *a = *(struct object_entry **)_a;
 407        struct object_entry *b = *(struct object_entry **)_b;
 408        return hashcmp(a->sha1, b->sha1);
 409}
 410
 411static void write_index_file(const char *index_name, unsigned char *sha1)
 412{
 413        struct sha1file *f;
 414        struct object_entry **sorted_by_sha, **list, **last;
 415        unsigned int array[256];
 416        int i;
 417        SHA_CTX ctx;
 418
 419        if (nr_objects) {
 420                sorted_by_sha =
 421                        xcalloc(nr_objects, sizeof(struct object_entry *));
 422                list = sorted_by_sha;
 423                last = sorted_by_sha + nr_objects;
 424                for (i = 0; i < nr_objects; ++i)
 425                        sorted_by_sha[i] = &objects[i];
 426                qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
 427                      sha1_compare);
 428
 429        }
 430        else
 431                sorted_by_sha = list = last = NULL;
 432
 433        unlink(index_name);
 434        f = sha1create("%s", index_name);
 435
 436        /*
 437         * Write the first-level table (the list is sorted,
 438         * but we use a 256-entry lookup to be able to avoid
 439         * having to do eight extra binary search iterations).
 440         */
 441        for (i = 0; i < 256; i++) {
 442                struct object_entry **next = list;
 443                while (next < last) {
 444                        struct object_entry *obj = *next;
 445                        if (obj->sha1[0] != i)
 446                                break;
 447                        next++;
 448                }
 449                array[i] = htonl(next - sorted_by_sha);
 450                list = next;
 451        }
 452        sha1write(f, array, 256 * sizeof(int));
 453
 454        /* recompute the SHA1 hash of sorted object names.
 455         * currently pack-objects does not do this, but that
 456         * can be fixed.
 457         */
 458        SHA1_Init(&ctx);
 459        /*
 460         * Write the actual SHA1 entries..
 461         */
 462        list = sorted_by_sha;
 463        for (i = 0; i < nr_objects; i++) {
 464                struct object_entry *obj = *list++;
 465                unsigned int offset = htonl(obj->offset);
 466                sha1write(f, &offset, 4);
 467                sha1write(f, obj->sha1, 20);
 468                SHA1_Update(&ctx, obj->sha1, 20);
 469        }
 470        sha1write(f, pack_base + pack_size - 20, 20);
 471        sha1close(f, NULL, 1);
 472        free(sorted_by_sha);
 473        SHA1_Final(sha1, &ctx);
 474}
 475
 476int main(int argc, char **argv)
 477{
 478        int i;
 479        char *index_name = NULL;
 480        char *index_name_buf = NULL;
 481        unsigned char sha1[20];
 482
 483        for (i = 1; i < argc; i++) {
 484                const char *arg = argv[i];
 485
 486                if (*arg == '-') {
 487                        if (!strcmp(arg, "-o")) {
 488                                if (index_name || (i+1) >= argc)
 489                                        usage(index_pack_usage);
 490                                index_name = argv[++i];
 491                        } else
 492                                usage(index_pack_usage);
 493                        continue;
 494                }
 495
 496                if (pack_name)
 497                        usage(index_pack_usage);
 498                pack_name = arg;
 499        }
 500
 501        if (!pack_name)
 502                usage(index_pack_usage);
 503        if (!index_name) {
 504                int len = strlen(pack_name);
 505                if (!has_extension(pack_name, ".pack"))
 506                        die("packfile name '%s' does not end with '.pack'",
 507                            pack_name);
 508                index_name_buf = xmalloc(len);
 509                memcpy(index_name_buf, pack_name, len - 5);
 510                strcpy(index_name_buf + len - 5, ".idx");
 511                index_name = index_name_buf;
 512        }
 513
 514        open_pack_file();
 515        parse_pack_header();
 516        objects = xcalloc(nr_objects, sizeof(struct object_entry));
 517        deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
 518        parse_pack_objects();
 519        free(deltas);
 520        write_index_file(index_name, sha1);
 521        free(objects);
 522        free(index_name_buf);
 523
 524        printf("%s\n", sha1_to_hex(sha1));
 525
 526        return 0;
 527}