cecdd2680c7cbdfce85c352ed59e1eaef673f38c
   1#include "cache.h"
   2#include "delta.h"
   3#include "pack.h"
   4#include "csum-file.h"
   5#include "blob.h"
   6#include "commit.h"
   7#include "tag.h"
   8#include "tree.h"
   9
  10static const char index_pack_usage[] =
  11"git-index-pack [-o <index-file>] { <pack-file> | --stdin [<pack-file>] }";
  12
  13struct object_entry
  14{
  15        unsigned long offset;
  16        unsigned long size;
  17        unsigned int hdr_size;
  18        enum object_type type;
  19        enum object_type real_type;
  20        unsigned char sha1[20];
  21};
  22
  23union delta_base {
  24        unsigned char sha1[20];
  25        unsigned long offset;
  26};
  27
  28/*
  29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
  30 * to memcmp() only the first 20 bytes.
  31 */
  32#define UNION_BASE_SZ   20
  33
  34struct delta_entry
  35{
  36        struct object_entry *obj;
  37        union delta_base base;
  38};
  39
  40static struct object_entry *objects;
  41static struct delta_entry *deltas;
  42static int nr_objects;
  43static int nr_deltas;
  44
  45static int from_stdin;
  46
  47/* We always read in 4kB chunks. */
  48static unsigned char input_buffer[4096];
  49static unsigned long input_offset, input_len, consumed_bytes;
  50static SHA_CTX input_ctx;
  51static int input_fd, output_fd, mmap_fd;
  52
  53/*
  54 * Make sure at least "min" bytes are available in the buffer, and
  55 * return the pointer to the buffer.
  56 */
  57static void * fill(int min)
  58{
  59        if (min <= input_len)
  60                return input_buffer + input_offset;
  61        if (min > sizeof(input_buffer))
  62                die("cannot fill %d bytes", min);
  63        if (input_offset) {
  64                if (output_fd >= 0)
  65                        write_or_die(output_fd, input_buffer, input_offset);
  66                SHA1_Update(&input_ctx, input_buffer, input_offset);
  67                memcpy(input_buffer, input_buffer + input_offset, input_len);
  68                input_offset = 0;
  69        }
  70        do {
  71                int ret = xread(input_fd, input_buffer + input_len,
  72                                sizeof(input_buffer) - input_len);
  73                if (ret <= 0) {
  74                        if (!ret)
  75                                die("early EOF");
  76                        die("read error on input: %s", strerror(errno));
  77                }
  78                input_len += ret;
  79        } while (input_len < min);
  80        return input_buffer;
  81}
  82
  83static void use(int bytes)
  84{
  85        if (bytes > input_len)
  86                die("used more bytes than were available");
  87        input_len -= bytes;
  88        input_offset += bytes;
  89        consumed_bytes += bytes;
  90}
  91
  92static const char * open_pack_file(const char *pack_name)
  93{
  94        if (from_stdin) {
  95                input_fd = 0;
  96                if (!pack_name) {
  97                        static char tmpfile[PATH_MAX];
  98                        snprintf(tmpfile, sizeof(tmpfile),
  99                                 "%s/pack_XXXXXX", get_object_directory());
 100                        output_fd = mkstemp(tmpfile);
 101                        pack_name = xstrdup(tmpfile);
 102                } else
 103                        output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
 104                if (output_fd < 0)
 105                        die("unable to create %s: %s\n", pack_name, strerror(errno));
 106                mmap_fd = output_fd;
 107        } else {
 108                input_fd = open(pack_name, O_RDONLY);
 109                if (input_fd < 0)
 110                        die("cannot open packfile '%s': %s",
 111                            pack_name, strerror(errno));
 112                output_fd = -1;
 113                mmap_fd = input_fd;
 114        }
 115        SHA1_Init(&input_ctx);
 116        return pack_name;
 117}
 118
 119static void parse_pack_header(void)
 120{
 121        struct pack_header *hdr = fill(sizeof(struct pack_header));
 122
 123        /* Header consistency check */
 124        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
 125                die("pack signature mismatch");
 126        if (!pack_version_ok(hdr->hdr_version))
 127                die("pack version %d unsupported", ntohl(hdr->hdr_version));
 128
 129        nr_objects = ntohl(hdr->hdr_entries);
 130        use(sizeof(struct pack_header));
 131        /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
 132}
 133
 134static void bad_object(unsigned long offset, const char *format,
 135                       ...) NORETURN __attribute__((format (printf, 2, 3)));
 136
 137static void bad_object(unsigned long offset, const char *format, ...)
 138{
 139        va_list params;
 140        char buf[1024];
 141
 142        va_start(params, format);
 143        vsnprintf(buf, sizeof(buf), format, params);
 144        va_end(params);
 145        die("pack has bad object at offset %lu: %s", offset, buf);
 146}
 147
 148static void *unpack_entry_data(unsigned long offset, unsigned long size)
 149{
 150        z_stream stream;
 151        void *buf = xmalloc(size);
 152
 153        memset(&stream, 0, sizeof(stream));
 154        stream.next_out = buf;
 155        stream.avail_out = size;
 156        stream.next_in = fill(1);
 157        stream.avail_in = input_len;
 158        inflateInit(&stream);
 159
 160        for (;;) {
 161                int ret = inflate(&stream, 0);
 162                use(input_len - stream.avail_in);
 163                if (stream.total_out == size && ret == Z_STREAM_END)
 164                        break;
 165                if (ret != Z_OK)
 166                        bad_object(offset, "inflate returned %d", ret);
 167                stream.next_in = fill(1);
 168                stream.avail_in = input_len;
 169        }
 170        inflateEnd(&stream);
 171        return buf;
 172}
 173
 174static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 175{
 176        unsigned char *p, c;
 177        unsigned long size, base_offset;
 178        unsigned shift;
 179
 180        obj->offset = consumed_bytes;
 181
 182        p = fill(1);
 183        c = *p;
 184        use(1);
 185        obj->type = (c >> 4) & 7;
 186        size = (c & 15);
 187        shift = 4;
 188        while (c & 0x80) {
 189                p = fill(1);
 190                c = *p;
 191                use(1);
 192                size += (c & 0x7fUL) << shift;
 193                shift += 7;
 194        }
 195        obj->size = size;
 196
 197        switch (obj->type) {
 198        case OBJ_REF_DELTA:
 199                hashcpy(delta_base->sha1, fill(20));
 200                use(20);
 201                break;
 202        case OBJ_OFS_DELTA:
 203                memset(delta_base, 0, sizeof(*delta_base));
 204                p = fill(1);
 205                c = *p;
 206                use(1);
 207                base_offset = c & 127;
 208                while (c & 128) {
 209                        base_offset += 1;
 210                        if (!base_offset || base_offset & ~(~0UL >> 7))
 211                                bad_object(obj->offset, "offset value overflow for delta base object");
 212                        p = fill(1);
 213                        c = *p;
 214                        use(1);
 215                        base_offset = (base_offset << 7) + (c & 127);
 216                }
 217                delta_base->offset = obj->offset - base_offset;
 218                if (delta_base->offset >= obj->offset)
 219                        bad_object(obj->offset, "delta base offset is out of bound");
 220                break;
 221        case OBJ_COMMIT:
 222        case OBJ_TREE:
 223        case OBJ_BLOB:
 224        case OBJ_TAG:
 225                break;
 226        default:
 227                bad_object(obj->offset, "bad object type %d", obj->type);
 228        }
 229        obj->hdr_size = consumed_bytes - obj->offset;
 230
 231        return unpack_entry_data(obj->offset, obj->size);
 232}
 233
 234static void * get_data_from_pack(struct object_entry *obj)
 235{
 236        unsigned long from = obj[0].offset + obj[0].hdr_size;
 237        unsigned long len = obj[1].offset - from;
 238        unsigned pg_offset = from % getpagesize();
 239        unsigned char *map, *data;
 240        z_stream stream;
 241        int st;
 242
 243        map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
 244                   mmap_fd, from - pg_offset);
 245        if (map == MAP_FAILED)
 246                die("cannot mmap pack file: %s", strerror(errno));
 247        data = xmalloc(obj->size);
 248        memset(&stream, 0, sizeof(stream));
 249        stream.next_out = data;
 250        stream.avail_out = obj->size;
 251        stream.next_in = map + pg_offset;
 252        stream.avail_in = len;
 253        inflateInit(&stream);
 254        while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
 255        inflateEnd(&stream);
 256        if (st != Z_STREAM_END || stream.total_out != obj->size)
 257                die("serious inflate inconsistency");
 258        munmap(map, len + pg_offset);
 259        return data;
 260}
 261
 262static int find_delta(const union delta_base *base)
 263{
 264        int first = 0, last = nr_deltas;
 265
 266        while (first < last) {
 267                int next = (first + last) / 2;
 268                struct delta_entry *delta = &deltas[next];
 269                int cmp;
 270
 271                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
 272                if (!cmp)
 273                        return next;
 274                if (cmp < 0) {
 275                        last = next;
 276                        continue;
 277                }
 278                first = next+1;
 279        }
 280        return -first-1;
 281}
 282
 283static int find_delta_childs(const union delta_base *base,
 284                             int *first_index, int *last_index)
 285{
 286        int first = find_delta(base);
 287        int last = first;
 288        int end = nr_deltas - 1;
 289
 290        if (first < 0)
 291                return -1;
 292        while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 293                --first;
 294        while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 295                ++last;
 296        *first_index = first;
 297        *last_index = last;
 298        return 0;
 299}
 300
 301static void sha1_object(const void *data, unsigned long size,
 302                        enum object_type type, unsigned char *sha1)
 303{
 304        SHA_CTX ctx;
 305        char header[50];
 306        int header_size;
 307        const char *type_str;
 308
 309        switch (type) {
 310        case OBJ_COMMIT: type_str = commit_type; break;
 311        case OBJ_TREE:   type_str = tree_type; break;
 312        case OBJ_BLOB:   type_str = blob_type; break;
 313        case OBJ_TAG:    type_str = tag_type; break;
 314        default:
 315                die("bad type %d", type);
 316        }
 317
 318        header_size = sprintf(header, "%s %lu", type_str, size) + 1;
 319
 320        SHA1_Init(&ctx);
 321        SHA1_Update(&ctx, header, header_size);
 322        SHA1_Update(&ctx, data, size);
 323        SHA1_Final(sha1, &ctx);
 324}
 325
 326static void resolve_delta(struct delta_entry *delta, void *base_data,
 327                          unsigned long base_size, enum object_type type)
 328{
 329        struct object_entry *obj = delta->obj;
 330        void *delta_data;
 331        unsigned long delta_size;
 332        void *result;
 333        unsigned long result_size;
 334        union delta_base delta_base;
 335        int j, first, last;
 336
 337        obj->real_type = type;
 338        delta_data = get_data_from_pack(obj);
 339        delta_size = obj->size;
 340        result = patch_delta(base_data, base_size, delta_data, delta_size,
 341                             &result_size);
 342        free(delta_data);
 343        if (!result)
 344                bad_object(obj->offset, "failed to apply delta");
 345        sha1_object(result, result_size, type, obj->sha1);
 346
 347        hashcpy(delta_base.sha1, obj->sha1);
 348        if (!find_delta_childs(&delta_base, &first, &last)) {
 349                for (j = first; j <= last; j++)
 350                        if (deltas[j].obj->type == OBJ_REF_DELTA)
 351                                resolve_delta(&deltas[j], result, result_size, type);
 352        }
 353
 354        memset(&delta_base, 0, sizeof(delta_base));
 355        delta_base.offset = obj->offset;
 356        if (!find_delta_childs(&delta_base, &first, &last)) {
 357                for (j = first; j <= last; j++)
 358                        if (deltas[j].obj->type == OBJ_OFS_DELTA)
 359                                resolve_delta(&deltas[j], result, result_size, type);
 360        }
 361
 362        free(result);
 363}
 364
 365static int compare_delta_entry(const void *a, const void *b)
 366{
 367        const struct delta_entry *delta_a = a;
 368        const struct delta_entry *delta_b = b;
 369        return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 370}
 371
 372/* Parse all objects and return the pack content SHA1 hash */
 373static void parse_pack_objects(unsigned char *sha1)
 374{
 375        int i;
 376        struct delta_entry *delta = deltas;
 377        void *data;
 378        struct stat st;
 379
 380        /*
 381         * First pass:
 382         * - find locations of all objects;
 383         * - calculate SHA1 of all non-delta objects;
 384         * - remember base SHA1 for all deltas.
 385         */
 386        for (i = 0; i < nr_objects; i++) {
 387                struct object_entry *obj = &objects[i];
 388                data = unpack_raw_entry(obj, &delta->base);
 389                obj->real_type = obj->type;
 390                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 391                        nr_deltas++;
 392                        delta->obj = obj;
 393                        delta++;
 394                } else
 395                        sha1_object(data, obj->size, obj->type, obj->sha1);
 396                free(data);
 397        }
 398        objects[i].offset = consumed_bytes;
 399
 400        /* Check pack integrity */
 401        SHA1_Update(&input_ctx, input_buffer, input_offset);
 402        SHA1_Final(sha1, &input_ctx);
 403        if (hashcmp(fill(20), sha1))
 404                die("pack is corrupted (SHA1 mismatch)");
 405        use(20);
 406        if (output_fd >= 0)
 407                write_or_die(output_fd, input_buffer, input_offset);
 408
 409        /* If input_fd is a file, we should have reached its end now. */
 410        if (fstat(input_fd, &st))
 411                die("cannot fstat packfile: %s", strerror(errno));
 412        if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
 413                die("pack has junk at the end");
 414
 415        /* Sort deltas by base SHA1/offset for fast searching */
 416        qsort(deltas, nr_deltas, sizeof(struct delta_entry),
 417              compare_delta_entry);
 418
 419        /*
 420         * Second pass:
 421         * - for all non-delta objects, look if it is used as a base for
 422         *   deltas;
 423         * - if used as a base, uncompress the object and apply all deltas,
 424         *   recursively checking if the resulting object is used as a base
 425         *   for some more deltas.
 426         */
 427        for (i = 0; i < nr_objects; i++) {
 428                struct object_entry *obj = &objects[i];
 429                union delta_base base;
 430                int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
 431
 432                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
 433                        continue;
 434                hashcpy(base.sha1, obj->sha1);
 435                ref = !find_delta_childs(&base, &ref_first, &ref_last);
 436                memset(&base, 0, sizeof(base));
 437                base.offset = obj->offset;
 438                ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
 439                if (!ref && !ofs)
 440                        continue;
 441                data = get_data_from_pack(obj);
 442                if (ref)
 443                        for (j = ref_first; j <= ref_last; j++)
 444                                if (deltas[j].obj->type == OBJ_REF_DELTA)
 445                                        resolve_delta(&deltas[j], data,
 446                                                      obj->size, obj->type);
 447                if (ofs)
 448                        for (j = ofs_first; j <= ofs_last; j++)
 449                                if (deltas[j].obj->type == OBJ_OFS_DELTA)
 450                                        resolve_delta(&deltas[j], data,
 451                                                      obj->size, obj->type);
 452                free(data);
 453        }
 454
 455        /* Check for unresolved deltas */
 456        for (i = 0; i < nr_deltas; i++) {
 457                if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
 458                    deltas[i].obj->real_type == OBJ_OFS_DELTA)
 459                        die("pack has unresolved deltas");
 460        }
 461}
 462
 463static int sha1_compare(const void *_a, const void *_b)
 464{
 465        struct object_entry *a = *(struct object_entry **)_a;
 466        struct object_entry *b = *(struct object_entry **)_b;
 467        return hashcmp(a->sha1, b->sha1);
 468}
 469
 470/*
 471 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 472 * the SHA1 hash of sorted object names.
 473 */
 474static const char * write_index_file(const char *index_name, unsigned char *sha1)
 475{
 476        struct sha1file *f;
 477        struct object_entry **sorted_by_sha, **list, **last;
 478        unsigned int array[256];
 479        int i, fd;
 480        SHA_CTX ctx;
 481
 482        if (nr_objects) {
 483                sorted_by_sha =
 484                        xcalloc(nr_objects, sizeof(struct object_entry *));
 485                list = sorted_by_sha;
 486                last = sorted_by_sha + nr_objects;
 487                for (i = 0; i < nr_objects; ++i)
 488                        sorted_by_sha[i] = &objects[i];
 489                qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
 490                      sha1_compare);
 491
 492        }
 493        else
 494                sorted_by_sha = list = last = NULL;
 495
 496        if (!index_name) {
 497                static char tmpfile[PATH_MAX];
 498                snprintf(tmpfile, sizeof(tmpfile),
 499                         "%s/index_XXXXXX", get_object_directory());
 500                fd = mkstemp(tmpfile);
 501                index_name = xstrdup(tmpfile);
 502        } else {
 503                unlink(index_name);
 504                fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
 505        }
 506        if (fd < 0)
 507                die("unable to create %s: %s", index_name, strerror(errno));
 508        f = sha1fd(fd, index_name);
 509
 510        /*
 511         * Write the first-level table (the list is sorted,
 512         * but we use a 256-entry lookup to be able to avoid
 513         * having to do eight extra binary search iterations).
 514         */
 515        for (i = 0; i < 256; i++) {
 516                struct object_entry **next = list;
 517                while (next < last) {
 518                        struct object_entry *obj = *next;
 519                        if (obj->sha1[0] != i)
 520                                break;
 521                        next++;
 522                }
 523                array[i] = htonl(next - sorted_by_sha);
 524                list = next;
 525        }
 526        sha1write(f, array, 256 * sizeof(int));
 527
 528        /* recompute the SHA1 hash of sorted object names.
 529         * currently pack-objects does not do this, but that
 530         * can be fixed.
 531         */
 532        SHA1_Init(&ctx);
 533        /*
 534         * Write the actual SHA1 entries..
 535         */
 536        list = sorted_by_sha;
 537        for (i = 0; i < nr_objects; i++) {
 538                struct object_entry *obj = *list++;
 539                unsigned int offset = htonl(obj->offset);
 540                sha1write(f, &offset, 4);
 541                sha1write(f, obj->sha1, 20);
 542                SHA1_Update(&ctx, obj->sha1, 20);
 543        }
 544        sha1write(f, sha1, 20);
 545        sha1close(f, NULL, 1);
 546        free(sorted_by_sha);
 547        SHA1_Final(sha1, &ctx);
 548        return index_name;
 549}
 550
 551static void final(const char *final_pack_name, const char *curr_pack_name,
 552                  const char *final_index_name, const char *curr_index_name,
 553                  unsigned char *sha1)
 554{
 555        char name[PATH_MAX];
 556        int err;
 557
 558        if (!from_stdin) {
 559                close(input_fd);
 560        } else {
 561                err = close(output_fd);
 562                if (err)
 563                        die("error while closing pack file: %s", strerror(errno));
 564                chmod(curr_pack_name, 0444);
 565        }
 566
 567        if (final_pack_name != curr_pack_name) {
 568                if (!final_pack_name) {
 569                        snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
 570                                 get_object_directory(), sha1_to_hex(sha1));
 571                        final_pack_name = name;
 572                }
 573                if (move_temp_to_file(curr_pack_name, final_pack_name))
 574                        die("cannot store pack file");
 575        }
 576
 577        chmod(curr_index_name, 0444);
 578        if (final_index_name != curr_index_name) {
 579                if (!final_index_name) {
 580                        snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
 581                                 get_object_directory(), sha1_to_hex(sha1));
 582                        final_index_name = name;
 583                }
 584                if (move_temp_to_file(curr_index_name, final_index_name))
 585                        die("cannot store index file");
 586        }
 587}
 588
 589int main(int argc, char **argv)
 590{
 591        int i;
 592        const char *curr_pack, *pack_name = NULL;
 593        const char *curr_index, *index_name = NULL;
 594        char *index_name_buf = NULL;
 595        unsigned char sha1[20];
 596
 597        for (i = 1; i < argc; i++) {
 598                const char *arg = argv[i];
 599
 600                if (*arg == '-') {
 601                        if (!strcmp(arg, "--stdin")) {
 602                                from_stdin = 1;
 603                        } else if (!strcmp(arg, "-o")) {
 604                                if (index_name || (i+1) >= argc)
 605                                        usage(index_pack_usage);
 606                                index_name = argv[++i];
 607                        } else
 608                                usage(index_pack_usage);
 609                        continue;
 610                }
 611
 612                if (pack_name)
 613                        usage(index_pack_usage);
 614                pack_name = arg;
 615        }
 616
 617        if (!pack_name && !from_stdin)
 618                usage(index_pack_usage);
 619        if (!index_name && pack_name) {
 620                int len = strlen(pack_name);
 621                if (!has_extension(pack_name, ".pack"))
 622                        die("packfile name '%s' does not end with '.pack'",
 623                            pack_name);
 624                index_name_buf = xmalloc(len);
 625                memcpy(index_name_buf, pack_name, len - 5);
 626                strcpy(index_name_buf + len - 5, ".idx");
 627                index_name = index_name_buf;
 628        }
 629
 630        curr_pack = open_pack_file(pack_name);
 631        parse_pack_header();
 632        objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
 633        deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
 634        parse_pack_objects(sha1);
 635        free(deltas);
 636        curr_index = write_index_file(index_name, sha1);
 637        final(pack_name, curr_pack, index_name, curr_index, sha1);
 638        free(objects);
 639        free(index_name_buf);
 640
 641        printf("%s\n", sha1_to_hex(sha1));
 642
 643        return 0;
 644}