index-pack.con commit allow forcing index v2 and 64-bit offset treshold (4ba7d71)
   1#include "cache.h"
   2#include "delta.h"
   3#include "pack.h"
   4#include "csum-file.h"
   5#include "blob.h"
   6#include "commit.h"
   7#include "tag.h"
   8#include "tree.h"
   9
  10static const char index_pack_usage[] =
  11"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
  12
/*
 * Bookkeeping for one object of the pack.  Entries are filled in the
 * order the objects appear in the pack.
 */
struct object_entry
{
        off_t offset;                   /* position of the object's header in the pack */
        unsigned long size;             /* size decoded from the object header */
        unsigned int hdr_size;          /* bytes between offset and the deflated data */
        uint32_t crc32;                 /* CRC32 over the object's header and deflated data */
        enum object_type type;          /* type stored in the pack (may be a delta type) */
        enum object_type real_type;     /* starts as type; becomes the base's type once a delta is resolved */
        unsigned char sha1[20];         /* object name */
};
  23
/*
 * Key identifying the base of a delta: the base's object name for
 * OBJ_REF_DELTA, or the base's pack offset for OBJ_OFS_DELTA.  For the
 * offset form the whole union is zeroed before the offset is stored, so
 * keys can be compared bytewise.
 */
union delta_base {
        unsigned char sha1[20];
        off_t offset;
};

/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ   20
  34
/* One delta object: the key of its base and its own index into objects[]. */
struct delta_entry
{
        union delta_base base;
        int obj_no;
};
  40
/* Table of all objects (in pack order) and table of all delta objects. */
static struct object_entry *objects;
static struct delta_entry *deltas;
/* Number of objects, number of deltas seen, number of deltas resolved so far. */
static int nr_objects;
static int nr_deltas;
static int nr_resolved_deltas;

/* from_stdin: the pack is read from fd 0; verbose: report progress on stderr. */
static int from_stdin;
static int verbose;

/* Set by the SIGALRM handler, cleared after a progress line is printed. */
static volatile sig_atomic_t progress_update;
  51
/* Signal handler: request a refresh of the progress display. */
static void progress_interval(int signum)
{
        progress_update = 1;
}
  56
  57static void setup_progress_signal(void)
  58{
  59        struct sigaction sa;
  60        struct itimerval v;
  61
  62        memset(&sa, 0, sizeof(sa));
  63        sa.sa_handler = progress_interval;
  64        sigemptyset(&sa.sa_mask);
  65        sa.sa_flags = SA_RESTART;
  66        sigaction(SIGALRM, &sa, NULL);
  67
  68        v.it_interval.tv_sec = 1;
  69        v.it_interval.tv_usec = 0;
  70        v.it_value = v.it_interval;
  71        setitimer(ITIMER_REAL, &v, NULL);
  72
  73}
  74
  75static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
  76{
  77        unsigned percent = n * 100 / total;
  78        if (percent != last_pc || progress_update) {
  79                fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
  80                progress_update = 0;
  81        }
  82        return percent;
  83}
  84
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
/* Unconsumed input is input_len bytes starting at input_buffer + input_offset. */
static unsigned int input_offset, input_len;
/* Running total of bytes passed to use(). */
static off_t consumed_bytes;
/* SHA-1 context fed with consumed input by flush(). */
static SHA_CTX input_ctx;
/* CRC32 accumulated over the bytes passed to use(). */
static uint32_t input_crc32;
/*
 * input_fd: descriptor the pack is read from; output_fd: descriptor the
 * consumed input is copied to, or -1; pack_fd: descriptor used to
 * pread() object data back from the pack.
 */
static int input_fd, output_fd, pack_fd;
  92
  93/* Discard current buffer used content. */
  94static void flush(void)
  95{
  96        if (input_offset) {
  97                if (output_fd >= 0)
  98                        write_or_die(output_fd, input_buffer, input_offset);
  99                SHA1_Update(&input_ctx, input_buffer, input_offset);
 100                memmove(input_buffer, input_buffer + input_offset, input_len);
 101                input_offset = 0;
 102        }
 103}
 104
 105/*
 106 * Make sure at least "min" bytes are available in the buffer, and
 107 * return the pointer to the buffer.
 108 */
 109static void *fill(int min)
 110{
 111        if (min <= input_len)
 112                return input_buffer + input_offset;
 113        if (min > sizeof(input_buffer))
 114                die("cannot fill %d bytes", min);
 115        flush();
 116        do {
 117                int ret = xread(input_fd, input_buffer + input_len,
 118                                sizeof(input_buffer) - input_len);
 119                if (ret <= 0) {
 120                        if (!ret)
 121                                die("early EOF");
 122                        die("read error on input: %s", strerror(errno));
 123                }
 124                input_len += ret;
 125        } while (input_len < min);
 126        return input_buffer;
 127}
 128
 129static void use(int bytes)
 130{
 131        if (bytes > input_len)
 132                die("used more bytes than were available");
 133        input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
 134        input_len -= bytes;
 135        input_offset += bytes;
 136
 137        /* make sure off_t is sufficiently large not to wrap */
 138        if (consumed_bytes > consumed_bytes + bytes)
 139                die("pack too large for current definition of off_t");
 140        consumed_bytes += bytes;
 141}
 142
 143static const char *open_pack_file(const char *pack_name)
 144{
 145        if (from_stdin) {
 146                input_fd = 0;
 147                if (!pack_name) {
 148                        static char tmpfile[PATH_MAX];
 149                        snprintf(tmpfile, sizeof(tmpfile),
 150                                 "%s/tmp_pack_XXXXXX", get_object_directory());
 151                        output_fd = mkstemp(tmpfile);
 152                        pack_name = xstrdup(tmpfile);
 153                } else
 154                        output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
 155                if (output_fd < 0)
 156                        die("unable to create %s: %s\n", pack_name, strerror(errno));
 157                pack_fd = output_fd;
 158        } else {
 159                input_fd = open(pack_name, O_RDONLY);
 160                if (input_fd < 0)
 161                        die("cannot open packfile '%s': %s",
 162                            pack_name, strerror(errno));
 163                output_fd = -1;
 164                pack_fd = input_fd;
 165        }
 166        SHA1_Init(&input_ctx);
 167        return pack_name;
 168}
 169
 170static void parse_pack_header(void)
 171{
 172        struct pack_header *hdr = fill(sizeof(struct pack_header));
 173
 174        /* Header consistency check */
 175        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
 176                die("pack signature mismatch");
 177        if (!pack_version_ok(hdr->hdr_version))
 178                die("pack version %d unsupported", ntohl(hdr->hdr_version));
 179
 180        nr_objects = ntohl(hdr->hdr_entries);
 181        use(sizeof(struct pack_header));
 182}
 183
/*
 * Die with "pack has bad object at offset <offset>: <message>", where
 * the message is built printf-style from format and the arguments
 * that follow it.  Never returns.
 */
static void bad_object(unsigned long offset, const char *format,
                       ...) NORETURN __attribute__((format (printf, 2, 3)));

static void bad_object(unsigned long offset, const char *format, ...)
{
        va_list params;
        char buf[1024];

        va_start(params, format);
        /* the formatted message is truncated to fit buf */
        vsnprintf(buf, sizeof(buf), format, params);
        va_end(params);
        die("pack has bad object at offset %lu: %s", offset, buf);
}
 197
/*
 * Inflate the zlib stream at the current input position into a newly
 * allocated buffer of "size" bytes and return that buffer.  "offset"
 * is only used to report a bad object.  Input is obtained through
 * fill() and accounted for through use() as inflate() consumes it.
 */
static void *unpack_entry_data(unsigned long offset, unsigned long size)
{
        z_stream stream;
        void *buf = xmalloc(size);

        memset(&stream, 0, sizeof(stream));
        stream.next_out = buf;
        stream.avail_out = size;
        stream.next_in = fill(1);
        stream.avail_in = input_len;
        inflateInit(&stream);

        for (;;) {
                int ret = inflate(&stream, 0);
                /* consume exactly the bytes inflate() took from the buffer */
                use(input_len - stream.avail_in);
                /* done only when the stream ended AND produced "size" bytes */
                if (stream.total_out == size && ret == Z_STREAM_END)
                        break;
                if (ret != Z_OK)
                        bad_object(offset, "inflate returned %d", ret);
                /* more input needed: refill and hand the buffer to zlib again */
                stream.next_in = fill(1);
                stream.avail_in = input_len;
        }
        inflateEnd(&stream);
        return buf;
}
 223
/*
 * Parse one object at the current input position.
 *
 * Fills in obj->offset, obj->type, obj->size, obj->hdr_size and
 * obj->crc32.  For delta objects the base reference (object name or
 * pack offset) is stored in *delta_base; for other types *delta_base
 * is left untouched.
 *
 * Returns the inflated object data in a newly allocated buffer.
 */
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
{
        unsigned char *p, c;
        unsigned long size;
        off_t base_offset;
        unsigned shift;
        void *data;

        obj->offset = consumed_bytes;
        /* restart the CRC so it covers just this object */
        input_crc32 = crc32(0, Z_NULL, 0);

        /*
         * First header byte: bit 7 = "more size bytes follow",
         * bits 6-4 = type, bits 3-0 = low bits of the size.
         */
        p = fill(1);
        c = *p;
        use(1);
        obj->type = (c >> 4) & 7;
        size = (c & 15);
        shift = 4;
        /* each continuation byte contributes 7 more size bits */
        while (c & 0x80) {
                p = fill(1);
                c = *p;
                use(1);
                size += (c & 0x7fUL) << shift;
                shift += 7;
        }
        obj->size = size;

        switch (obj->type) {
        case OBJ_REF_DELTA:
                /* base is named by the 20 bytes that follow */
                hashcpy(delta_base->sha1, fill(20));
                use(20);
                break;
        case OBJ_OFS_DELTA:
                /* zero the whole union so keys compare bytewise */
                memset(delta_base, 0, sizeof(*delta_base));
                /*
                 * The distance back to the base is stored 7 bits per
                 * byte, most significant group first, with 1 added for
                 * every continuation byte.
                 */
                p = fill(1);
                c = *p;
                use(1);
                base_offset = c & 127;
                while (c & 128) {
                        base_offset += 1;
                        if (!base_offset || MSB(base_offset, 7))
                                bad_object(obj->offset, "offset value overflow for delta base object");
                        p = fill(1);
                        c = *p;
                        use(1);
                        base_offset = (base_offset << 7) + (c & 127);
                }
                delta_base->offset = obj->offset - base_offset;
                /* the base must lie strictly before this object */
                if (delta_base->offset >= obj->offset)
                        bad_object(obj->offset, "delta base offset is out of bound");
                break;
        case OBJ_COMMIT:
        case OBJ_TREE:
        case OBJ_BLOB:
        case OBJ_TAG:
                break;
        default:
                bad_object(obj->offset, "unknown object type %d", obj->type);
        }
        /* everything consumed so far, including any base reference */
        obj->hdr_size = consumed_bytes - obj->offset;

        data = unpack_entry_data(obj->offset, obj->size);
        obj->crc32 = input_crc32;
        return data;
}
 288
 289static void *get_data_from_pack(struct object_entry *obj)
 290{
 291        unsigned long from = obj[0].offset + obj[0].hdr_size;
 292        unsigned long len = obj[1].offset - from;
 293        unsigned long rdy = 0;
 294        unsigned char *src, *data;
 295        z_stream stream;
 296        int st;
 297
 298        src = xmalloc(len);
 299        data = src;
 300        do {
 301                ssize_t n = pread(pack_fd, data + rdy, len - rdy, from + rdy);
 302                if (n <= 0)
 303                        die("cannot pread pack file: %s", strerror(errno));
 304                rdy += n;
 305        } while (rdy < len);
 306        data = xmalloc(obj->size);
 307        memset(&stream, 0, sizeof(stream));
 308        stream.next_out = data;
 309        stream.avail_out = obj->size;
 310        stream.next_in = src;
 311        stream.avail_in = len;
 312        inflateInit(&stream);
 313        while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
 314        inflateEnd(&stream);
 315        if (st != Z_STREAM_END || stream.total_out != obj->size)
 316                die("serious inflate inconsistency");
 317        free(src);
 318        return data;
 319}
 320
 321static int find_delta(const union delta_base *base)
 322{
 323        int first = 0, last = nr_deltas;
 324
 325        while (first < last) {
 326                int next = (first + last) / 2;
 327                struct delta_entry *delta = &deltas[next];
 328                int cmp;
 329
 330                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
 331                if (!cmp)
 332                        return next;
 333                if (cmp < 0) {
 334                        last = next;
 335                        continue;
 336                }
 337                first = next+1;
 338        }
 339        return -first-1;
 340}
 341
 342static int find_delta_children(const union delta_base *base,
 343                               int *first_index, int *last_index)
 344{
 345        int first = find_delta(base);
 346        int last = first;
 347        int end = nr_deltas - 1;
 348
 349        if (first < 0)
 350                return -1;
 351        while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 352                --first;
 353        while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 354                ++last;
 355        *first_index = first;
 356        *last_index = last;
 357        return 0;
 358}
 359
 360static void sha1_object(const void *data, unsigned long size,
 361                        enum object_type type, unsigned char *sha1)
 362{
 363        hash_sha1_file(data, size, typename(type), sha1);
 364        if (has_sha1_file(sha1)) {
 365                void *has_data;
 366                enum object_type has_type;
 367                unsigned long has_size;
 368                has_data = read_sha1_file(sha1, &has_type, &has_size);
 369                if (!has_data)
 370                        die("cannot read existing object %s", sha1_to_hex(sha1));
 371                if (size != has_size || type != has_type ||
 372                    memcmp(data, has_data, size) != 0)
 373                        die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
 374                free(has_data);
 375        }
 376}
 377
/*
 * Apply the delta stored in delta_obj to the given base, record the
 * resulting object's name and real type in delta_obj, and then
 * recursively resolve every delta that uses the result as its base
 * (whether it refers to it by object name or by pack offset).
 *
 * base_data/base_size: content of the base object
 * type:                type of the base, inherited by the result
 */
static void resolve_delta(struct object_entry *delta_obj, void *base_data,
                          unsigned long base_size, enum object_type type)
{
        void *delta_data;
        unsigned long delta_size;
        void *result;
        unsigned long result_size;
        union delta_base delta_base;
        int j, first, last;

        /* changing real_type also marks this delta as resolved */
        delta_obj->real_type = type;
        delta_data = get_data_from_pack(delta_obj);
        delta_size = delta_obj->size;
        result = patch_delta(base_data, base_size, delta_data, delta_size,
                             &result_size);
        free(delta_data);
        if (!result)
                bad_object(delta_obj->offset, "failed to apply delta");
        sha1_object(result, result_size, type, delta_obj->sha1);
        nr_resolved_deltas++;

        /* children that name this object by its SHA1 */
        hashcpy(delta_base.sha1, delta_obj->sha1);
        if (!find_delta_children(&delta_base, &first, &last)) {
                for (j = first; j <= last; j++) {
                        struct object_entry *child = objects + deltas[j].obj_no;
                        /* skip entries that have already been resolved */
                        if (child->real_type == OBJ_REF_DELTA)
                                resolve_delta(child, result, result_size, type);
                }
        }

        /* children that name this object by its pack offset */
        memset(&delta_base, 0, sizeof(delta_base));
        delta_base.offset = delta_obj->offset;
        if (!find_delta_children(&delta_base, &first, &last)) {
                for (j = first; j <= last; j++) {
                        struct object_entry *child = objects + deltas[j].obj_no;
                        if (child->real_type == OBJ_OFS_DELTA)
                                resolve_delta(child, result, result_size, type);
                }
        }

        free(result);
}
 420
 421static int compare_delta_entry(const void *a, const void *b)
 422{
 423        const struct delta_entry *delta_a = a;
 424        const struct delta_entry *delta_b = b;
 425        return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 426}
 427
/*
 * Parse all objects and return the pack content SHA1 hash.
 *
 * The hash of the pack data is stored in sha1 and checked against the
 * 20-byte trailer found in the input.  objects[] and deltas[] are
 * filled in, and every delta whose base is present in the pack gets
 * resolved.
 */
static void parse_pack_objects(unsigned char *sha1)
{
        int i, percent = -1;
        struct delta_entry *delta = deltas;
        void *data;
        struct stat st;

        /*
         * First pass:
         * - find locations of all objects;
         * - calculate SHA1 of all non-delta objects;
         * - remember base (SHA1 or offset) for all deltas.
         */
        if (verbose)
                fprintf(stderr, "Indexing %d objects.\n", nr_objects);
        for (i = 0; i < nr_objects; i++) {
                struct object_entry *obj = &objects[i];
                /* the base is written into the next free delta slot */
                data = unpack_raw_entry(obj, &delta->base);
                obj->real_type = obj->type;
                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
                        /* keep the slot only when the object is a delta */
                        nr_deltas++;
                        delta->obj_no = i;
                        delta++;
                } else
                        sha1_object(data, obj->size, obj->type, obj->sha1);
                free(data);
                if (verbose)
                        percent = display_progress(i+1, nr_objects, percent);
        }
        /* sentinel entry: marks where the last object's data ends */
        objects[i].offset = consumed_bytes;
        if (verbose)
                fputc('\n', stderr);

        /* Check pack integrity */
        flush();
        SHA1_Final(sha1, &input_ctx);
        if (hashcmp(fill(20), sha1))
                die("pack is corrupted (SHA1 mismatch)");
        use(20);

        /* If input_fd is a file, we should have reached its end now. */
        if (fstat(input_fd, &st))
                die("cannot fstat packfile: %s", strerror(errno));
        /* bytes read but still unconsumed in the buffer count as junk too */
        if (S_ISREG(st.st_mode) &&
                        lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
                die("pack has junk at the end");

        if (!nr_deltas)
                return;

        /* Sort deltas by base SHA1/offset for fast searching */
        qsort(deltas, nr_deltas, sizeof(struct delta_entry),
              compare_delta_entry);

        /*
         * Second pass:
         * - for all non-delta objects, look if it is used as a base for
         *   deltas;
         * - if used as a base, uncompress the object and apply all deltas,
         *   recursively checking if the resulting object is used as a base
         *   for some more deltas.
         */
        if (verbose)
                fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
        for (i = 0; i < nr_objects; i++) {
                struct object_entry *obj = &objects[i];
                union delta_base base;
                int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;

                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
                        continue;
                /* look for children keyed by object name ... */
                hashcpy(base.sha1, obj->sha1);
                ref = !find_delta_children(&base, &ref_first, &ref_last);
                /* ... and for children keyed by pack offset */
                memset(&base, 0, sizeof(base));
                base.offset = obj->offset;
                ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
                if (!ref && !ofs)
                        continue;
                data = get_data_from_pack(obj);
                if (ref)
                        for (j = ref_first; j <= ref_last; j++) {
                                struct object_entry *child = objects + deltas[j].obj_no;
                                if (child->real_type == OBJ_REF_DELTA)
                                        resolve_delta(child, data,
                                                      obj->size, obj->type);
                        }
                if (ofs)
                        for (j = ofs_first; j <= ofs_last; j++) {
                                struct object_entry *child = objects + deltas[j].obj_no;
                                if (child->real_type == OBJ_OFS_DELTA)
                                        resolve_delta(child, data,
                                                      obj->size, obj->type);
                        }
                free(data);
                if (verbose)
                        percent = display_progress(nr_resolved_deltas,
                                                   nr_deltas, percent);
        }
        /* when deltas remain unresolved the progress line is left open */
        if (verbose && nr_resolved_deltas == nr_deltas)
                fputc('\n', stderr);
}
 530
 531static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc)
 532{
 533        z_stream stream;
 534        unsigned long maxsize;
 535        void *out;
 536
 537        memset(&stream, 0, sizeof(stream));
 538        deflateInit(&stream, zlib_compression_level);
 539        maxsize = deflateBound(&stream, size);
 540        out = xmalloc(maxsize);
 541
 542        /* Compress it */
 543        stream.next_in = in;
 544        stream.avail_in = size;
 545        stream.next_out = out;
 546        stream.avail_out = maxsize;
 547        while (deflate(&stream, Z_FINISH) == Z_OK);
 548        deflateEnd(&stream);
 549
 550        size = stream.total_out;
 551        write_or_die(fd, out, size);
 552        *obj_crc = crc32(*obj_crc, out, size);
 553        free(out);
 554        return size;
 555}
 556
/*
 * Append an object to the pack being written on output_fd and record
 * it in the next free slot of objects[] (nr_objects is incremented).
 *
 * sha1: name of the object, copied into the new entry
 * buf:  object content, "size" bytes long
 * type: object type, encoded in the header
 *
 * The new entry's offset must already be valid on entry (this
 * function reads obj[0].offset without setting it); the offset of the
 * following entry is set to the end of the data written here.
 */
static void append_obj_to_pack(const unsigned char *sha1, void *buf,
                               unsigned long size, enum object_type type)
{
        struct object_entry *obj = &objects[nr_objects++];
        unsigned char header[10];
        unsigned long s = size;
        int n = 0;
        /* first byte: type in bits 6-4, low 4 bits of the size in bits 3-0 */
        unsigned char c = (type << 4) | (s & 15);
        s >>= 4;
        /* remaining size bits, 7 per byte; bit 7 flags that more follow */
        while (s) {
                header[n++] = c | 0x80;
                c = s & 0x7f;
                s >>= 7;
        }
        header[n++] = c;
        write_or_die(output_fd, header, n);
        /* the CRC covers the header and the compressed data */
        obj[0].crc32 = crc32(0, Z_NULL, 0);
        obj[0].crc32 = crc32(obj[0].crc32, header, n);
        obj[1].offset = obj[0].offset + n;
        obj[1].offset += write_compressed(output_fd, buf, size, &obj[0].crc32);
        hashcpy(obj->sha1, sha1);
}
 579
 580static int delta_pos_compare(const void *_a, const void *_b)
 581{
 582        struct delta_entry *a = *(struct delta_entry **)_a;
 583        struct delta_entry *b = *(struct delta_entry **)_b;
 584        return a->obj_no - b->obj_no;
 585}
 586
/*
 * Complete a pack whose remaining deltas have bases that are not in
 * the pack: read each missing base with read_sha1_file(), resolve the
 * deltas depending on it, and append the base to the pack.
 *
 * nr_unresolved: number of deltas still unresolved; used to size the
 *                work array.
 */
static void fix_unresolved_deltas(int nr_unresolved)
{
        struct delta_entry **sorted_by_pos;
        int i, n = 0, percent = -1;

        /*
         * Since many unresolved deltas may well be themselves base objects
         * for more unresolved deltas, we really want to include the
         * smallest number of base objects that would cover as much delta
         * as possible by picking the trunk deltas first, allowing for
         * other deltas to resolve without additional base objects.  Since
         * most base objects are to be found before deltas depending on
         * them, a good heuristic is to start resolving deltas in the same
         * order as their position in the pack.
         */
        sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
        /* collect the deltas whose real_type is still OBJ_REF_DELTA */
        for (i = 0; i < nr_deltas; i++) {
                if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
                        continue;
                sorted_by_pos[n++] = &deltas[i];
        }
        qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

        for (i = 0; i < n; i++) {
                struct delta_entry *d = sorted_by_pos[i];
                void *data;
                unsigned long size;
                enum object_type type;
                int j, first, last;

                /* may have been resolved meanwhile through an earlier base */
                if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
                        continue;
                data = read_sha1_file(d->base.sha1, &type, &size);
                /* base not available: leave the delta unresolved */
                if (!data)
                        continue;

                /* the return value is not checked; d itself is in deltas[] under this key */
                find_delta_children(&d->base, &first, &last);
                for (j = first; j <= last; j++) {
                        struct object_entry *child = objects + deltas[j].obj_no;
                        if (child->real_type == OBJ_REF_DELTA)
                                resolve_delta(child, data, size, type);
                }

                if (check_sha1_signature(d->base.sha1, data, size, typename(type)))
                        die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
                append_obj_to_pack(d->base.sha1, data, size, type);
                free(data);
                if (verbose)
                        percent = display_progress(nr_resolved_deltas,
                                                   nr_deltas, percent);
        }
        free(sorted_by_pos);
        if (verbose)
                fputc('\n', stderr);
}
 642
 643static void readjust_pack_header_and_sha1(unsigned char *sha1)
 644{
 645        struct pack_header hdr;
 646        SHA_CTX ctx;
 647        int size;
 648
 649        /* Rewrite pack header with updated object number */
 650        if (lseek(output_fd, 0, SEEK_SET) != 0)
 651                die("cannot seek back: %s", strerror(errno));
 652        if (read_in_full(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
 653                die("cannot read pack header back: %s", strerror(errno));
 654        hdr.hdr_entries = htonl(nr_objects);
 655        if (lseek(output_fd, 0, SEEK_SET) != 0)
 656                die("cannot seek back: %s", strerror(errno));
 657        write_or_die(output_fd, &hdr, sizeof(hdr));
 658        if (lseek(output_fd, 0, SEEK_SET) != 0)
 659                die("cannot seek back: %s", strerror(errno));
 660
 661        /* Recompute and store the new pack's SHA1 */
 662        SHA1_Init(&ctx);
 663        do {
 664                unsigned char *buf[4096];
 665                size = xread(output_fd, buf, sizeof(buf));
 666                if (size < 0)
 667                        die("cannot read pack data back: %s", strerror(errno));
 668                SHA1_Update(&ctx, buf, size);
 669        } while (size > 0);
 670        SHA1_Final(sha1, &ctx);
 671        write_or_die(output_fd, sha1, 20);
 672}
 673
/* Index version written unless the last object's offset forces version 2. */
static uint32_t index_default_version = 1;
/* Largest offset stored directly in the 32-bit offset table of a v2 index. */
static uint32_t index_off32_limit = 0x7fffffff;
 676
 677static int sha1_compare(const void *_a, const void *_b)
 678{
 679        struct object_entry *a = *(struct object_entry **)_a;
 680        struct object_entry *b = *(struct object_entry **)_b;
 681        return hashcmp(a->sha1, b->sha1);
 682}
 683
 684/*
 685 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 686 * the SHA1 hash of sorted object names.
 687 */
 688static const char *write_index_file(const char *index_name, unsigned char *sha1)
 689{
 690        struct sha1file *f;
 691        struct object_entry **sorted_by_sha, **list, **last;
 692        uint32_t array[256];
 693        int i, fd;
 694        SHA_CTX ctx;
 695        uint32_t index_version;
 696
 697        if (nr_objects) {
 698                sorted_by_sha =
 699                        xcalloc(nr_objects, sizeof(struct object_entry *));
 700                list = sorted_by_sha;
 701                last = sorted_by_sha + nr_objects;
 702                for (i = 0; i < nr_objects; ++i)
 703                        sorted_by_sha[i] = &objects[i];
 704                qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
 705                      sha1_compare);
 706        }
 707        else
 708                sorted_by_sha = list = last = NULL;
 709
 710        if (!index_name) {
 711                static char tmpfile[PATH_MAX];
 712                snprintf(tmpfile, sizeof(tmpfile),
 713                         "%s/tmp_idx_XXXXXX", get_object_directory());
 714                fd = mkstemp(tmpfile);
 715                index_name = xstrdup(tmpfile);
 716        } else {
 717                unlink(index_name);
 718                fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
 719        }
 720        if (fd < 0)
 721                die("unable to create %s: %s", index_name, strerror(errno));
 722        f = sha1fd(fd, index_name);
 723
 724        /* if last object's offset is >= 2^31 we should use index V2 */
 725        index_version = (objects[nr_objects-1].offset >> 31) ? 2 : index_default_version;
 726
 727        /* index versions 2 and above need a header */
 728        if (index_version >= 2) {
 729                struct pack_idx_header hdr;
 730                hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
 731                hdr.idx_version = htonl(index_version);
 732                sha1write(f, &hdr, sizeof(hdr));
 733        }
 734
 735        /*
 736         * Write the first-level table (the list is sorted,
 737         * but we use a 256-entry lookup to be able to avoid
 738         * having to do eight extra binary search iterations).
 739         */
 740        for (i = 0; i < 256; i++) {
 741                struct object_entry **next = list;
 742                while (next < last) {
 743                        struct object_entry *obj = *next;
 744                        if (obj->sha1[0] != i)
 745                                break;
 746                        next++;
 747                }
 748                array[i] = htonl(next - sorted_by_sha);
 749                list = next;
 750        }
 751        sha1write(f, array, 256 * 4);
 752
 753        /* compute the SHA1 hash of sorted object names. */
 754        SHA1_Init(&ctx);
 755
 756        /*
 757         * Write the actual SHA1 entries..
 758         */
 759        list = sorted_by_sha;
 760        for (i = 0; i < nr_objects; i++) {
 761                struct object_entry *obj = *list++;
 762                if (index_version < 2) {
 763                        uint32_t offset = htonl(obj->offset);
 764                        sha1write(f, &offset, 4);
 765                }
 766                sha1write(f, obj->sha1, 20);
 767                SHA1_Update(&ctx, obj->sha1, 20);
 768        }
 769
 770        if (index_version >= 2) {
 771                unsigned int nr_large_offset = 0;
 772
 773                /* write the crc32 table */
 774                list = sorted_by_sha;
 775                for (i = 0; i < nr_objects; i++) {
 776                        struct object_entry *obj = *list++;
 777                        uint32_t crc32_val = htonl(obj->crc32);
 778                        sha1write(f, &crc32_val, 4);
 779                }
 780
 781                /* write the 32-bit offset table */
 782                list = sorted_by_sha;
 783                for (i = 0; i < nr_objects; i++) {
 784                        struct object_entry *obj = *list++;
 785                        uint32_t offset = (obj->offset <= index_off32_limit) ?
 786                                obj->offset : (0x80000000 | nr_large_offset++);
 787                        offset = htonl(offset);
 788                        sha1write(f, &offset, 4);
 789                }
 790
 791                /* write the large offset table */
 792                list = sorted_by_sha;
 793                while (nr_large_offset) {
 794                        struct object_entry *obj = *list++;
 795                        uint64_t offset = obj->offset;
 796                        if (offset > index_off32_limit) {
 797                                uint32_t split[2];
 798                                split[0]        = htonl(offset >> 32);
 799                                split[1] = htonl(offset & 0xffffffff);
 800                                sha1write(f, split, 8);
 801                                nr_large_offset--;
 802                        }
 803                }
 804        }
 805
 806        sha1write(f, sha1, 20);
 807        sha1close(f, NULL, 1);
 808        free(sorted_by_sha);
 809        SHA1_Final(sha1, &ctx);
 810        return index_name;
 811}
 812
 813static void final(const char *final_pack_name, const char *curr_pack_name,
 814                  const char *final_index_name, const char *curr_index_name,
 815                  const char *keep_name, const char *keep_msg,
 816                  unsigned char *sha1)
 817{
 818        const char *report = "pack";
 819        char name[PATH_MAX];
 820        int err;
 821
 822        if (!from_stdin) {
 823                close(input_fd);
 824        } else {
 825                err = close(output_fd);
 826                if (err)
 827                        die("error while closing pack file: %s", strerror(errno));
 828                chmod(curr_pack_name, 0444);
 829        }
 830
 831        if (keep_msg) {
 832                int keep_fd, keep_msg_len = strlen(keep_msg);
 833                if (!keep_name) {
 834                        snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
 835                                 get_object_directory(), sha1_to_hex(sha1));
 836                        keep_name = name;
 837                }
 838                keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
 839                if (keep_fd < 0) {
 840                        if (errno != EEXIST)
 841                                die("cannot write keep file");
 842                } else {
 843                        if (keep_msg_len > 0) {
 844                                write_or_die(keep_fd, keep_msg, keep_msg_len);
 845                                write_or_die(keep_fd, "\n", 1);
 846                        }
 847                        close(keep_fd);
 848                        report = "keep";
 849                }
 850        }
 851
 852        if (final_pack_name != curr_pack_name) {
 853                if (!final_pack_name) {
 854                        snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
 855                                 get_object_directory(), sha1_to_hex(sha1));
 856                        final_pack_name = name;
 857                }
 858                if (move_temp_to_file(curr_pack_name, final_pack_name))
 859                        die("cannot store pack file");
 860        }
 861
 862        chmod(curr_index_name, 0444);
 863        if (final_index_name != curr_index_name) {
 864                if (!final_index_name) {
 865                        snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
 866                                 get_object_directory(), sha1_to_hex(sha1));
 867                        final_index_name = name;
 868                }
 869                if (move_temp_to_file(curr_index_name, final_index_name))
 870                        die("cannot store index file");
 871        }
 872
 873        if (!from_stdin) {
 874                printf("%s\n", sha1_to_hex(sha1));
 875        } else {
 876                char buf[48];
 877                int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
 878                                   report, sha1_to_hex(sha1));
 879                write_or_die(1, buf, len);
 880
 881                /*
 882                 * Let's just mimic git-unpack-objects here and write
 883                 * the last part of the input buffer to stdout.
 884                 */
 885                while (input_len) {
 886                        err = xwrite(1, input_buffer + input_offset, input_len);
 887                        if (err <= 0)
 888                                break;
 889                        input_len -= err;
 890                        input_offset += err;
 891                }
 892        }
 893}
 894
 895int main(int argc, char **argv)
 896{
 897        int i, fix_thin_pack = 0;
 898        const char *curr_pack, *pack_name = NULL;
 899        const char *curr_index, *index_name = NULL;
 900        const char *keep_name = NULL, *keep_msg = NULL;
 901        char *index_name_buf = NULL, *keep_name_buf = NULL;
 902        unsigned char sha1[20];
 903
 904        for (i = 1; i < argc; i++) {
 905                const char *arg = argv[i];
 906
 907                if (*arg == '-') {
 908                        if (!strcmp(arg, "--stdin")) {
 909                                from_stdin = 1;
 910                        } else if (!strcmp(arg, "--fix-thin")) {
 911                                fix_thin_pack = 1;
 912                        } else if (!strcmp(arg, "--keep")) {
 913                                keep_msg = "";
 914                        } else if (!prefixcmp(arg, "--keep=")) {
 915                                keep_msg = arg + 7;
 916                        } else if (!prefixcmp(arg, "--pack_header=")) {
 917                                struct pack_header *hdr;
 918                                char *c;
 919
 920                                hdr = (struct pack_header *)input_buffer;
 921                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 922                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 923                                if (*c != ',')
 924                                        die("bad %s", arg);
 925                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 926                                if (*c)
 927                                        die("bad %s", arg);
 928                                input_len = sizeof(*hdr);
 929                        } else if (!strcmp(arg, "-v")) {
 930                                verbose = 1;
 931                        } else if (!strcmp(arg, "-o")) {
 932                                if (index_name || (i+1) >= argc)
 933                                        usage(index_pack_usage);
 934                                index_name = argv[++i];
 935                        } else if (!prefixcmp(arg, "--index-version=")) {
 936                                char *c;
 937                                index_default_version = strtoul(arg + 16, &c, 10);
 938                                if (index_default_version > 2)
 939                                        die("bad %s", arg);
 940                                if (*c == ',')
 941                                        index_off32_limit = strtoul(c+1, &c, 0);
 942                                if (*c || index_off32_limit & 0x80000000)
 943                                        die("bad %s", arg);
 944                        } else
 945                                usage(index_pack_usage);
 946                        continue;
 947                }
 948
 949                if (pack_name)
 950                        usage(index_pack_usage);
 951                pack_name = arg;
 952        }
 953
 954        if (!pack_name && !from_stdin)
 955                usage(index_pack_usage);
 956        if (fix_thin_pack && !from_stdin)
 957                die("--fix-thin cannot be used without --stdin");
 958        if (!index_name && pack_name) {
 959                int len = strlen(pack_name);
 960                if (!has_extension(pack_name, ".pack"))
 961                        die("packfile name '%s' does not end with '.pack'",
 962                            pack_name);
 963                index_name_buf = xmalloc(len);
 964                memcpy(index_name_buf, pack_name, len - 5);
 965                strcpy(index_name_buf + len - 5, ".idx");
 966                index_name = index_name_buf;
 967        }
 968        if (keep_msg && !keep_name && pack_name) {
 969                int len = strlen(pack_name);
 970                if (!has_extension(pack_name, ".pack"))
 971                        die("packfile name '%s' does not end with '.pack'",
 972                            pack_name);
 973                keep_name_buf = xmalloc(len);
 974                memcpy(keep_name_buf, pack_name, len - 5);
 975                strcpy(keep_name_buf + len - 5, ".keep");
 976                keep_name = keep_name_buf;
 977        }
 978
 979        curr_pack = open_pack_file(pack_name);
 980        parse_pack_header();
 981        objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
 982        deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
 983        if (verbose)
 984                setup_progress_signal();
 985        parse_pack_objects(sha1);
 986        if (nr_deltas != nr_resolved_deltas) {
 987                if (fix_thin_pack) {
 988                        int nr_unresolved = nr_deltas - nr_resolved_deltas;
 989                        int nr_objects_initial = nr_objects;
 990                        if (nr_unresolved <= 0)
 991                                die("confusion beyond insanity");
 992                        objects = xrealloc(objects,
 993                                           (nr_objects + nr_unresolved + 1)
 994                                           * sizeof(*objects));
 995                        fix_unresolved_deltas(nr_unresolved);
 996                        if (verbose)
 997                                fprintf(stderr, "%d objects were added to complete this thin pack.\n",
 998                                        nr_objects - nr_objects_initial);
 999                        readjust_pack_header_and_sha1(sha1);
1000                }
1001                if (nr_deltas != nr_resolved_deltas)
1002                        die("pack has %d unresolved deltas",
1003                            nr_deltas - nr_resolved_deltas);
1004        } else {
1005                /* Flush remaining pack final 20-byte SHA1. */
1006                flush();
1007        }
1008        free(deltas);
1009        curr_index = write_index_file(index_name, sha1);
1010        final(pack_name, curr_pack,
1011                index_name, curr_index,
1012                keep_name, keep_msg,
1013                sha1);
1014        free(objects);
1015        free(index_name_buf);
1016        free(keep_name_buf);
1017
1018        return 0;
1019}