24a9a162202aec2c1481ab75b565cd4a73b60be6
   1#include "cache.h"
   2#include "delta.h"
   3#include "pack.h"
   4#include "csum-file.h"
   5#include "blob.h"
   6#include "commit.h"
   7#include "tag.h"
   8#include "tree.h"
   9#include "progress.h"
  10#include "fsck.h"
  11#include "exec_cmd.h"
  12
/* Command-line synopsis of "git index-pack". */
static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
  15
/*
 * Bookkeeping kept for every object read from (or appended to) the pack.
 */
struct object_entry
{
        /* index record: this file uses its offset, sha1 and crc32 members */
        struct pack_idx_entry idx;
        /* size announced by the object header (size of the inflated entry data) */
        unsigned long size;
        /* number of bytes occupied by the object header in the pack */
        unsigned int hdr_size;
        /* type as stored in the pack; may be OBJ_REF_DELTA or OBJ_OFS_DELTA */
        enum object_type type;
        /*
         * Starts out equal to "type"; once a delta has been resolved it is
         * replaced with the real_type of its base object.
         */
        enum object_type real_type;
};
  24
/*
 * Identification of the base a delta applies to: the base's SHA-1 for an
 * OBJ_REF_DELTA entry, or the base's offset in the pack for an
 * OBJ_OFS_DELTA entry.
 */
union delta_base {
        unsigned char sha1[20];
        off_t offset;
};
  29
/*
 * One link in the chain of base objects currently being used for delta
 * resolution.  The chain starts at base_cache and is followed through the
 * "child" pointers.
 */
struct base_data {
        /* the entry this one was built from, or NULL for the head of the chain */
        struct base_data *base;
        /* the next entry down the chain, or NULL for the last one */
        struct base_data *child;
        /* pack object this entry describes */
        struct object_entry *obj;
        /* inflated (and, for deltas, patched) content; NULL when not in memory */
        void *data;
        /* size of "data" in bytes */
        unsigned long size;
};
  37
/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ   20

/*
 * Object flag bits used by this file:
 * FLAG_LINK    - set by mark_link() on objects referenced by another object;
 * FLAG_CHECKED - set once the object has been verified (see sha1_object()
 *                and check_object()).
 */
#define FLAG_LINK (1u<<20)
#define FLAG_CHECKED (1u<<21)
  46
/*
 * One delta object found in the pack: what its base is, and which entry of
 * the objects[] array holds the delta itself.
 */
struct delta_entry
{
        /* SHA-1 or pack offset of the base, depending on the delta type */
        union delta_base base;
        /* index of the delta object in objects[] */
        int obj_no;
};
  52
/* Every object of the pack, in pack order (allocated outside this section). */
static struct object_entry *objects;
/* Every delta of the pack; sorted with compare_delta_entry() after the first pass. */
static struct delta_entry *deltas;
/* Head of the chain of bases currently linked for delta resolution. */
static struct base_data *base_cache;
/* Bytes of object data accounted to the chain; compared with delta_base_cache_limit. */
static size_t base_cache_used;
static int nr_objects;
static int nr_deltas;
static int nr_resolved_deltas;

/* Non-zero when the pack is read from file descriptor 0 (see open_pack_file()). */
static int from_stdin;
/* Non-zero to parse and fsck every object while hashing it (see sha1_object()). */
static int strict;
/* Non-zero to display progress meters. */
static int verbose;

static struct progress *progress;

/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
/*
 * input_offset: start of the not yet consumed data inside input_buffer;
 * input_len: number of not yet consumed bytes available from there.
 */
static unsigned int input_offset, input_len;
/* Total number of pack bytes consumed through use(). */
static off_t consumed_bytes;
/* Running SHA-1 of the pack data, fed by flush(). */
static git_SHA_CTX input_ctx;
/* Running CRC32 of the bytes consumed for the current object. */
static uint32_t input_crc32;
/*
 * input_fd: where pack data is read from; output_fd: where it is copied to
 * (-1 when nothing is copied); pack_fd: descriptor used to re-read object
 * data with pread().
 */
static int input_fd, output_fd, pack_fd;
  74
  75static int mark_link(struct object *obj, int type, void *data)
  76{
  77        if (!obj)
  78                return -1;
  79
  80        if (type != OBJ_ANY && obj->type != type)
  81                die("object type mismatch at %s", sha1_to_hex(obj->sha1));
  82
  83        obj->flags |= FLAG_LINK;
  84        return 0;
  85}
  86
  87/* The content of each linked object must have been checked
  88   or it must be already present in the object database */
  89static void check_object(struct object *obj)
  90{
  91        if (!obj)
  92                return;
  93
  94        if (!(obj->flags & FLAG_LINK))
  95                return;
  96
  97        if (!(obj->flags & FLAG_CHECKED)) {
  98                unsigned long size;
  99                int type = sha1_object_info(obj->sha1, &size);
 100                if (type != obj->type || type <= 0)
 101                        die("object of unexpected type");
 102                obj->flags |= FLAG_CHECKED;
 103                return;
 104        }
 105}
 106
 107static void check_objects(void)
 108{
 109        unsigned i, max;
 110
 111        max = get_max_object_index();
 112        for (i = 0; i < max; i++)
 113                check_object(get_indexed_object(i));
 114}
 115
 116
 117/* Discard current buffer used content. */
 118static void flush(void)
 119{
 120        if (input_offset) {
 121                if (output_fd >= 0)
 122                        write_or_die(output_fd, input_buffer, input_offset);
 123                git_SHA1_Update(&input_ctx, input_buffer, input_offset);
 124                memmove(input_buffer, input_buffer + input_offset, input_len);
 125                input_offset = 0;
 126        }
 127}
 128
 129/*
 130 * Make sure at least "min" bytes are available in the buffer, and
 131 * return the pointer to the buffer.
 132 */
 133static void *fill(int min)
 134{
 135        if (min <= input_len)
 136                return input_buffer + input_offset;
 137        if (min > sizeof(input_buffer))
 138                die("cannot fill %d bytes", min);
 139        flush();
 140        do {
 141                ssize_t ret = xread(input_fd, input_buffer + input_len,
 142                                sizeof(input_buffer) - input_len);
 143                if (ret <= 0) {
 144                        if (!ret)
 145                                die("early EOF");
 146                        die_errno("read error on input");
 147                }
 148                input_len += ret;
 149                if (from_stdin)
 150                        display_throughput(progress, consumed_bytes + input_len);
 151        } while (input_len < min);
 152        return input_buffer;
 153}
 154
 155static void use(int bytes)
 156{
 157        if (bytes > input_len)
 158                die("used more bytes than were available");
 159        input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
 160        input_len -= bytes;
 161        input_offset += bytes;
 162
 163        /* make sure off_t is sufficiently large not to wrap */
 164        if (signed_add_overflows(consumed_bytes, bytes))
 165                die("pack too large for current definition of off_t");
 166        consumed_bytes += bytes;
 167}
 168
 169static const char *open_pack_file(const char *pack_name)
 170{
 171        if (from_stdin) {
 172                input_fd = 0;
 173                if (!pack_name) {
 174                        static char tmpfile[PATH_MAX];
 175                        output_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
 176                                                "pack/tmp_pack_XXXXXX");
 177                        pack_name = xstrdup(tmpfile);
 178                } else
 179                        output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
 180                if (output_fd < 0)
 181                        die_errno("unable to create '%s'", pack_name);
 182                pack_fd = output_fd;
 183        } else {
 184                input_fd = open(pack_name, O_RDONLY);
 185                if (input_fd < 0)
 186                        die_errno("cannot open packfile '%s'", pack_name);
 187                output_fd = -1;
 188                pack_fd = input_fd;
 189        }
 190        git_SHA1_Init(&input_ctx);
 191        return pack_name;
 192}
 193
 194static void parse_pack_header(void)
 195{
 196        struct pack_header *hdr = fill(sizeof(struct pack_header));
 197
 198        /* Header consistency check */
 199        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
 200                die("pack signature mismatch");
 201        if (!pack_version_ok(hdr->hdr_version))
 202                die("pack version %"PRIu32" unsupported",
 203                        ntohl(hdr->hdr_version));
 204
 205        nr_objects = ntohl(hdr->hdr_entries);
 206        use(sizeof(struct pack_header));
 207}
 208
 209static NORETURN void bad_object(unsigned long offset, const char *format,
 210                       ...) __attribute__((format (printf, 2, 3)));
 211
 212static void bad_object(unsigned long offset, const char *format, ...)
 213{
 214        va_list params;
 215        char buf[1024];
 216
 217        va_start(params, format);
 218        vsnprintf(buf, sizeof(buf), format, params);
 219        va_end(params);
 220        die("pack has bad object at offset %lu: %s", offset, buf);
 221}
 222
 223static void free_base_data(struct base_data *c)
 224{
 225        if (c->data) {
 226                free(c->data);
 227                c->data = NULL;
 228                base_cache_used -= c->size;
 229        }
 230}
 231
 232static void prune_base_data(struct base_data *retain)
 233{
 234        struct base_data *b;
 235        for (b = base_cache;
 236             base_cache_used > delta_base_cache_limit && b;
 237             b = b->child) {
 238                if (b->data && b != retain)
 239                        free_base_data(b);
 240        }
 241}
 242
 243static void link_base_data(struct base_data *base, struct base_data *c)
 244{
 245        if (base)
 246                base->child = c;
 247        else
 248                base_cache = c;
 249
 250        c->base = base;
 251        c->child = NULL;
 252        if (c->data)
 253                base_cache_used += c->size;
 254        prune_base_data(c);
 255}
 256
 257static void unlink_base_data(struct base_data *c)
 258{
 259        struct base_data *base = c->base;
 260        if (base)
 261                base->child = NULL;
 262        else
 263                base_cache = NULL;
 264        free_base_data(c);
 265}
 266
 267static void *unpack_entry_data(unsigned long offset, unsigned long size)
 268{
 269        int status;
 270        z_stream stream;
 271        void *buf = xmalloc(size);
 272
 273        memset(&stream, 0, sizeof(stream));
 274        git_inflate_init(&stream);
 275        stream.next_out = buf;
 276        stream.avail_out = size;
 277
 278        do {
 279                stream.next_in = fill(1);
 280                stream.avail_in = input_len;
 281                status = git_inflate(&stream, 0);
 282                use(input_len - stream.avail_in);
 283        } while (status == Z_OK);
 284        if (stream.total_out != size || status != Z_STREAM_END)
 285                bad_object(offset, "inflate returned %d", status);
 286        git_inflate_end(&stream);
 287        return buf;
 288}
 289
 290static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 291{
 292        unsigned char *p;
 293        unsigned long size, c;
 294        off_t base_offset;
 295        unsigned shift;
 296        void *data;
 297
 298        obj->idx.offset = consumed_bytes;
 299        input_crc32 = crc32(0, Z_NULL, 0);
 300
 301        p = fill(1);
 302        c = *p;
 303        use(1);
 304        obj->type = (c >> 4) & 7;
 305        size = (c & 15);
 306        shift = 4;
 307        while (c & 0x80) {
 308                p = fill(1);
 309                c = *p;
 310                use(1);
 311                size += (c & 0x7f) << shift;
 312                shift += 7;
 313        }
 314        obj->size = size;
 315
 316        switch (obj->type) {
 317        case OBJ_REF_DELTA:
 318                hashcpy(delta_base->sha1, fill(20));
 319                use(20);
 320                break;
 321        case OBJ_OFS_DELTA:
 322                memset(delta_base, 0, sizeof(*delta_base));
 323                p = fill(1);
 324                c = *p;
 325                use(1);
 326                base_offset = c & 127;
 327                while (c & 128) {
 328                        base_offset += 1;
 329                        if (!base_offset || MSB(base_offset, 7))
 330                                bad_object(obj->idx.offset, "offset value overflow for delta base object");
 331                        p = fill(1);
 332                        c = *p;
 333                        use(1);
 334                        base_offset = (base_offset << 7) + (c & 127);
 335                }
 336                delta_base->offset = obj->idx.offset - base_offset;
 337                if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset)
 338                        bad_object(obj->idx.offset, "delta base offset is out of bound");
 339                break;
 340        case OBJ_COMMIT:
 341        case OBJ_TREE:
 342        case OBJ_BLOB:
 343        case OBJ_TAG:
 344                break;
 345        default:
 346                bad_object(obj->idx.offset, "unknown object type %d", obj->type);
 347        }
 348        obj->hdr_size = consumed_bytes - obj->idx.offset;
 349
 350        data = unpack_entry_data(obj->idx.offset, obj->size);
 351        obj->idx.crc32 = input_crc32;
 352        return data;
 353}
 354
 355static void *get_data_from_pack(struct object_entry *obj)
 356{
 357        off_t from = obj[0].idx.offset + obj[0].hdr_size;
 358        unsigned long len = obj[1].idx.offset - from;
 359        unsigned char *data, *inbuf;
 360        z_stream stream;
 361        int status;
 362
 363        data = xmalloc(obj->size);
 364        inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
 365
 366        memset(&stream, 0, sizeof(stream));
 367        git_inflate_init(&stream);
 368        stream.next_out = data;
 369        stream.avail_out = obj->size;
 370
 371        do {
 372                ssize_t n = (len < 64*1024) ? len : 64*1024;
 373                n = pread(pack_fd, inbuf, n, from);
 374                if (n < 0)
 375                        die_errno("cannot pread pack file");
 376                if (!n)
 377                        die("premature end of pack file, %lu bytes missing", len);
 378                from += n;
 379                len -= n;
 380                stream.next_in = inbuf;
 381                stream.avail_in = n;
 382                status = git_inflate(&stream, 0);
 383        } while (len && status == Z_OK && !stream.avail_in);
 384
 385        /* This has been inflated OK when first encountered, so... */
 386        if (status != Z_STREAM_END || stream.total_out != obj->size)
 387                die("serious inflate inconsistency");
 388
 389        git_inflate_end(&stream);
 390        free(inbuf);
 391        return data;
 392}
 393
 394static int compare_delta_bases(const union delta_base *base1,
 395                               const union delta_base *base2,
 396                               enum object_type type1,
 397                               enum object_type type2)
 398{
 399        int cmp = type1 - type2;
 400        if (cmp)
 401                return cmp;
 402        return memcmp(base1, base2, UNION_BASE_SZ);
 403}
 404
 405static int find_delta(const union delta_base *base, enum object_type type)
 406{
 407        int first = 0, last = nr_deltas;
 408
 409        while (first < last) {
 410                int next = (first + last) / 2;
 411                struct delta_entry *delta = &deltas[next];
 412                int cmp;
 413
 414                cmp = compare_delta_bases(base, &delta->base,
 415                                          type, objects[delta->obj_no].type);
 416                if (!cmp)
 417                        return next;
 418                if (cmp < 0) {
 419                        last = next;
 420                        continue;
 421                }
 422                first = next+1;
 423        }
 424        return -first-1;
 425}
 426
 427static void find_delta_children(const union delta_base *base,
 428                                int *first_index, int *last_index,
 429                                enum object_type type)
 430{
 431        int first = find_delta(base, type);
 432        int last = first;
 433        int end = nr_deltas - 1;
 434
 435        if (first < 0) {
 436                *first_index = 0;
 437                *last_index = -1;
 438                return;
 439        }
 440        while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 441                --first;
 442        while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 443                ++last;
 444        *first_index = first;
 445        *last_index = last;
 446}
 447
 448static void sha1_object(const void *data, unsigned long size,
 449                        enum object_type type, unsigned char *sha1)
 450{
 451        hash_sha1_file(data, size, typename(type), sha1);
 452        if (has_sha1_file(sha1)) {
 453                void *has_data;
 454                enum object_type has_type;
 455                unsigned long has_size;
 456                has_data = read_sha1_file(sha1, &has_type, &has_size);
 457                if (!has_data)
 458                        die("cannot read existing object %s", sha1_to_hex(sha1));
 459                if (size != has_size || type != has_type ||
 460                    memcmp(data, has_data, size) != 0)
 461                        die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
 462                free(has_data);
 463        }
 464        if (strict) {
 465                if (type == OBJ_BLOB) {
 466                        struct blob *blob = lookup_blob(sha1);
 467                        if (blob)
 468                                blob->object.flags |= FLAG_CHECKED;
 469                        else
 470                                die("invalid blob object %s", sha1_to_hex(sha1));
 471                } else {
 472                        struct object *obj;
 473                        int eaten;
 474                        void *buf = (void *) data;
 475
 476                        /*
 477                         * we do not need to free the memory here, as the
 478                         * buf is deleted by the caller.
 479                         */
 480                        obj = parse_object_buffer(sha1, type, size, buf, &eaten);
 481                        if (!obj)
 482                                die("invalid %s", typename(type));
 483                        if (fsck_object(obj, 1, fsck_error_function))
 484                                die("Error in object");
 485                        if (fsck_walk(obj, mark_link, NULL))
 486                                die("Not all child objects of %s are reachable", sha1_to_hex(obj->sha1));
 487
 488                        if (obj->type == OBJ_TREE) {
 489                                struct tree *item = (struct tree *) obj;
 490                                item->buffer = NULL;
 491                        }
 492                        if (obj->type == OBJ_COMMIT) {
 493                                struct commit *commit = (struct commit *) obj;
 494                                commit->buffer = NULL;
 495                        }
 496                        obj->flags |= FLAG_CHECKED;
 497                }
 498        }
 499}
 500
 501static void *get_base_data(struct base_data *c)
 502{
 503        if (!c->data) {
 504                struct object_entry *obj = c->obj;
 505
 506                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 507                        void *base = get_base_data(c->base);
 508                        void *raw = get_data_from_pack(obj);
 509                        c->data = patch_delta(
 510                                base, c->base->size,
 511                                raw, obj->size,
 512                                &c->size);
 513                        free(raw);
 514                        if (!c->data)
 515                                bad_object(obj->idx.offset, "failed to apply delta");
 516                } else {
 517                        c->data = get_data_from_pack(obj);
 518                        c->size = obj->size;
 519                }
 520
 521                base_cache_used += c->size;
 522                prune_base_data(c);
 523        }
 524        return c->data;
 525}
 526
 527static void resolve_delta(struct object_entry *delta_obj,
 528                          struct base_data *base, struct base_data *result)
 529{
 530        void *base_data, *delta_data;
 531
 532        delta_obj->real_type = base->obj->real_type;
 533        delta_data = get_data_from_pack(delta_obj);
 534        base_data = get_base_data(base);
 535        result->obj = delta_obj;
 536        result->data = patch_delta(base_data, base->size,
 537                                   delta_data, delta_obj->size, &result->size);
 538        free(delta_data);
 539        if (!result->data)
 540                bad_object(delta_obj->idx.offset, "failed to apply delta");
 541        sha1_object(result->data, result->size, delta_obj->real_type,
 542                    delta_obj->idx.sha1);
 543        nr_resolved_deltas++;
 544}
 545
/*
 * Resolve every delta that uses "base" as its base object, and recursively
 * every delta based on the objects obtained that way.
 *
 * "prev_base" is the chain entry "base" gets linked under while its
 * children are processed (NULL to make it the head of the chain).  When
 * "base" has no children at all, its data is simply freed.
 */
static void find_unresolved_deltas(struct base_data *base,
                                   struct base_data *prev_base)
{
        int i, ref_first, ref_last, ofs_first, ofs_last;

        /*
         * This is a recursive function. Those brackets should help reducing
         * stack usage by limiting the scope of the delta_base union.
         */
        {
                union delta_base base_spec;

                /* children referring to the base by its SHA-1 */
                hashcpy(base_spec.sha1, base->obj->idx.sha1);
                find_delta_children(&base_spec,
                                    &ref_first, &ref_last, OBJ_REF_DELTA);

                /* children referring to the base by its offset in the pack */
                memset(&base_spec, 0, sizeof(base_spec));
                base_spec.offset = base->obj->idx.offset;
                find_delta_children(&base_spec,
                                    &ofs_first, &ofs_last, OBJ_OFS_DELTA);
        }

        /* no delta depends on this object: nothing more to do with it */
        if (ref_last == -1 && ofs_last == -1) {
                free(base->data);
                return;
        }

        link_base_data(prev_base, base);

        for (i = ref_first; i <= ref_last; i++) {
                struct object_entry *child = objects + deltas[i].obj_no;
                struct base_data result;

                assert(child->real_type == OBJ_REF_DELTA);
                resolve_delta(child, base, &result);
                /* last use of this base: drop its data before recursing */
                if (i == ref_last && ofs_last == -1)
                        free_base_data(base);
                find_unresolved_deltas(&result, base);
        }

        for (i = ofs_first; i <= ofs_last; i++) {
                struct object_entry *child = objects + deltas[i].obj_no;
                struct base_data result;

                assert(child->real_type == OBJ_OFS_DELTA);
                resolve_delta(child, base, &result);
                /* last use of this base: drop its data before recursing */
                if (i == ofs_last)
                        free_base_data(base);
                find_unresolved_deltas(&result, base);
        }

        unlink_base_data(base);
}
 599
 600static int compare_delta_entry(const void *a, const void *b)
 601{
 602        const struct delta_entry *delta_a = a;
 603        const struct delta_entry *delta_b = b;
 604
 605        /* group by type (ref vs ofs) and then by value (sha-1 or offset) */
 606        return compare_delta_bases(&delta_a->base, &delta_b->base,
 607                                   objects[delta_a->obj_no].type,
 608                                   objects[delta_b->obj_no].type);
 609}
 610
 611/* Parse all objects and return the pack content SHA1 hash */
 612static void parse_pack_objects(unsigned char *sha1)
 613{
 614        int i;
 615        struct delta_entry *delta = deltas;
 616        struct stat st;
 617
 618        /*
 619         * First pass:
 620         * - find locations of all objects;
 621         * - calculate SHA1 of all non-delta objects;
 622         * - remember base (SHA1 or offset) for all deltas.
 623         */
 624        if (verbose)
 625                progress = start_progress(
 626                                from_stdin ? "Receiving objects" : "Indexing objects",
 627                                nr_objects);
 628        for (i = 0; i < nr_objects; i++) {
 629                struct object_entry *obj = &objects[i];
 630                void *data = unpack_raw_entry(obj, &delta->base);
 631                obj->real_type = obj->type;
 632                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 633                        nr_deltas++;
 634                        delta->obj_no = i;
 635                        delta++;
 636                } else
 637                        sha1_object(data, obj->size, obj->type, obj->idx.sha1);
 638                free(data);
 639                display_progress(progress, i+1);
 640        }
 641        objects[i].idx.offset = consumed_bytes;
 642        stop_progress(&progress);
 643
 644        /* Check pack integrity */
 645        flush();
 646        git_SHA1_Final(sha1, &input_ctx);
 647        if (hashcmp(fill(20), sha1))
 648                die("pack is corrupted (SHA1 mismatch)");
 649        use(20);
 650
 651        /* If input_fd is a file, we should have reached its end now. */
 652        if (fstat(input_fd, &st))
 653                die_errno("cannot fstat packfile");
 654        if (S_ISREG(st.st_mode) &&
 655                        lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
 656                die("pack has junk at the end");
 657
 658        if (!nr_deltas)
 659                return;
 660
 661        /* Sort deltas by base SHA1/offset for fast searching */
 662        qsort(deltas, nr_deltas, sizeof(struct delta_entry),
 663              compare_delta_entry);
 664
 665        /*
 666         * Second pass:
 667         * - for all non-delta objects, look if it is used as a base for
 668         *   deltas;
 669         * - if used as a base, uncompress the object and apply all deltas,
 670         *   recursively checking if the resulting object is used as a base
 671         *   for some more deltas.
 672         */
 673        if (verbose)
 674                progress = start_progress("Resolving deltas", nr_deltas);
 675        for (i = 0; i < nr_objects; i++) {
 676                struct object_entry *obj = &objects[i];
 677                struct base_data base_obj;
 678
 679                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
 680                        continue;
 681                base_obj.obj = obj;
 682                base_obj.data = NULL;
 683                find_unresolved_deltas(&base_obj, NULL);
 684                display_progress(progress, nr_resolved_deltas);
 685        }
 686}
 687
 688static int write_compressed(struct sha1file *f, void *in, unsigned int size)
 689{
 690        z_stream stream;
 691        int status;
 692        unsigned char outbuf[4096];
 693
 694        memset(&stream, 0, sizeof(stream));
 695        deflateInit(&stream, zlib_compression_level);
 696        stream.next_in = in;
 697        stream.avail_in = size;
 698
 699        do {
 700                stream.next_out = outbuf;
 701                stream.avail_out = sizeof(outbuf);
 702                status = deflate(&stream, Z_FINISH);
 703                sha1write(f, outbuf, sizeof(outbuf) - stream.avail_out);
 704        } while (status == Z_OK);
 705
 706        if (status != Z_STREAM_END)
 707                die("unable to deflate appended object (%d)", status);
 708        size = stream.total_out;
 709        deflateEnd(&stream);
 710        return size;
 711}
 712
 713static struct object_entry *append_obj_to_pack(struct sha1file *f,
 714                               const unsigned char *sha1, void *buf,
 715                               unsigned long size, enum object_type type)
 716{
 717        struct object_entry *obj = &objects[nr_objects++];
 718        unsigned char header[10];
 719        unsigned long s = size;
 720        int n = 0;
 721        unsigned char c = (type << 4) | (s & 15);
 722        s >>= 4;
 723        while (s) {
 724                header[n++] = c | 0x80;
 725                c = s & 0x7f;
 726                s >>= 7;
 727        }
 728        header[n++] = c;
 729        crc32_begin(f);
 730        sha1write(f, header, n);
 731        obj[0].size = size;
 732        obj[0].hdr_size = n;
 733        obj[0].type = type;
 734        obj[0].real_type = type;
 735        obj[1].idx.offset = obj[0].idx.offset + n;
 736        obj[1].idx.offset += write_compressed(f, buf, size);
 737        obj[0].idx.crc32 = crc32_end(f);
 738        sha1flush(f);
 739        hashcpy(obj->idx.sha1, sha1);
 740        return obj;
 741}
 742
 743static int delta_pos_compare(const void *_a, const void *_b)
 744{
 745        struct delta_entry *a = *(struct delta_entry **)_a;
 746        struct delta_entry *b = *(struct delta_entry **)_b;
 747        return a->obj_no - b->obj_no;
 748}
 749
 750static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
 751{
 752        struct delta_entry **sorted_by_pos;
 753        int i, n = 0;
 754
 755        /*
 756         * Since many unresolved deltas may well be themselves base objects
 757         * for more unresolved deltas, we really want to include the
 758         * smallest number of base objects that would cover as much delta
 759         * as possible by picking the
 760         * trunc deltas first, allowing for other deltas to resolve without
 761         * additional base objects.  Since most base objects are to be found
 762         * before deltas depending on them, a good heuristic is to start
 763         * resolving deltas in the same order as their position in the pack.
 764         */
 765        sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
 766        for (i = 0; i < nr_deltas; i++) {
 767                if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
 768                        continue;
 769                sorted_by_pos[n++] = &deltas[i];
 770        }
 771        qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);
 772
 773        for (i = 0; i < n; i++) {
 774                struct delta_entry *d = sorted_by_pos[i];
 775                enum object_type type;
 776                struct base_data base_obj;
 777
 778                if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 779                        continue;
 780                base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size);
 781                if (!base_obj.data)
 782                        continue;
 783
 784                if (check_sha1_signature(d->base.sha1, base_obj.data,
 785                                base_obj.size, typename(type)))
 786                        die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
 787                base_obj.obj = append_obj_to_pack(f, d->base.sha1,
 788                                        base_obj.data, base_obj.size, type);
 789                find_unresolved_deltas(&base_obj, NULL);
 790                display_progress(progress, nr_resolved_deltas);
 791        }
 792        free(sorted_by_pos);
 793}
 794
 795static void final(const char *final_pack_name, const char *curr_pack_name,
 796                  const char *final_index_name, const char *curr_index_name,
 797                  const char *keep_name, const char *keep_msg,
 798                  unsigned char *sha1)
 799{
 800        const char *report = "pack";
 801        char name[PATH_MAX];
 802        int err;
 803
 804        if (!from_stdin) {
 805                close(input_fd);
 806        } else {
 807                fsync_or_die(output_fd, curr_pack_name);
 808                err = close(output_fd);
 809                if (err)
 810                        die_errno("error while closing pack file");
 811        }
 812
 813        if (keep_msg) {
 814                int keep_fd, keep_msg_len = strlen(keep_msg);
 815
 816                if (!keep_name)
 817                        keep_fd = odb_pack_keep(name, sizeof(name), sha1);
 818                else
 819                        keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
 820
 821                if (keep_fd < 0) {
 822                        if (errno != EEXIST)
 823                                die_errno("cannot write keep file '%s'",
 824                                          keep_name);
 825                } else {
 826                        if (keep_msg_len > 0) {
 827                                write_or_die(keep_fd, keep_msg, keep_msg_len);
 828                                write_or_die(keep_fd, "\n", 1);
 829                        }
 830                        if (close(keep_fd) != 0)
 831                                die_errno("cannot close written keep file '%s'",
 832                                    keep_name);
 833                        report = "keep";
 834                }
 835        }
 836
 837        if (final_pack_name != curr_pack_name) {
 838                if (!final_pack_name) {
 839                        snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
 840                                 get_object_directory(), sha1_to_hex(sha1));
 841                        final_pack_name = name;
 842                }
 843                if (move_temp_to_file(curr_pack_name, final_pack_name))
 844                        die("cannot store pack file");
 845        } else if (from_stdin)
 846                chmod(final_pack_name, 0444);
 847
 848        if (final_index_name != curr_index_name) {
 849                if (!final_index_name) {
 850                        snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
 851                                 get_object_directory(), sha1_to_hex(sha1));
 852                        final_index_name = name;
 853                }
 854                if (move_temp_to_file(curr_index_name, final_index_name))
 855                        die("cannot store index file");
 856        } else
 857                chmod(final_index_name, 0444);
 858
 859        if (!from_stdin) {
 860                printf("%s\n", sha1_to_hex(sha1));
 861        } else {
 862                char buf[48];
 863                int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
 864                                   report, sha1_to_hex(sha1));
 865                write_or_die(1, buf, len);
 866
 867                /*
 868                 * Let's just mimic git-unpack-objects here and write
 869                 * the last part of the input buffer to stdout.
 870                 */
 871                while (input_len) {
 872                        err = xwrite(1, input_buffer + input_offset, input_len);
 873                        if (err <= 0)
 874                                break;
 875                        input_len -= err;
 876                        input_offset += err;
 877                }
 878        }
 879}
 880
 881static int git_index_pack_config(const char *k, const char *v, void *cb)
 882{
 883        struct pack_idx_option *opts = cb;
 884
 885        if (!strcmp(k, "pack.indexversion")) {
 886                opts->version = git_config_int(k, v);
 887                if (opts->version > 2)
 888                        die("bad pack.indexversion=%"PRIu32, opts->version);
 889                return 0;
 890        }
 891        return git_default_config(k, v, cb);
 892}
 893
 894static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
 895{
 896        struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
 897
 898        if (!p)
 899                die("Cannot open existing pack file '%s'", pack_name);
 900        if (open_pack_index(p))
 901                die("Cannot open existing pack idx file for '%s'", pack_name);
 902
 903        /* Read the attributes from the existing idx file */
 904        opts->version = p->index_version;
 905
 906        /*
 907         * Get rid of the idx file as we do not need it anymore.
 908         * NEEDSWORK: extract this bit from free_pack_by_name() in
 909         * sha1_file.c, perhaps?  It shouldn't matter very much as we
 910         * know we haven't installed this pack (hence we never have
 911         * read anything from it).
 912         */
 913        close_pack_index(p);
 914        free(p);
 915}
 916
 917int cmd_index_pack(int argc, const char **argv, const char *prefix)
 918{
 919        int i, fix_thin_pack = 0, verify = 0;
 920        const char *curr_pack, *curr_index;
 921        const char *index_name = NULL, *pack_name = NULL;
 922        const char *keep_name = NULL, *keep_msg = NULL;
 923        char *index_name_buf = NULL, *keep_name_buf = NULL;
 924        struct pack_idx_entry **idx_objects;
 925        struct pack_idx_option opts;
 926        unsigned char pack_sha1[20];
 927
 928        if (argc == 2 && !strcmp(argv[1], "-h"))
 929                usage(index_pack_usage);
 930
 931        read_replace_refs = 0;
 932
 933        reset_pack_idx_option(&opts);
 934        git_config(git_index_pack_config, &opts);
 935        if (prefix && chdir(prefix))
 936                die("Cannot come back to cwd");
 937
 938        for (i = 1; i < argc; i++) {
 939                const char *arg = argv[i];
 940
 941                if (*arg == '-') {
 942                        if (!strcmp(arg, "--stdin")) {
 943                                from_stdin = 1;
 944                        } else if (!strcmp(arg, "--fix-thin")) {
 945                                fix_thin_pack = 1;
 946                        } else if (!strcmp(arg, "--strict")) {
 947                                strict = 1;
 948                        } else if (!strcmp(arg, "--verify")) {
 949                                verify = 1;
 950                        } else if (!strcmp(arg, "--keep")) {
 951                                keep_msg = "";
 952                        } else if (!prefixcmp(arg, "--keep=")) {
 953                                keep_msg = arg + 7;
 954                        } else if (!prefixcmp(arg, "--pack_header=")) {
 955                                struct pack_header *hdr;
 956                                char *c;
 957
 958                                hdr = (struct pack_header *)input_buffer;
 959                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 960                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 961                                if (*c != ',')
 962                                        die("bad %s", arg);
 963                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 964                                if (*c)
 965                                        die("bad %s", arg);
 966                                input_len = sizeof(*hdr);
 967                        } else if (!strcmp(arg, "-v")) {
 968                                verbose = 1;
 969                        } else if (!strcmp(arg, "-o")) {
 970                                if (index_name || (i+1) >= argc)
 971                                        usage(index_pack_usage);
 972                                index_name = argv[++i];
 973                        } else if (!prefixcmp(arg, "--index-version=")) {
 974                                char *c;
 975                                opts.version = strtoul(arg + 16, &c, 10);
 976                                if (opts.version > 2)
 977                                        die("bad %s", arg);
 978                                if (*c == ',')
 979                                        opts.off32_limit = strtoul(c+1, &c, 0);
 980                                if (*c || opts.off32_limit & 0x80000000)
 981                                        die("bad %s", arg);
 982                        } else
 983                                usage(index_pack_usage);
 984                        continue;
 985                }
 986
 987                if (pack_name)
 988                        usage(index_pack_usage);
 989                pack_name = arg;
 990        }
 991
 992        if (!pack_name && !from_stdin)
 993                usage(index_pack_usage);
 994        if (fix_thin_pack && !from_stdin)
 995                die("--fix-thin cannot be used without --stdin");
 996        if (!index_name && pack_name) {
 997                int len = strlen(pack_name);
 998                if (!has_extension(pack_name, ".pack"))
 999                        die("packfile name '%s' does not end with '.pack'",
1000                            pack_name);
1001                index_name_buf = xmalloc(len);
1002                memcpy(index_name_buf, pack_name, len - 5);
1003                strcpy(index_name_buf + len - 5, ".idx");
1004                index_name = index_name_buf;
1005        }
1006        if (keep_msg && !keep_name && pack_name) {
1007                int len = strlen(pack_name);
1008                if (!has_extension(pack_name, ".pack"))
1009                        die("packfile name '%s' does not end with '.pack'",
1010                            pack_name);
1011                keep_name_buf = xmalloc(len);
1012                memcpy(keep_name_buf, pack_name, len - 5);
1013                strcpy(keep_name_buf + len - 5, ".keep");
1014                keep_name = keep_name_buf;
1015        }
1016        if (verify) {
1017                if (!index_name)
1018                        die("--verify with no packfile name given");
1019                read_idx_option(&opts, index_name);
1020                opts.flags |= WRITE_IDX_VERIFY;
1021        }
1022
1023        curr_pack = open_pack_file(pack_name);
1024        parse_pack_header();
1025        objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
1026        deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
1027        parse_pack_objects(pack_sha1);
1028        if (nr_deltas == nr_resolved_deltas) {
1029                stop_progress(&progress);
1030                /* Flush remaining pack final 20-byte SHA1. */
1031                flush();
1032        } else {
1033                if (fix_thin_pack) {
1034                        struct sha1file *f;
1035                        unsigned char read_sha1[20], tail_sha1[20];
1036                        char msg[48];
1037                        int nr_unresolved = nr_deltas - nr_resolved_deltas;
1038                        int nr_objects_initial = nr_objects;
1039                        if (nr_unresolved <= 0)
1040                                die("confusion beyond insanity");
1041                        objects = xrealloc(objects,
1042                                           (nr_objects + nr_unresolved + 1)
1043                                           * sizeof(*objects));
1044                        f = sha1fd(output_fd, curr_pack);
1045                        fix_unresolved_deltas(f, nr_unresolved);
1046                        sprintf(msg, "completed with %d local objects",
1047                                nr_objects - nr_objects_initial);
1048                        stop_progress_msg(&progress, msg);
1049                        sha1close(f, tail_sha1, 0);
1050                        hashcpy(read_sha1, pack_sha1);
1051                        fixup_pack_header_footer(output_fd, pack_sha1,
1052                                                 curr_pack, nr_objects,
1053                                                 read_sha1, consumed_bytes-20);
1054                        if (hashcmp(read_sha1, tail_sha1) != 0)
1055                                die("Unexpected tail checksum for %s "
1056                                    "(disk corruption?)", curr_pack);
1057                }
1058                if (nr_deltas != nr_resolved_deltas)
1059                        die("pack has %d unresolved deltas",
1060                            nr_deltas - nr_resolved_deltas);
1061        }
1062        free(deltas);
1063        if (strict)
1064                check_objects();
1065
1066        idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *));
1067        for (i = 0; i < nr_objects; i++)
1068                idx_objects[i] = &objects[i].idx;
1069        curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1);
1070        free(idx_objects);
1071
1072        if (!verify)
1073                final(pack_name, curr_pack,
1074                      index_name, curr_index,
1075                      keep_name, keep_msg,
1076                      pack_sha1);
1077        else
1078                close(input_fd);
1079        free(objects);
1080        free(index_name_buf);
1081        free(keep_name_buf);
1082        if (pack_name == NULL)
1083                free((void *) curr_pack);
1084        if (index_name == NULL)
1085                free((void *) curr_index);
1086
1087        return 0;
1088}