pack-objects.con commit also adds progress when actually writing a pack (5e8dc75)
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4#include "pack.h"
   5#include "csum-file.h"
   6#include <sys/time.h>
   7#include <signal.h>
   8
   9static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
  10
  11struct object_entry {
  12        unsigned char sha1[20];
  13        unsigned long size;     /* uncompressed size */
  14        unsigned long offset;   /* offset into the final pack file;
  15                                 * nonzero if already written.
  16                                 */
  17        unsigned int depth;     /* delta depth */
  18        unsigned int delta_limit;       /* base adjustment for in-pack delta */
  19        unsigned int hash;      /* name hint hash */
  20        enum object_type type;
  21        enum object_type in_pack_type;  /* could be delta */
  22        unsigned long delta_size;       /* delta data size (uncompressed) */
  23        struct object_entry *delta;     /* delta base object */
  24        struct packed_git *in_pack;     /* already in pack */
  25        unsigned int in_pack_offset;
  26        struct object_entry *delta_child; /* delitified objects who bases me */
  27        struct object_entry *delta_sibling; /* other deltified objects who
  28                                             * uses the same base as me
  29                                             */
  30};
  31
  32/*
  33 * Objects we are going to pack are colected in objects array (dynamically
  34 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
  35 * in the order we see -- typically rev-list --objects order that gives us
  36 * nice "minimum seek" order.
  37 *
  38 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
  39 * elements in the objects array.  The former is used to build the pack
  40 * index (lists object names in the ascending order to help offset lookup),
  41 * and the latter is used to group similar things together by try_delta()
  42 * heuristics.
  43 */
  44
  45static unsigned char object_list_sha1[20];
  46static int non_empty = 0;
  47static int no_reuse_delta = 0;
  48static int local = 0;
  49static int incremental = 0;
  50static struct object_entry **sorted_by_sha, **sorted_by_type;
  51static struct object_entry *objects = NULL;
  52static int nr_objects = 0, nr_alloc = 0;
  53static const char *base_name;
  54static unsigned char pack_file_sha1[20];
  55static int progress = 1;
  56static volatile int progress_update = 0;
  57
  58/*
  59 * The object names in objects array are hashed with this hashtable,
  60 * to help looking up the entry by object name.  Binary search from
  61 * sorted_by_sha is also possible but this was easier to code and faster.
  62 * This hashtable is built after all the objects are seen.
  63 */
  64static int *object_ix = NULL;
  65static int object_ix_hashsz = 0;
  66
  67/*
  68 * Pack index for existing packs give us easy access to the offsets into
  69 * corresponding pack file where each object's data starts, but the entries
  70 * do not store the size of the compressed representation (uncompressed
  71 * size is easily available by examining the pack entry header).  We build
  72 * a hashtable of existing packs (pack_revindex), and keep reverse index
  73 * here -- pack index file is sorted by object name mapping to offset; this
  74 * pack_revindex[].revindex array is an ordered list of offsets, so if you
  75 * know the offset of an object, next offset is where its packed
  76 * representation ends.
  77 */
  78struct pack_revindex {
  79        struct packed_git *p;
  80        unsigned long *revindex;
  81} *pack_revindex = NULL;
  82static int pack_revindex_hashsz = 0;
  83
  84/*
  85 * stats
  86 */
  87static int written = 0;
  88static int written_delta = 0;
  89static int reused = 0;
  90static int reused_delta = 0;
  91
  92static int pack_revindex_ix(struct packed_git *p)
  93{
  94        unsigned int ui = (unsigned int) p;
  95        int i;
  96
  97        ui = ui ^ (ui >> 16); /* defeat structure alignment */
  98        i = (int)(ui % pack_revindex_hashsz);
  99        while (pack_revindex[i].p) {
 100                if (pack_revindex[i].p == p)
 101                        return i;
 102                if (++i == pack_revindex_hashsz)
 103                        i = 0;
 104        }
 105        return -1 - i;
 106}
 107
 108static void prepare_pack_ix(void)
 109{
 110        int num;
 111        struct packed_git *p;
 112        for (num = 0, p = packed_git; p; p = p->next)
 113                num++;
 114        if (!num)
 115                return;
 116        pack_revindex_hashsz = num * 11;
 117        pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
 118        for (p = packed_git; p; p = p->next) {
 119                num = pack_revindex_ix(p);
 120                num = - 1 - num;
 121                pack_revindex[num].p = p;
 122        }
 123        /* revindex elements are lazily initialized */
 124}
 125
 126static int cmp_offset(const void *a_, const void *b_)
 127{
 128        unsigned long a = *(unsigned long *) a_;
 129        unsigned long b = *(unsigned long *) b_;
 130        if (a < b)
 131                return -1;
 132        else if (a == b)
 133                return 0;
 134        else
 135                return 1;
 136}
 137
 138/*
 139 * Ordered list of offsets of objects in the pack.
 140 */
 141static void prepare_pack_revindex(struct pack_revindex *rix)
 142{
 143        struct packed_git *p = rix->p;
 144        int num_ent = num_packed_objects(p);
 145        int i;
 146        void *index = p->index_base + 256;
 147
 148        rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
 149        for (i = 0; i < num_ent; i++) {
 150                long hl = *((long *)(index + 24 * i));
 151                rix->revindex[i] = ntohl(hl);
 152        }
 153        /* This knows the pack format -- the 20-byte trailer
 154         * follows immediately after the last object data.
 155         */
 156        rix->revindex[num_ent] = p->pack_size - 20;
 157        qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
 158}
 159
 160static unsigned long find_packed_object_size(struct packed_git *p,
 161                                             unsigned long ofs)
 162{
 163        int num;
 164        int lo, hi;
 165        struct pack_revindex *rix;
 166        unsigned long *revindex;
 167        num = pack_revindex_ix(p);
 168        if (num < 0)
 169                die("internal error: pack revindex uninitialized");
 170        rix = &pack_revindex[num];
 171        if (!rix->revindex)
 172                prepare_pack_revindex(rix);
 173        revindex = rix->revindex;
 174        lo = 0;
 175        hi = num_packed_objects(p) + 1;
 176        do {
 177                int mi = (lo + hi) / 2;
 178                if (revindex[mi] == ofs) {
 179                        return revindex[mi+1] - ofs;
 180                }
 181                else if (ofs < revindex[mi])
 182                        hi = mi;
 183                else
 184                        lo = mi + 1;
 185        } while (lo < hi);
 186        die("internal error: pack revindex corrupt");
 187}
 188
 189static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 190{
 191        unsigned long othersize, delta_size;
 192        char type[10];
 193        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
 194        void *delta_buf;
 195
 196        if (!otherbuf)
 197                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
 198        delta_buf = diff_delta(otherbuf, othersize,
 199                               buf, size, &delta_size, 0);
 200        if (!delta_buf || delta_size != entry->delta_size)
 201                die("delta size changed");
 202        free(buf);
 203        free(otherbuf);
 204        return delta_buf;
 205}
 206
 207/*
 208 * The per-object header is a pretty dense thing, which is
 209 *  - first byte: low four bits are "size", then three bits of "type",
 210 *    and the high bit is "size continues".
 211 *  - each byte afterwards: low seven bits are size continuation,
 212 *    with the high bit being "size continues"
 213 */
 214static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
 215{
 216        int n = 1;
 217        unsigned char c;
 218
 219        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 220                die("bad type %d", type);
 221
 222        c = (type << 4) | (size & 15);
 223        size >>= 4;
 224        while (size) {
 225                *hdr++ = c | 0x80;
 226                c = size & 0x7f;
 227                size >>= 7;
 228                n++;
 229        }
 230        *hdr = c;
 231        return n;
 232}
 233
 234static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
 235{
 236        unsigned long size;
 237        char type[10];
 238        void *buf;
 239        unsigned char header[10];
 240        unsigned hdrlen, datalen;
 241        enum object_type obj_type;
 242        int to_reuse = 0;
 243
 244        obj_type = entry->type;
 245        if (! entry->in_pack)
 246                to_reuse = 0;   /* can't reuse what we don't have */
 247        else if (obj_type == OBJ_DELTA)
 248                to_reuse = 1;   /* check_object() decided it for us */
 249        else if (obj_type != entry->in_pack_type)
 250                to_reuse = 0;   /* pack has delta which is unusable */
 251        else if (entry->delta)
 252                to_reuse = 0;   /* we want to pack afresh */
 253        else
 254                to_reuse = 1;   /* we have it in-pack undeltified,
 255                                 * and we do not need to deltify it.
 256                                 */
 257
 258        if (! to_reuse) {
 259                buf = read_sha1_file(entry->sha1, type, &size);
 260                if (!buf)
 261                        die("unable to read %s", sha1_to_hex(entry->sha1));
 262                if (size != entry->size)
 263                        die("object %s size inconsistency (%lu vs %lu)",
 264                            sha1_to_hex(entry->sha1), size, entry->size);
 265                if (entry->delta) {
 266                        buf = delta_against(buf, size, entry);
 267                        size = entry->delta_size;
 268                        obj_type = OBJ_DELTA;
 269                }
 270                /*
 271                 * The object header is a byte of 'type' followed by zero or
 272                 * more bytes of length.  For deltas, the 20 bytes of delta
 273                 * sha1 follows that.
 274                 */
 275                hdrlen = encode_header(obj_type, size, header);
 276                sha1write(f, header, hdrlen);
 277
 278                if (entry->delta) {
 279                        sha1write(f, entry->delta, 20);
 280                        hdrlen += 20;
 281                }
 282                datalen = sha1write_compressed(f, buf, size);
 283                free(buf);
 284        }
 285        else {
 286                struct packed_git *p = entry->in_pack;
 287                use_packed_git(p);
 288
 289                datalen = find_packed_object_size(p, entry->in_pack_offset);
 290                buf = p->pack_base + entry->in_pack_offset;
 291                sha1write(f, buf, datalen);
 292                unuse_packed_git(p);
 293                hdrlen = 0; /* not really */
 294                if (obj_type == OBJ_DELTA)
 295                        reused_delta++;
 296                reused++;
 297        }
 298        if (obj_type == OBJ_DELTA)
 299                written_delta++;
 300        written++;
 301        return hdrlen + datalen;
 302}
 303
 304static unsigned long write_one(struct sha1file *f,
 305                               struct object_entry *e,
 306                               unsigned long offset)
 307{
 308        if (e->offset)
 309                /* offset starts from header size and cannot be zero
 310                 * if it is written already.
 311                 */
 312                return offset;
 313        e->offset = offset;
 314        offset += write_object(f, e);
 315        /* if we are deltified, write out its base object. */
 316        if (e->delta)
 317                offset = write_one(f, e->delta, offset);
 318        return offset;
 319}
 320
 321static void write_pack_file(void)
 322{
 323        int i;
 324        struct sha1file *f;
 325        unsigned long offset;
 326        struct pack_header hdr;
 327
 328        if (!base_name)
 329                f = sha1fd(1, "<stdout>");
 330        else
 331                f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
 332        hdr.hdr_signature = htonl(PACK_SIGNATURE);
 333        hdr.hdr_version = htonl(PACK_VERSION);
 334        hdr.hdr_entries = htonl(nr_objects);
 335        sha1write(f, &hdr, sizeof(hdr));
 336        offset = sizeof(hdr);
 337        for (i = 0; i < nr_objects; i++) {
 338                offset = write_one(f, objects + i, offset);
 339                if (progress_update) {
 340                        fprintf(stderr, "Writing (%d %d%%)\r",
 341                                i+1, (i+1) * 100/nr_objects);
 342                        progress_update = 0;
 343                }
 344        }
 345
 346        sha1close(f, pack_file_sha1, 1);
 347}
 348
 349static void write_index_file(void)
 350{
 351        int i;
 352        struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
 353        struct object_entry **list = sorted_by_sha;
 354        struct object_entry **last = list + nr_objects;
 355        unsigned int array[256];
 356
 357        /*
 358         * Write the first-level table (the list is sorted,
 359         * but we use a 256-entry lookup to be able to avoid
 360         * having to do eight extra binary search iterations).
 361         */
 362        for (i = 0; i < 256; i++) {
 363                struct object_entry **next = list;
 364                while (next < last) {
 365                        struct object_entry *entry = *next;
 366                        if (entry->sha1[0] != i)
 367                                break;
 368                        next++;
 369                }
 370                array[i] = htonl(next - sorted_by_sha);
 371                list = next;
 372        }
 373        sha1write(f, array, 256 * sizeof(int));
 374
 375        /*
 376         * Write the actual SHA1 entries..
 377         */
 378        list = sorted_by_sha;
 379        for (i = 0; i < nr_objects; i++) {
 380                struct object_entry *entry = *list++;
 381                unsigned int offset = htonl(entry->offset);
 382                sha1write(f, &offset, 4);
 383                sha1write(f, entry->sha1, 20);
 384        }
 385        sha1write(f, pack_file_sha1, 20);
 386        sha1close(f, NULL, 1);
 387}
 388
 389static int add_object_entry(unsigned char *sha1, unsigned int hash)
 390{
 391        unsigned int idx = nr_objects;
 392        struct object_entry *entry;
 393        struct packed_git *p;
 394        unsigned int found_offset = 0;
 395        struct packed_git *found_pack = NULL;
 396
 397        for (p = packed_git; p; p = p->next) {
 398                struct pack_entry e;
 399                if (find_pack_entry_one(sha1, &e, p)) {
 400                        if (incremental)
 401                                return 0;
 402                        if (local && !p->pack_local)
 403                                return 0;
 404                        if (!found_pack) {
 405                                found_offset = e.offset;
 406                                found_pack = e.p;
 407                        }
 408                }
 409        }
 410
 411        if (idx >= nr_alloc) {
 412                unsigned int needed = (idx + 1024) * 3 / 2;
 413                objects = xrealloc(objects, needed * sizeof(*entry));
 414                nr_alloc = needed;
 415        }
 416        entry = objects + idx;
 417        memset(entry, 0, sizeof(*entry));
 418        memcpy(entry->sha1, sha1, 20);
 419        entry->hash = hash;
 420        if (found_pack) {
 421                entry->in_pack = found_pack;
 422                entry->in_pack_offset = found_offset;
 423        }
 424        nr_objects = idx+1;
 425        return 1;
 426}
 427
 428static int locate_object_entry_hash(unsigned char *sha1)
 429{
 430        int i;
 431        unsigned int ui;
 432        memcpy(&ui, sha1, sizeof(unsigned int));
 433        i = ui % object_ix_hashsz;
 434        while (0 < object_ix[i]) {
 435                if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
 436                        return i;
 437                if (++i == object_ix_hashsz)
 438                        i = 0;
 439        }
 440        return -1 - i;
 441}
 442
 443static struct object_entry *locate_object_entry(unsigned char *sha1)
 444{
 445        int i = locate_object_entry_hash(sha1);
 446        if (0 <= i)
 447                return &objects[object_ix[i]-1];
 448        return NULL;
 449}
 450
 451static void check_object(struct object_entry *entry)
 452{
 453        char type[20];
 454
 455        if (entry->in_pack) {
 456                unsigned char base[20];
 457                unsigned long size;
 458                struct object_entry *base_entry;
 459
 460                /* We want in_pack_type even if we do not reuse delta.
 461                 * There is no point not reusing non-delta representations.
 462                 */
 463                check_reuse_pack_delta(entry->in_pack,
 464                                       entry->in_pack_offset,
 465                                       base, &size,
 466                                       &entry->in_pack_type);
 467
 468                /* Check if it is delta, and the base is also an object
 469                 * we are going to pack.  If so we will reuse the existing
 470                 * delta.
 471                 */
 472                if (!no_reuse_delta &&
 473                    entry->in_pack_type == OBJ_DELTA &&
 474                    (base_entry = locate_object_entry(base))) {
 475
 476                        /* Depth value does not matter - find_deltas()
 477                         * will never consider reused delta as the
 478                         * base object to deltify other objects
 479                         * against, in order to avoid circular deltas.
 480                         */
 481
 482                        /* uncompressed size of the delta data */
 483                        entry->size = entry->delta_size = size;
 484                        entry->delta = base_entry;
 485                        entry->type = OBJ_DELTA;
 486
 487                        entry->delta_sibling = base_entry->delta_child;
 488                        base_entry->delta_child = entry;
 489
 490                        return;
 491                }
 492                /* Otherwise we would do the usual */
 493        }
 494
 495        if (sha1_object_info(entry->sha1, type, &entry->size))
 496                die("unable to get type of object %s",
 497                    sha1_to_hex(entry->sha1));
 498
 499        if (!strcmp(type, "commit")) {
 500                entry->type = OBJ_COMMIT;
 501        } else if (!strcmp(type, "tree")) {
 502                entry->type = OBJ_TREE;
 503        } else if (!strcmp(type, "blob")) {
 504                entry->type = OBJ_BLOB;
 505        } else if (!strcmp(type, "tag")) {
 506                entry->type = OBJ_TAG;
 507        } else
 508                die("unable to pack object %s of type %s",
 509                    sha1_to_hex(entry->sha1), type);
 510}
 511
 512static void hash_objects(void)
 513{
 514        int i;
 515        struct object_entry *oe;
 516
 517        object_ix_hashsz = nr_objects * 2;
 518        object_ix = xcalloc(sizeof(int), object_ix_hashsz);
 519        for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
 520                int ix = locate_object_entry_hash(oe->sha1);
 521                if (0 <= ix) {
 522                        error("the same object '%s' added twice",
 523                              sha1_to_hex(oe->sha1));
 524                        continue;
 525                }
 526                ix = -1 - ix;
 527                object_ix[ix] = i + 1;
 528        }
 529}
 530
 531static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 532{
 533        struct object_entry *child = me->delta_child;
 534        unsigned int m = n;
 535        while (child) {
 536                unsigned int c = check_delta_limit(child, n + 1);
 537                if (m < c)
 538                        m = c;
 539                child = child->delta_sibling;
 540        }
 541        return m;
 542}
 543
 544static void get_object_details(void)
 545{
 546        int i;
 547        struct object_entry *entry;
 548
 549        hash_objects();
 550        prepare_pack_ix();
 551        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 552                check_object(entry);
 553        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 554                if (!entry->delta && entry->delta_child)
 555                        entry->delta_limit =
 556                                check_delta_limit(entry, 1);
 557}
 558
 559typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 560
 561static entry_sort_t current_sort;
 562
 563static int sort_comparator(const void *_a, const void *_b)
 564{
 565        struct object_entry *a = *(struct object_entry **)_a;
 566        struct object_entry *b = *(struct object_entry **)_b;
 567        return current_sort(a,b);
 568}
 569
 570static struct object_entry **create_sorted_list(entry_sort_t sort)
 571{
 572        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 573        int i;
 574
 575        for (i = 0; i < nr_objects; i++)
 576                list[i] = objects + i;
 577        current_sort = sort;
 578        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 579        return list;
 580}
 581
 582static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 583{
 584        return memcmp(a->sha1, b->sha1, 20);
 585}
 586
 587static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 588{
 589        if (a->type < b->type)
 590                return -1;
 591        if (a->type > b->type)
 592                return 1;
 593        if (a->hash < b->hash)
 594                return -1;
 595        if (a->hash > b->hash)
 596                return 1;
 597        if (a->size < b->size)
 598                return -1;
 599        if (a->size > b->size)
 600                return 1;
 601        return a < b ? -1 : (a > b);
 602}
 603
 604struct unpacked {
 605        struct object_entry *entry;
 606        void *data;
 607};
 608
 609/*
 610 * We search for deltas _backwards_ in a list sorted by type and
 611 * by size, so that we see progressively smaller and smaller files.
 612 * That's because we prefer deltas to be from the bigger file
 613 * to the smaller - deletes are potentially cheaper, but perhaps
 614 * more importantly, the bigger file is likely the more recent
 615 * one.
 616 */
 617static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
 618{
 619        struct object_entry *cur_entry = cur->entry;
 620        struct object_entry *old_entry = old->entry;
 621        unsigned long size, oldsize, delta_size, sizediff;
 622        long max_size;
 623        void *delta_buf;
 624
 625        /* Don't bother doing diffs between different types */
 626        if (cur_entry->type != old_entry->type)
 627                return -1;
 628
 629        /* If the current object is at edge, take the depth the objects
 630         * that depend on the current object into account -- otherwise
 631         * they would become too deep.
 632         */
 633        if (cur_entry->delta_child) {
 634                if (max_depth <= cur_entry->delta_limit)
 635                        return 0;
 636                max_depth -= cur_entry->delta_limit;
 637        }
 638
 639        size = cur_entry->size;
 640        if (size < 50)
 641                return -1;
 642        oldsize = old_entry->size;
 643        sizediff = oldsize > size ? oldsize - size : size - oldsize;
 644        if (sizediff > size / 8)
 645                return -1;
 646        if (old_entry->depth >= max_depth)
 647                return 0;
 648
 649        /*
 650         * NOTE!
 651         *
 652         * We always delta from the bigger to the smaller, since that's
 653         * more space-efficient (deletes don't have to say _what_ they
 654         * delete).
 655         */
 656        max_size = size / 2 - 20;
 657        if (cur_entry->delta)
 658                max_size = cur_entry->delta_size-1;
 659        if (sizediff >= max_size)
 660                return -1;
 661        delta_buf = diff_delta(old->data, oldsize,
 662                               cur->data, size, &delta_size, max_size);
 663        if (!delta_buf)
 664                return 0;
 665        cur_entry->delta = old_entry;
 666        cur_entry->delta_size = delta_size;
 667        cur_entry->depth = old_entry->depth + 1;
 668        free(delta_buf);
 669        return 0;
 670}
 671
 672static void progress_interval(int signum)
 673{
 674        signal(SIGALRM, progress_interval);
 675        progress_update = 1;
 676}
 677
 678static void find_deltas(struct object_entry **list, int window, int depth)
 679{
 680        int i, idx;
 681        unsigned int array_size = window * sizeof(struct unpacked);
 682        struct unpacked *array = xmalloc(array_size);
 683
 684        memset(array, 0, array_size);
 685        i = nr_objects;
 686        idx = 0;
 687
 688        while (--i >= 0) {
 689                struct object_entry *entry = list[i];
 690                struct unpacked *n = array + idx;
 691                unsigned long size;
 692                char type[10];
 693                int j;
 694
 695                if (progress_update || i == 0) {
 696                        fprintf(stderr, "Deltifying (%d %d%%)\r",
 697                                nr_objects-i, (nr_objects-i) * 100/nr_objects);
 698                        progress_update = 0;
 699                }
 700
 701                if (entry->delta)
 702                        /* This happens if we decided to reuse existing
 703                         * delta from a pack.  "!no_reuse_delta &&" is implied.
 704                         */
 705                        continue;
 706
 707                free(n->data);
 708                n->entry = entry;
 709                n->data = read_sha1_file(entry->sha1, type, &size);
 710                if (size != entry->size)
 711                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 712
 713                j = window;
 714                while (--j > 0) {
 715                        unsigned int other_idx = idx + j;
 716                        struct unpacked *m;
 717                        if (other_idx >= window)
 718                                other_idx -= window;
 719                        m = array + other_idx;
 720                        if (!m->entry)
 721                                break;
 722                        if (try_delta(n, m, depth) < 0)
 723                                break;
 724                }
 725                idx++;
 726                if (idx >= window)
 727                        idx = 0;
 728        }
 729
 730        if (progress)
 731                fputc('\n', stderr);
 732
 733        for (i = 0; i < window; ++i)
 734                free(array[i].data);
 735        free(array);
 736}
 737
 738static void prepare_pack(int window, int depth)
 739{
 740        get_object_details();
 741        sorted_by_type = create_sorted_list(type_size_sort);
 742        if (window && depth)
 743                find_deltas(sorted_by_type, window+1, depth);
 744}
 745
 746static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
 747{
 748        static const char cache[] = "pack-cache/pack-%s.%s";
 749        char *cached_pack, *cached_idx;
 750        int ifd, ofd, ifd_ix = -1;
 751
 752        cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
 753        ifd = open(cached_pack, O_RDONLY);
 754        if (ifd < 0)
 755                return 0;
 756
 757        if (!pack_to_stdout) {
 758                cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
 759                ifd_ix = open(cached_idx, O_RDONLY);
 760                if (ifd_ix < 0) {
 761                        close(ifd);
 762                        return 0;
 763                }
 764        }
 765
 766        if (progress)
 767                fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
 768                        sha1_to_hex(sha1));
 769
 770        if (pack_to_stdout) {
 771                if (copy_fd(ifd, 1))
 772                        exit(1);
 773                close(ifd);
 774        }
 775        else {
 776                char name[PATH_MAX];
 777                snprintf(name, sizeof(name),
 778                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
 779                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 780                if (ofd < 0)
 781                        die("unable to open %s (%s)", name, strerror(errno));
 782                if (copy_fd(ifd, ofd))
 783                        exit(1);
 784                close(ifd);
 785
 786                snprintf(name, sizeof(name),
 787                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
 788                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 789                if (ofd < 0)
 790                        die("unable to open %s (%s)", name, strerror(errno));
 791                if (copy_fd(ifd_ix, ofd))
 792                        exit(1);
 793                close(ifd_ix);
 794                puts(sha1_to_hex(sha1));
 795        }
 796
 797        return 1;
 798}
 799
 800int main(int argc, char **argv)
 801{
 802        SHA_CTX ctx;
 803        char line[PATH_MAX + 20];
 804        int window = 10, depth = 10, pack_to_stdout = 0;
 805        struct object_entry **list;
 806        int i;
 807
 808        setup_git_directory();
 809
 810        for (i = 1; i < argc; i++) {
 811                const char *arg = argv[i];
 812
 813                if (*arg == '-') {
 814                        if (!strcmp("--non-empty", arg)) {
 815                                non_empty = 1;
 816                                continue;
 817                        }
 818                        if (!strcmp("--local", arg)) {
 819                                local = 1;
 820                                continue;
 821                        }
 822                        if (!strcmp("--incremental", arg)) {
 823                                incremental = 1;
 824                                continue;
 825                        }
 826                        if (!strncmp("--window=", arg, 9)) {
 827                                char *end;
 828                                window = strtoul(arg+9, &end, 0);
 829                                if (!arg[9] || *end)
 830                                        usage(pack_usage);
 831                                continue;
 832                        }
 833                        if (!strncmp("--depth=", arg, 8)) {
 834                                char *end;
 835                                depth = strtoul(arg+8, &end, 0);
 836                                if (!arg[8] || *end)
 837                                        usage(pack_usage);
 838                                continue;
 839                        }
 840                        if (!strcmp("-q", arg)) {
 841                                progress = 0;
 842                                continue;
 843                        }
 844                        if (!strcmp("--no-reuse-delta", arg)) {
 845                                no_reuse_delta = 1;
 846                                continue;
 847                        }
 848                        if (!strcmp("--stdout", arg)) {
 849                                pack_to_stdout = 1;
 850                                continue;
 851                        }
 852                        usage(pack_usage);
 853                }
 854                if (base_name)
 855                        usage(pack_usage);
 856                base_name = arg;
 857        }
 858
 859        if (pack_to_stdout != !base_name)
 860                usage(pack_usage);
 861
 862        prepare_packed_git();
 863
 864        if (progress) {
 865                struct itimerval v;
 866                v.it_interval.tv_sec = 1;
 867                v.it_interval.tv_usec = 0;
 868                v.it_value = v.it_interval;
 869                signal(SIGALRM, progress_interval);
 870                setitimer(ITIMER_REAL, &v, NULL);
 871                fprintf(stderr, "Generating pack...\n");
 872        }
 873
 874        while (fgets(line, sizeof(line), stdin) != NULL) {
 875                unsigned int hash;
 876                char *p;
 877                unsigned char sha1[20];
 878
 879                if (progress_update) {
 880                        fprintf(stderr, "Counting objects...%d\r", nr_objects);
 881                        progress_update = 0;
 882                }
 883                if (get_sha1_hex(line, sha1))
 884                        die("expected sha1, got garbage:\n %s", line);
 885                hash = 0;
 886                p = line+40;
 887                while (*p) {
 888                        unsigned char c = *p++;
 889                        if (isspace(c))
 890                                continue;
 891                        hash = hash * 11 + c;
 892                }
 893                add_object_entry(sha1, hash);
 894        }
 895        if (progress)
 896                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
 897        if (non_empty && !nr_objects)
 898                return 0;
 899
 900        sorted_by_sha = create_sorted_list(sha1_sort);
 901        SHA1_Init(&ctx);
 902        list = sorted_by_sha;
 903        for (i = 0; i < nr_objects; i++) {
 904                struct object_entry *entry = *list++;
 905                SHA1_Update(&ctx, entry->sha1, 20);
 906        }
 907        SHA1_Final(object_list_sha1, &ctx);
 908
 909        if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
 910                ;
 911        else {
 912                prepare_pack(window, depth);
 913                if (progress && pack_to_stdout) {
 914                        /* the other end usually displays progress itself */
 915                        struct itimerval v = {{0,},};
 916                        setitimer(ITIMER_REAL, &v, NULL);
 917                        signal(SIGALRM, SIG_IGN );
 918                        progress_update = 0;
 919                }
 920                write_pack_file();
 921                if (!pack_to_stdout) {
 922                        write_index_file();
 923                        puts(sha1_to_hex(object_list_sha1));
 924                }
 925        }
 926        if (progress)
 927                fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
 928                        nr_objects, written, written_delta, reused, reused_delta);
 929        return 0;
 930}