pack-objects.con commit nicer eye candies for pack-objects (b2504a0)
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4#include "pack.h"
   5#include "csum-file.h"
   6#include <sys/time.h>
   7#include <signal.h>
   8
   9static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
  10
  11struct object_entry {
  12        unsigned char sha1[20];
  13        unsigned long size;     /* uncompressed size */
  14        unsigned long offset;   /* offset into the final pack file;
  15                                 * nonzero if already written.
  16                                 */
  17        unsigned int depth;     /* delta depth */
  18        unsigned int delta_limit;       /* base adjustment for in-pack delta */
  19        unsigned int hash;      /* name hint hash */
  20        enum object_type type;
  21        enum object_type in_pack_type;  /* could be delta */
  22        unsigned long delta_size;       /* delta data size (uncompressed) */
  23        struct object_entry *delta;     /* delta base object */
  24        struct packed_git *in_pack;     /* already in pack */
  25        unsigned int in_pack_offset;
  26        struct object_entry *delta_child; /* delitified objects who bases me */
  27        struct object_entry *delta_sibling; /* other deltified objects who
  28                                             * uses the same base as me
  29                                             */
  30};
  31
  32/*
  33 * Objects we are going to pack are colected in objects array (dynamically
  34 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
  35 * in the order we see -- typically rev-list --objects order that gives us
  36 * nice "minimum seek" order.
  37 *
  38 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
  39 * elements in the objects array.  The former is used to build the pack
  40 * index (lists object names in the ascending order to help offset lookup),
  41 * and the latter is used to group similar things together by try_delta()
  42 * heuristics.
  43 */
  44
  45static unsigned char object_list_sha1[20];
  46static int non_empty = 0;
  47static int no_reuse_delta = 0;
  48static int local = 0;
  49static int incremental = 0;
  50static struct object_entry **sorted_by_sha, **sorted_by_type;
  51static struct object_entry *objects = NULL;
  52static int nr_objects = 0, nr_alloc = 0;
  53static const char *base_name;
  54static unsigned char pack_file_sha1[20];
  55static int progress = 1;
  56
  57/*
  58 * The object names in objects array are hashed with this hashtable,
  59 * to help looking up the entry by object name.  Binary search from
  60 * sorted_by_sha is also possible but this was easier to code and faster.
  61 * This hashtable is built after all the objects are seen.
  62 */
  63static int *object_ix = NULL;
  64static int object_ix_hashsz = 0;
  65
  66/*
  67 * Pack index for existing packs give us easy access to the offsets into
  68 * corresponding pack file where each object's data starts, but the entries
  69 * do not store the size of the compressed representation (uncompressed
  70 * size is easily available by examining the pack entry header).  We build
  71 * a hashtable of existing packs (pack_revindex), and keep reverse index
  72 * here -- pack index file is sorted by object name mapping to offset; this
  73 * pack_revindex[].revindex array is an ordered list of offsets, so if you
  74 * know the offset of an object, next offset is where its packed
  75 * representation ends.
  76 */
  77struct pack_revindex {
  78        struct packed_git *p;
  79        unsigned long *revindex;
  80} *pack_revindex = NULL;
  81static int pack_revindex_hashsz = 0;
  82
  83/*
  84 * stats
  85 */
  86static int written = 0;
  87static int written_delta = 0;
  88static int reused = 0;
  89static int reused_delta = 0;
  90
  91static int pack_revindex_ix(struct packed_git *p)
  92{
  93        unsigned int ui = (unsigned int) p;
  94        int i;
  95
  96        ui = ui ^ (ui >> 16); /* defeat structure alignment */
  97        i = (int)(ui % pack_revindex_hashsz);
  98        while (pack_revindex[i].p) {
  99                if (pack_revindex[i].p == p)
 100                        return i;
 101                if (++i == pack_revindex_hashsz)
 102                        i = 0;
 103        }
 104        return -1 - i;
 105}
 106
 107static void prepare_pack_ix(void)
 108{
 109        int num;
 110        struct packed_git *p;
 111        for (num = 0, p = packed_git; p; p = p->next)
 112                num++;
 113        if (!num)
 114                return;
 115        pack_revindex_hashsz = num * 11;
 116        pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
 117        for (p = packed_git; p; p = p->next) {
 118                num = pack_revindex_ix(p);
 119                num = - 1 - num;
 120                pack_revindex[num].p = p;
 121        }
 122        /* revindex elements are lazily initialized */
 123}
 124
 125static int cmp_offset(const void *a_, const void *b_)
 126{
 127        unsigned long a = *(unsigned long *) a_;
 128        unsigned long b = *(unsigned long *) b_;
 129        if (a < b)
 130                return -1;
 131        else if (a == b)
 132                return 0;
 133        else
 134                return 1;
 135}
 136
 137/*
 138 * Ordered list of offsets of objects in the pack.
 139 */
 140static void prepare_pack_revindex(struct pack_revindex *rix)
 141{
 142        struct packed_git *p = rix->p;
 143        int num_ent = num_packed_objects(p);
 144        int i;
 145        void *index = p->index_base + 256;
 146
 147        rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
 148        for (i = 0; i < num_ent; i++) {
 149                long hl = *((long *)(index + 24 * i));
 150                rix->revindex[i] = ntohl(hl);
 151        }
 152        /* This knows the pack format -- the 20-byte trailer
 153         * follows immediately after the last object data.
 154         */
 155        rix->revindex[num_ent] = p->pack_size - 20;
 156        qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
 157}
 158
 159static unsigned long find_packed_object_size(struct packed_git *p,
 160                                             unsigned long ofs)
 161{
 162        int num;
 163        int lo, hi;
 164        struct pack_revindex *rix;
 165        unsigned long *revindex;
 166        num = pack_revindex_ix(p);
 167        if (num < 0)
 168                die("internal error: pack revindex uninitialized");
 169        rix = &pack_revindex[num];
 170        if (!rix->revindex)
 171                prepare_pack_revindex(rix);
 172        revindex = rix->revindex;
 173        lo = 0;
 174        hi = num_packed_objects(p) + 1;
 175        do {
 176                int mi = (lo + hi) / 2;
 177                if (revindex[mi] == ofs) {
 178                        return revindex[mi+1] - ofs;
 179                }
 180                else if (ofs < revindex[mi])
 181                        hi = mi;
 182                else
 183                        lo = mi + 1;
 184        } while (lo < hi);
 185        die("internal error: pack revindex corrupt");
 186}
 187
 188static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 189{
 190        unsigned long othersize, delta_size;
 191        char type[10];
 192        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
 193        void *delta_buf;
 194
 195        if (!otherbuf)
 196                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
 197        delta_buf = diff_delta(otherbuf, othersize,
 198                               buf, size, &delta_size, 0);
 199        if (!delta_buf || delta_size != entry->delta_size)
 200                die("delta size changed");
 201        free(buf);
 202        free(otherbuf);
 203        return delta_buf;
 204}
 205
 206/*
 207 * The per-object header is a pretty dense thing, which is
 208 *  - first byte: low four bits are "size", then three bits of "type",
 209 *    and the high bit is "size continues".
 210 *  - each byte afterwards: low seven bits are size continuation,
 211 *    with the high bit being "size continues"
 212 */
 213static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
 214{
 215        int n = 1;
 216        unsigned char c;
 217
 218        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 219                die("bad type %d", type);
 220
 221        c = (type << 4) | (size & 15);
 222        size >>= 4;
 223        while (size) {
 224                *hdr++ = c | 0x80;
 225                c = size & 0x7f;
 226                size >>= 7;
 227                n++;
 228        }
 229        *hdr = c;
 230        return n;
 231}
 232
 233static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
 234{
 235        unsigned long size;
 236        char type[10];
 237        void *buf;
 238        unsigned char header[10];
 239        unsigned hdrlen, datalen;
 240        enum object_type obj_type;
 241        int to_reuse = 0;
 242
 243        obj_type = entry->type;
 244        if (! entry->in_pack)
 245                to_reuse = 0;   /* can't reuse what we don't have */
 246        else if (obj_type == OBJ_DELTA)
 247                to_reuse = 1;   /* check_object() decided it for us */
 248        else if (obj_type != entry->in_pack_type)
 249                to_reuse = 0;   /* pack has delta which is unusable */
 250        else if (entry->delta)
 251                to_reuse = 0;   /* we want to pack afresh */
 252        else
 253                to_reuse = 1;   /* we have it in-pack undeltified,
 254                                 * and we do not need to deltify it.
 255                                 */
 256
 257        if (! to_reuse) {
 258                buf = read_sha1_file(entry->sha1, type, &size);
 259                if (!buf)
 260                        die("unable to read %s", sha1_to_hex(entry->sha1));
 261                if (size != entry->size)
 262                        die("object %s size inconsistency (%lu vs %lu)",
 263                            sha1_to_hex(entry->sha1), size, entry->size);
 264                if (entry->delta) {
 265                        buf = delta_against(buf, size, entry);
 266                        size = entry->delta_size;
 267                        obj_type = OBJ_DELTA;
 268                }
 269                /*
 270                 * The object header is a byte of 'type' followed by zero or
 271                 * more bytes of length.  For deltas, the 20 bytes of delta
 272                 * sha1 follows that.
 273                 */
 274                hdrlen = encode_header(obj_type, size, header);
 275                sha1write(f, header, hdrlen);
 276
 277                if (entry->delta) {
 278                        sha1write(f, entry->delta, 20);
 279                        hdrlen += 20;
 280                }
 281                datalen = sha1write_compressed(f, buf, size);
 282                free(buf);
 283        }
 284        else {
 285                struct packed_git *p = entry->in_pack;
 286                use_packed_git(p);
 287
 288                datalen = find_packed_object_size(p, entry->in_pack_offset);
 289                buf = p->pack_base + entry->in_pack_offset;
 290                sha1write(f, buf, datalen);
 291                unuse_packed_git(p);
 292                hdrlen = 0; /* not really */
 293                if (obj_type == OBJ_DELTA)
 294                        reused_delta++;
 295                reused++;
 296        }
 297        if (obj_type == OBJ_DELTA)
 298                written_delta++;
 299        written++;
 300        return hdrlen + datalen;
 301}
 302
 303static unsigned long write_one(struct sha1file *f,
 304                               struct object_entry *e,
 305                               unsigned long offset)
 306{
 307        if (e->offset)
 308                /* offset starts from header size and cannot be zero
 309                 * if it is written already.
 310                 */
 311                return offset;
 312        e->offset = offset;
 313        offset += write_object(f, e);
 314        /* if we are deltified, write out its base object. */
 315        if (e->delta)
 316                offset = write_one(f, e->delta, offset);
 317        return offset;
 318}
 319
 320static void write_pack_file(void)
 321{
 322        int i;
 323        struct sha1file *f;
 324        unsigned long offset;
 325        struct pack_header hdr;
 326
 327        if (!base_name)
 328                f = sha1fd(1, "<stdout>");
 329        else
 330                f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
 331        hdr.hdr_signature = htonl(PACK_SIGNATURE);
 332        hdr.hdr_version = htonl(PACK_VERSION);
 333        hdr.hdr_entries = htonl(nr_objects);
 334        sha1write(f, &hdr, sizeof(hdr));
 335        offset = sizeof(hdr);
 336        for (i = 0; i < nr_objects; i++)
 337                offset = write_one(f, objects + i, offset);
 338
 339        sha1close(f, pack_file_sha1, 1);
 340}
 341
 342static void write_index_file(void)
 343{
 344        int i;
 345        struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
 346        struct object_entry **list = sorted_by_sha;
 347        struct object_entry **last = list + nr_objects;
 348        unsigned int array[256];
 349
 350        /*
 351         * Write the first-level table (the list is sorted,
 352         * but we use a 256-entry lookup to be able to avoid
 353         * having to do eight extra binary search iterations).
 354         */
 355        for (i = 0; i < 256; i++) {
 356                struct object_entry **next = list;
 357                while (next < last) {
 358                        struct object_entry *entry = *next;
 359                        if (entry->sha1[0] != i)
 360                                break;
 361                        next++;
 362                }
 363                array[i] = htonl(next - sorted_by_sha);
 364                list = next;
 365        }
 366        sha1write(f, array, 256 * sizeof(int));
 367
 368        /*
 369         * Write the actual SHA1 entries..
 370         */
 371        list = sorted_by_sha;
 372        for (i = 0; i < nr_objects; i++) {
 373                struct object_entry *entry = *list++;
 374                unsigned int offset = htonl(entry->offset);
 375                sha1write(f, &offset, 4);
 376                sha1write(f, entry->sha1, 20);
 377        }
 378        sha1write(f, pack_file_sha1, 20);
 379        sha1close(f, NULL, 1);
 380}
 381
 382static int add_object_entry(unsigned char *sha1, unsigned int hash)
 383{
 384        unsigned int idx = nr_objects;
 385        struct object_entry *entry;
 386        struct packed_git *p;
 387        unsigned int found_offset = 0;
 388        struct packed_git *found_pack = NULL;
 389
 390        for (p = packed_git; p; p = p->next) {
 391                struct pack_entry e;
 392                if (find_pack_entry_one(sha1, &e, p)) {
 393                        if (incremental)
 394                                return 0;
 395                        if (local && !p->pack_local)
 396                                return 0;
 397                        if (!found_pack) {
 398                                found_offset = e.offset;
 399                                found_pack = e.p;
 400                        }
 401                }
 402        }
 403
 404        if (idx >= nr_alloc) {
 405                unsigned int needed = (idx + 1024) * 3 / 2;
 406                objects = xrealloc(objects, needed * sizeof(*entry));
 407                nr_alloc = needed;
 408        }
 409        entry = objects + idx;
 410        memset(entry, 0, sizeof(*entry));
 411        memcpy(entry->sha1, sha1, 20);
 412        entry->hash = hash;
 413        if (found_pack) {
 414                entry->in_pack = found_pack;
 415                entry->in_pack_offset = found_offset;
 416        }
 417        nr_objects = idx+1;
 418        return 1;
 419}
 420
 421static int locate_object_entry_hash(unsigned char *sha1)
 422{
 423        int i;
 424        unsigned int ui;
 425        memcpy(&ui, sha1, sizeof(unsigned int));
 426        i = ui % object_ix_hashsz;
 427        while (0 < object_ix[i]) {
 428                if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
 429                        return i;
 430                if (++i == object_ix_hashsz)
 431                        i = 0;
 432        }
 433        return -1 - i;
 434}
 435
 436static struct object_entry *locate_object_entry(unsigned char *sha1)
 437{
 438        int i = locate_object_entry_hash(sha1);
 439        if (0 <= i)
 440                return &objects[object_ix[i]-1];
 441        return NULL;
 442}
 443
 444static void check_object(struct object_entry *entry)
 445{
 446        char type[20];
 447
 448        if (entry->in_pack) {
 449                unsigned char base[20];
 450                unsigned long size;
 451                struct object_entry *base_entry;
 452
 453                /* We want in_pack_type even if we do not reuse delta.
 454                 * There is no point not reusing non-delta representations.
 455                 */
 456                check_reuse_pack_delta(entry->in_pack,
 457                                       entry->in_pack_offset,
 458                                       base, &size,
 459                                       &entry->in_pack_type);
 460
 461                /* Check if it is delta, and the base is also an object
 462                 * we are going to pack.  If so we will reuse the existing
 463                 * delta.
 464                 */
 465                if (!no_reuse_delta &&
 466                    entry->in_pack_type == OBJ_DELTA &&
 467                    (base_entry = locate_object_entry(base))) {
 468
 469                        /* Depth value does not matter - find_deltas()
 470                         * will never consider reused delta as the
 471                         * base object to deltify other objects
 472                         * against, in order to avoid circular deltas.
 473                         */
 474
 475                        /* uncompressed size of the delta data */
 476                        entry->size = entry->delta_size = size;
 477                        entry->delta = base_entry;
 478                        entry->type = OBJ_DELTA;
 479
 480                        entry->delta_sibling = base_entry->delta_child;
 481                        base_entry->delta_child = entry;
 482
 483                        return;
 484                }
 485                /* Otherwise we would do the usual */
 486        }
 487
 488        if (sha1_object_info(entry->sha1, type, &entry->size))
 489                die("unable to get type of object %s",
 490                    sha1_to_hex(entry->sha1));
 491
 492        if (!strcmp(type, "commit")) {
 493                entry->type = OBJ_COMMIT;
 494        } else if (!strcmp(type, "tree")) {
 495                entry->type = OBJ_TREE;
 496        } else if (!strcmp(type, "blob")) {
 497                entry->type = OBJ_BLOB;
 498        } else if (!strcmp(type, "tag")) {
 499                entry->type = OBJ_TAG;
 500        } else
 501                die("unable to pack object %s of type %s",
 502                    sha1_to_hex(entry->sha1), type);
 503}
 504
 505static void hash_objects(void)
 506{
 507        int i;
 508        struct object_entry *oe;
 509
 510        object_ix_hashsz = nr_objects * 2;
 511        object_ix = xcalloc(sizeof(int), object_ix_hashsz);
 512        for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
 513                int ix = locate_object_entry_hash(oe->sha1);
 514                if (0 <= ix) {
 515                        error("the same object '%s' added twice",
 516                              sha1_to_hex(oe->sha1));
 517                        continue;
 518                }
 519                ix = -1 - ix;
 520                object_ix[ix] = i + 1;
 521        }
 522}
 523
 524static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 525{
 526        struct object_entry *child = me->delta_child;
 527        unsigned int m = n;
 528        while (child) {
 529                unsigned int c = check_delta_limit(child, n + 1);
 530                if (m < c)
 531                        m = c;
 532                child = child->delta_sibling;
 533        }
 534        return m;
 535}
 536
 537static void get_object_details(void)
 538{
 539        int i;
 540        struct object_entry *entry;
 541
 542        hash_objects();
 543        prepare_pack_ix();
 544        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 545                check_object(entry);
 546        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 547                if (!entry->delta && entry->delta_child)
 548                        entry->delta_limit =
 549                                check_delta_limit(entry, 1);
 550}
 551
 552typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 553
 554static entry_sort_t current_sort;
 555
 556static int sort_comparator(const void *_a, const void *_b)
 557{
 558        struct object_entry *a = *(struct object_entry **)_a;
 559        struct object_entry *b = *(struct object_entry **)_b;
 560        return current_sort(a,b);
 561}
 562
 563static struct object_entry **create_sorted_list(entry_sort_t sort)
 564{
 565        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 566        int i;
 567
 568        for (i = 0; i < nr_objects; i++)
 569                list[i] = objects + i;
 570        current_sort = sort;
 571        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 572        return list;
 573}
 574
 575static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 576{
 577        return memcmp(a->sha1, b->sha1, 20);
 578}
 579
 580static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 581{
 582        if (a->type < b->type)
 583                return -1;
 584        if (a->type > b->type)
 585                return 1;
 586        if (a->hash < b->hash)
 587                return -1;
 588        if (a->hash > b->hash)
 589                return 1;
 590        if (a->size < b->size)
 591                return -1;
 592        if (a->size > b->size)
 593                return 1;
 594        return a < b ? -1 : (a > b);
 595}
 596
 597struct unpacked {
 598        struct object_entry *entry;
 599        void *data;
 600};
 601
 602/*
 603 * We search for deltas _backwards_ in a list sorted by type and
 604 * by size, so that we see progressively smaller and smaller files.
 605 * That's because we prefer deltas to be from the bigger file
 606 * to the smaller - deletes are potentially cheaper, but perhaps
 607 * more importantly, the bigger file is likely the more recent
 608 * one.
 609 */
 610static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
 611{
 612        struct object_entry *cur_entry = cur->entry;
 613        struct object_entry *old_entry = old->entry;
 614        unsigned long size, oldsize, delta_size, sizediff;
 615        long max_size;
 616        void *delta_buf;
 617
 618        /* Don't bother doing diffs between different types */
 619        if (cur_entry->type != old_entry->type)
 620                return -1;
 621
 622        /* If the current object is at edge, take the depth the objects
 623         * that depend on the current object into account -- otherwise
 624         * they would become too deep.
 625         */
 626        if (cur_entry->delta_child) {
 627                if (max_depth <= cur_entry->delta_limit)
 628                        return 0;
 629                max_depth -= cur_entry->delta_limit;
 630        }
 631
 632        size = cur_entry->size;
 633        if (size < 50)
 634                return -1;
 635        oldsize = old_entry->size;
 636        sizediff = oldsize > size ? oldsize - size : size - oldsize;
 637        if (sizediff > size / 8)
 638                return -1;
 639        if (old_entry->depth >= max_depth)
 640                return 0;
 641
 642        /*
 643         * NOTE!
 644         *
 645         * We always delta from the bigger to the smaller, since that's
 646         * more space-efficient (deletes don't have to say _what_ they
 647         * delete).
 648         */
 649        max_size = size / 2 - 20;
 650        if (cur_entry->delta)
 651                max_size = cur_entry->delta_size-1;
 652        if (sizediff >= max_size)
 653                return -1;
 654        delta_buf = diff_delta(old->data, oldsize,
 655                               cur->data, size, &delta_size, max_size);
 656        if (!delta_buf)
 657                return 0;
 658        cur_entry->delta = old_entry;
 659        cur_entry->delta_size = delta_size;
 660        cur_entry->depth = old_entry->depth + 1;
 661        free(delta_buf);
 662        return 0;
 663}
 664
 665static volatile int progress_update = 0;
 666static void progress_interval(int signum)
 667{
 668        signal(SIGALRM, progress_interval);
 669        progress_update = 1;
 670}
 671
 672static void find_deltas(struct object_entry **list, int window, int depth)
 673{
 674        int i, idx;
 675        unsigned int array_size = window * sizeof(struct unpacked);
 676        struct unpacked *array = xmalloc(array_size);
 677
 678        memset(array, 0, array_size);
 679        i = nr_objects;
 680        idx = 0;
 681
 682        while (--i >= 0) {
 683                struct object_entry *entry = list[i];
 684                struct unpacked *n = array + idx;
 685                unsigned long size;
 686                char type[10];
 687                int j;
 688
 689                if (progress_update || i == 0) {
 690                        fprintf(stderr, "Deltifying (%d %d%%)\r",
 691                                nr_objects-i, (nr_objects-i) * 100/nr_objects);
 692                        progress_update = 0;
 693                }
 694
 695                if (entry->delta)
 696                        /* This happens if we decided to reuse existing
 697                         * delta from a pack.  "!no_reuse_delta &&" is implied.
 698                         */
 699                        continue;
 700
 701                free(n->data);
 702                n->entry = entry;
 703                n->data = read_sha1_file(entry->sha1, type, &size);
 704                if (size != entry->size)
 705                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 706
 707                j = window;
 708                while (--j > 0) {
 709                        unsigned int other_idx = idx + j;
 710                        struct unpacked *m;
 711                        if (other_idx >= window)
 712                                other_idx -= window;
 713                        m = array + other_idx;
 714                        if (!m->entry)
 715                                break;
 716                        if (try_delta(n, m, depth) < 0)
 717                                break;
 718                }
 719                idx++;
 720                if (idx >= window)
 721                        idx = 0;
 722        }
 723
 724        if (progress)
 725                fputc('\n', stderr);
 726
 727        for (i = 0; i < window; ++i)
 728                free(array[i].data);
 729        free(array);
 730}
 731
 732static void prepare_pack(int window, int depth)
 733{
 734        get_object_details();
 735        sorted_by_type = create_sorted_list(type_size_sort);
 736        if (window && depth)
 737                find_deltas(sorted_by_type, window+1, depth);
 738        write_pack_file();
 739}
 740
 741static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
 742{
 743        static const char cache[] = "pack-cache/pack-%s.%s";
 744        char *cached_pack, *cached_idx;
 745        int ifd, ofd, ifd_ix = -1;
 746
 747        cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
 748        ifd = open(cached_pack, O_RDONLY);
 749        if (ifd < 0)
 750                return 0;
 751
 752        if (!pack_to_stdout) {
 753                cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
 754                ifd_ix = open(cached_idx, O_RDONLY);
 755                if (ifd_ix < 0) {
 756                        close(ifd);
 757                        return 0;
 758                }
 759        }
 760
 761        if (progress)
 762                fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
 763                        sha1_to_hex(sha1));
 764
 765        if (pack_to_stdout) {
 766                if (copy_fd(ifd, 1))
 767                        exit(1);
 768                close(ifd);
 769        }
 770        else {
 771                char name[PATH_MAX];
 772                snprintf(name, sizeof(name),
 773                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
 774                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 775                if (ofd < 0)
 776                        die("unable to open %s (%s)", name, strerror(errno));
 777                if (copy_fd(ifd, ofd))
 778                        exit(1);
 779                close(ifd);
 780
 781                snprintf(name, sizeof(name),
 782                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
 783                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 784                if (ofd < 0)
 785                        die("unable to open %s (%s)", name, strerror(errno));
 786                if (copy_fd(ifd_ix, ofd))
 787                        exit(1);
 788                close(ifd_ix);
 789                puts(sha1_to_hex(sha1));
 790        }
 791
 792        return 1;
 793}
 794
 795int main(int argc, char **argv)
 796{
 797        SHA_CTX ctx;
 798        char line[PATH_MAX + 20];
 799        int window = 10, depth = 10, pack_to_stdout = 0;
 800        struct object_entry **list;
 801        int i;
 802
 803        setup_git_directory();
 804
 805        for (i = 1; i < argc; i++) {
 806                const char *arg = argv[i];
 807
 808                if (*arg == '-') {
 809                        if (!strcmp("--non-empty", arg)) {
 810                                non_empty = 1;
 811                                continue;
 812                        }
 813                        if (!strcmp("--local", arg)) {
 814                                local = 1;
 815                                continue;
 816                        }
 817                        if (!strcmp("--incremental", arg)) {
 818                                incremental = 1;
 819                                continue;
 820                        }
 821                        if (!strncmp("--window=", arg, 9)) {
 822                                char *end;
 823                                window = strtoul(arg+9, &end, 0);
 824                                if (!arg[9] || *end)
 825                                        usage(pack_usage);
 826                                continue;
 827                        }
 828                        if (!strncmp("--depth=", arg, 8)) {
 829                                char *end;
 830                                depth = strtoul(arg+8, &end, 0);
 831                                if (!arg[8] || *end)
 832                                        usage(pack_usage);
 833                                continue;
 834                        }
 835                        if (!strcmp("-q", arg)) {
 836                                progress = 0;
 837                                continue;
 838                        }
 839                        if (!strcmp("--no-reuse-delta", arg)) {
 840                                no_reuse_delta = 1;
 841                                continue;
 842                        }
 843                        if (!strcmp("--stdout", arg)) {
 844                                pack_to_stdout = 1;
 845                                continue;
 846                        }
 847                        usage(pack_usage);
 848                }
 849                if (base_name)
 850                        usage(pack_usage);
 851                base_name = arg;
 852        }
 853
 854        if (pack_to_stdout != !base_name)
 855                usage(pack_usage);
 856
 857        prepare_packed_git();
 858
 859        if (progress) {
 860                struct itimerval v;
 861                v.it_interval.tv_sec = 1;
 862                v.it_interval.tv_usec = 0;
 863                v.it_value = v.it_interval;
 864                signal(SIGALRM, progress_interval);
 865                setitimer(ITIMER_REAL, &v, NULL);
 866                fprintf(stderr, "Generating pack...\n");
 867        }
 868
 869        while (fgets(line, sizeof(line), stdin) != NULL) {
 870                unsigned int hash;
 871                char *p;
 872                unsigned char sha1[20];
 873
 874                if (progress_update) {
 875                        fprintf(stderr, "Counting objects...%d\r", nr_objects);
 876                        progress_update = 0;
 877                }
 878                if (get_sha1_hex(line, sha1))
 879                        die("expected sha1, got garbage:\n %s", line);
 880                hash = 0;
 881                p = line+40;
 882                while (*p) {
 883                        unsigned char c = *p++;
 884                        if (isspace(c))
 885                                continue;
 886                        hash = hash * 11 + c;
 887                }
 888                add_object_entry(sha1, hash);
 889        }
 890        if (progress)
 891                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
 892        if (non_empty && !nr_objects)
 893                return 0;
 894
 895        sorted_by_sha = create_sorted_list(sha1_sort);
 896        SHA1_Init(&ctx);
 897        list = sorted_by_sha;
 898        for (i = 0; i < nr_objects; i++) {
 899                struct object_entry *entry = *list++;
 900                SHA1_Update(&ctx, entry->sha1, 20);
 901        }
 902        SHA1_Final(object_list_sha1, &ctx);
 903
 904        if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
 905                ;
 906        else {
 907                prepare_pack(window, depth);
 908                if (!pack_to_stdout) {
 909                        write_index_file();
 910                        puts(sha1_to_hex(object_list_sha1));
 911                }
 912        }
 913        if (progress)
 914                fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
 915                        nr_objects, written, written_delta, reused, reused_delta);
 916        return 0;
 917}