7af1776673e0ccbf2a6a25a0c789621ff01c2e71
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "commit.h"
   6#include "tag.h"
   7#include "tree.h"
   8#include "delta.h"
   9#include "pack.h"
  10#include "csum-file.h"
  11#include "tree-walk.h"
  12#include "diff.h"
  13#include "revision.h"
  14#include "list-objects.h"
  15
  16static const char pack_usage[] = "\
  17git-pack-objects [{ -q | --progress | --all-progress }] \n\
  18        [--local] [--incremental] [--window=N] [--depth=N] \n\
  19        [--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\
  20        [--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\
  21        [<ref-list | <object-list]";
  22
  23struct object_entry {
  24        unsigned char sha1[20];
  25        unsigned long size;     /* uncompressed size */
  26        off_t offset;   /* offset into the final pack file;
  27                                 * nonzero if already written.
  28                                 */
  29        unsigned int depth;     /* delta depth */
  30        unsigned int hash;      /* name hint hash */
  31        enum object_type type;
  32        enum object_type in_pack_type;  /* could be delta */
  33        unsigned long delta_size;       /* delta data size (uncompressed) */
  34#define in_pack_header_size delta_size  /* only when reusing pack data */
  35        struct object_entry *delta;     /* delta base object */
  36        struct packed_git *in_pack;     /* already in pack */
  37        off_t in_pack_offset;
  38        struct object_entry *delta_child; /* deltified objects who bases me */
  39        struct object_entry *delta_sibling; /* other deltified objects who
  40                                             * uses the same base as me
  41                                             */
  42        int preferred_base;     /* we do not pack this, but is encouraged to
  43                                 * be used as the base objectto delta huge
  44                                 * objects against.
  45                                 */
  46        uint32_t crc32;         /* crc of raw pack data for this object */
  47};
  48
  49/*
  50 * Objects we are going to pack are collected in objects array (dynamically
  51 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
  52 * in the order we see -- typically rev-list --objects order that gives us
  53 * nice "minimum seek" order.
  54 *
  55 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
  56 * elements in the objects array.  The former is used to build the pack
  57 * index (lists object names in the ascending order to help offset lookup),
  58 * and the latter is used to group similar things together by try_delta()
  59 * heuristics.
  60 */
  61
  62static int non_empty;
  63static int no_reuse_delta;
  64static int local;
  65static int incremental;
  66static int allow_ofs_delta;
  67
  68static struct object_entry *objects;
  69static uint32_t nr_objects, nr_alloc, nr_result;
  70static const char *pack_tmp_name, *idx_tmp_name;
  71static char tmpname[PATH_MAX];
  72static unsigned char pack_file_sha1[20];
  73static int progress = 1;
  74static volatile sig_atomic_t progress_update;
  75static int window = 10;
  76static int pack_to_stdout;
  77static int num_preferred_base;
  78
  79/*
  80 * The object names in objects array are hashed with this hashtable,
  81 * to help looking up the entry by object name.  Binary search from
  82 * sorted_by_sha is also possible but this was easier to code and faster.
  83 * This hashtable is built after all the objects are seen.
  84 */
  85static int *object_ix;
  86static int object_ix_hashsz;
  87
  88/*
  89 * Pack index for existing packs give us easy access to the offsets into
  90 * corresponding pack file where each object's data starts, but the entries
  91 * do not store the size of the compressed representation (uncompressed
  92 * size is easily available by examining the pack entry header).  It is
  93 * also rather expensive to find the sha1 for an object given its offset.
  94 *
  95 * We build a hashtable of existing packs (pack_revindex), and keep reverse
  96 * index here -- pack index file is sorted by object name mapping to offset;
  97 * this pack_revindex[].revindex array is a list of offset/index_nr pairs
  98 * ordered by offset, so if you know the offset of an object, next offset
  99 * is where its packed representation ends and the index_nr can be used to
 100 * get the object sha1 from the main index.
 101 */
 102struct revindex_entry {
 103        off_t offset;
 104        unsigned int nr;
 105};
 106struct pack_revindex {
 107        struct packed_git *p;
 108        struct revindex_entry *revindex;
 109};
 110static struct  pack_revindex *pack_revindex;
 111static int pack_revindex_hashsz;
 112
 113/*
 114 * stats
 115 */
 116static uint32_t written, written_delta;
 117static uint32_t reused, reused_delta;
 118
 119static int pack_revindex_ix(struct packed_git *p)
 120{
 121        unsigned long ui = (unsigned long)p;
 122        int i;
 123
 124        ui = ui ^ (ui >> 16); /* defeat structure alignment */
 125        i = (int)(ui % pack_revindex_hashsz);
 126        while (pack_revindex[i].p) {
 127                if (pack_revindex[i].p == p)
 128                        return i;
 129                if (++i == pack_revindex_hashsz)
 130                        i = 0;
 131        }
 132        return -1 - i;
 133}
 134
 135static void prepare_pack_ix(void)
 136{
 137        int num;
 138        struct packed_git *p;
 139        for (num = 0, p = packed_git; p; p = p->next)
 140                num++;
 141        if (!num)
 142                return;
 143        pack_revindex_hashsz = num * 11;
 144        pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
 145        for (p = packed_git; p; p = p->next) {
 146                num = pack_revindex_ix(p);
 147                num = - 1 - num;
 148                pack_revindex[num].p = p;
 149        }
 150        /* revindex elements are lazily initialized */
 151}
 152
 153static int cmp_offset(const void *a_, const void *b_)
 154{
 155        const struct revindex_entry *a = a_;
 156        const struct revindex_entry *b = b_;
 157        return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
 158}
 159
 160/*
 161 * Ordered list of offsets of objects in the pack.
 162 */
 163static void prepare_pack_revindex(struct pack_revindex *rix)
 164{
 165        struct packed_git *p = rix->p;
 166        int num_ent = p->num_objects;
 167        int i;
 168        const char *index = p->index_data;
 169
 170        rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
 171        index += 4 * 256;
 172
 173        if (p->index_version > 1) {
 174                const uint32_t *off_32 =
 175                        (uint32_t *)(index + 8 + p->num_objects * (20 + 4));
 176                const uint32_t *off_64 = off_32 + p->num_objects;
 177                for (i = 0; i < num_ent; i++) {
 178                        uint32_t off = ntohl(*off_32++);
 179                        if (!(off & 0x80000000)) {
 180                                rix->revindex[i].offset = off;
 181                        } else {
 182                                rix->revindex[i].offset =
 183                                        ((uint64_t)ntohl(*off_64++)) << 32;
 184                                rix->revindex[i].offset |=
 185                                        ntohl(*off_64++);
 186                        }
 187                        rix->revindex[i].nr = i;
 188                }
 189        } else {
 190                for (i = 0; i < num_ent; i++) {
 191                        uint32_t hl = *((uint32_t *)(index + 24 * i));
 192                        rix->revindex[i].offset = ntohl(hl);
 193                        rix->revindex[i].nr = i;
 194                }
 195        }
 196
 197        /* This knows the pack format -- the 20-byte trailer
 198         * follows immediately after the last object data.
 199         */
 200        rix->revindex[num_ent].offset = p->pack_size - 20;
 201        rix->revindex[num_ent].nr = -1;
 202        qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
 203}
 204
 205static struct revindex_entry * find_packed_object(struct packed_git *p,
 206                                                  off_t ofs)
 207{
 208        int num;
 209        int lo, hi;
 210        struct pack_revindex *rix;
 211        struct revindex_entry *revindex;
 212        num = pack_revindex_ix(p);
 213        if (num < 0)
 214                die("internal error: pack revindex uninitialized");
 215        rix = &pack_revindex[num];
 216        if (!rix->revindex)
 217                prepare_pack_revindex(rix);
 218        revindex = rix->revindex;
 219        lo = 0;
 220        hi = p->num_objects + 1;
 221        do {
 222                int mi = (lo + hi) / 2;
 223                if (revindex[mi].offset == ofs) {
 224                        return revindex + mi;
 225                }
 226                else if (ofs < revindex[mi].offset)
 227                        hi = mi;
 228                else
 229                        lo = mi + 1;
 230        } while (lo < hi);
 231        die("internal error: pack revindex corrupt");
 232}
 233
 234static const unsigned char *find_packed_object_name(struct packed_git *p,
 235                                                    off_t ofs)
 236{
 237        struct revindex_entry *entry = find_packed_object(p, ofs);
 238        return nth_packed_object_sha1(p, entry->nr);
 239}
 240
 241static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 242{
 243        unsigned long othersize, delta_size;
 244        enum object_type type;
 245        void *otherbuf = read_sha1_file(entry->delta->sha1, &type, &othersize);
 246        void *delta_buf;
 247
 248        if (!otherbuf)
 249                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
 250        delta_buf = diff_delta(otherbuf, othersize,
 251                               buf, size, &delta_size, 0);
 252        if (!delta_buf || delta_size != entry->delta_size)
 253                die("delta size changed");
 254        free(buf);
 255        free(otherbuf);
 256        return delta_buf;
 257}
 258
 259/*
 260 * The per-object header is a pretty dense thing, which is
 261 *  - first byte: low four bits are "size", then three bits of "type",
 262 *    and the high bit is "size continues".
 263 *  - each byte afterwards: low seven bits are size continuation,
 264 *    with the high bit being "size continues"
 265 */
 266static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
 267{
 268        int n = 1;
 269        unsigned char c;
 270
 271        if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
 272                die("bad type %d", type);
 273
 274        c = (type << 4) | (size & 15);
 275        size >>= 4;
 276        while (size) {
 277                *hdr++ = c | 0x80;
 278                c = size & 0x7f;
 279                size >>= 7;
 280                n++;
 281        }
 282        *hdr = c;
 283        return n;
 284}
 285
 286/*
 287 * we are going to reuse the existing object data as is.  make
 288 * sure it is not corrupt.
 289 */
 290static int check_pack_inflate(struct packed_git *p,
 291                struct pack_window **w_curs,
 292                off_t offset,
 293                off_t len,
 294                unsigned long expect)
 295{
 296        z_stream stream;
 297        unsigned char fakebuf[4096], *in;
 298        int st;
 299
 300        memset(&stream, 0, sizeof(stream));
 301        inflateInit(&stream);
 302        do {
 303                in = use_pack(p, w_curs, offset, &stream.avail_in);
 304                stream.next_in = in;
 305                stream.next_out = fakebuf;
 306                stream.avail_out = sizeof(fakebuf);
 307                st = inflate(&stream, Z_FINISH);
 308                offset += stream.next_in - in;
 309        } while (st == Z_OK || st == Z_BUF_ERROR);
 310        inflateEnd(&stream);
 311        return (st == Z_STREAM_END &&
 312                stream.total_out == expect &&
 313                stream.total_in == len) ? 0 : -1;
 314}
 315
 316static int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
 317                          off_t offset, off_t len, unsigned int nr)
 318{
 319        const uint32_t *index_crc;
 320        uint32_t data_crc = crc32(0, Z_NULL, 0);
 321
 322        do {
 323                unsigned int avail;
 324                void *data = use_pack(p, w_curs, offset, &avail);
 325                if (avail > len)
 326                        avail = len;
 327                data_crc = crc32(data_crc, data, avail);
 328                offset += avail;
 329                len -= avail;
 330        } while (len);
 331
 332        index_crc = p->index_data;
 333        index_crc += 2 + 256 + p->num_objects * (20/4) + nr;
 334
 335        return data_crc != ntohl(*index_crc);
 336}
 337
 338static void copy_pack_data(struct sha1file *f,
 339                struct packed_git *p,
 340                struct pack_window **w_curs,
 341                off_t offset,
 342                off_t len)
 343{
 344        unsigned char *in;
 345        unsigned int avail;
 346
 347        while (len) {
 348                in = use_pack(p, w_curs, offset, &avail);
 349                if (avail > len)
 350                        avail = (unsigned int)len;
 351                sha1write(f, in, avail);
 352                offset += avail;
 353                len -= avail;
 354        }
 355}
 356
 357static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
 358{
 359        z_stream stream;
 360        unsigned char fakebuf[4096];
 361        int st;
 362
 363        memset(&stream, 0, sizeof(stream));
 364        stream.next_in = data;
 365        stream.avail_in = len;
 366        stream.next_out = fakebuf;
 367        stream.avail_out = sizeof(fakebuf);
 368        inflateInit(&stream);
 369
 370        while (1) {
 371                st = inflate(&stream, Z_FINISH);
 372                if (st == Z_STREAM_END || st == Z_OK) {
 373                        st = (stream.total_out == expect &&
 374                              stream.total_in == len) ? 0 : -1;
 375                        break;
 376                }
 377                if (st != Z_BUF_ERROR) {
 378                        st = -1;
 379                        break;
 380                }
 381                stream.next_out = fakebuf;
 382                stream.avail_out = sizeof(fakebuf);
 383        }
 384        inflateEnd(&stream);
 385        return st;
 386}
 387
 388static int revalidate_loose_object(struct object_entry *entry,
 389                                   unsigned char *map,
 390                                   unsigned long mapsize)
 391{
 392        /* we already know this is a loose object with new type header. */
 393        enum object_type type;
 394        unsigned long size, used;
 395
 396        if (pack_to_stdout)
 397                return 0;
 398
 399        used = unpack_object_header_gently(map, mapsize, &type, &size);
 400        if (!used)
 401                return -1;
 402        map += used;
 403        mapsize -= used;
 404        return check_loose_inflate(map, mapsize, size);
 405}
 406
 407static unsigned long write_object(struct sha1file *f,
 408                                  struct object_entry *entry)
 409{
 410        unsigned long size;
 411        enum object_type type;
 412        void *buf;
 413        unsigned char header[10];
 414        unsigned hdrlen;
 415        off_t datalen;
 416        enum object_type obj_type;
 417        int to_reuse = 0;
 418
 419        if (!pack_to_stdout)
 420                crc32_begin(f);
 421
 422        obj_type = entry->type;
 423        if (! entry->in_pack)
 424                to_reuse = 0;   /* can't reuse what we don't have */
 425        else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
 426                to_reuse = 1;   /* check_object() decided it for us */
 427        else if (obj_type != entry->in_pack_type)
 428                to_reuse = 0;   /* pack has delta which is unusable */
 429        else if (entry->delta)
 430                to_reuse = 0;   /* we want to pack afresh */
 431        else
 432                to_reuse = 1;   /* we have it in-pack undeltified,
 433                                 * and we do not need to deltify it.
 434                                 */
 435
 436        if (!entry->in_pack && !entry->delta) {
 437                unsigned char *map;
 438                unsigned long mapsize;
 439                map = map_sha1_file(entry->sha1, &mapsize);
 440                if (map && !legacy_loose_object(map)) {
 441                        /* We can copy straight into the pack file */
 442                        if (revalidate_loose_object(entry, map, mapsize))
 443                                die("corrupt loose object %s",
 444                                    sha1_to_hex(entry->sha1));
 445                        sha1write(f, map, mapsize);
 446                        munmap(map, mapsize);
 447                        written++;
 448                        reused++;
 449                        return mapsize;
 450                }
 451                if (map)
 452                        munmap(map, mapsize);
 453        }
 454
 455        if (!to_reuse) {
 456                buf = read_sha1_file(entry->sha1, &type, &size);
 457                if (!buf)
 458                        die("unable to read %s", sha1_to_hex(entry->sha1));
 459                if (size != entry->size)
 460                        die("object %s size inconsistency (%lu vs %lu)",
 461                            sha1_to_hex(entry->sha1), size, entry->size);
 462                if (entry->delta) {
 463                        buf = delta_against(buf, size, entry);
 464                        size = entry->delta_size;
 465                        obj_type = (allow_ofs_delta && entry->delta->offset) ?
 466                                OBJ_OFS_DELTA : OBJ_REF_DELTA;
 467                }
 468                /*
 469                 * The object header is a byte of 'type' followed by zero or
 470                 * more bytes of length.
 471                 */
 472                hdrlen = encode_header(obj_type, size, header);
 473                sha1write(f, header, hdrlen);
 474
 475                if (obj_type == OBJ_OFS_DELTA) {
 476                        /*
 477                         * Deltas with relative base contain an additional
 478                         * encoding of the relative offset for the delta
 479                         * base from this object's position in the pack.
 480                         */
 481                        off_t ofs = entry->offset - entry->delta->offset;
 482                        unsigned pos = sizeof(header) - 1;
 483                        header[pos] = ofs & 127;
 484                        while (ofs >>= 7)
 485                                header[--pos] = 128 | (--ofs & 127);
 486                        sha1write(f, header + pos, sizeof(header) - pos);
 487                        hdrlen += sizeof(header) - pos;
 488                } else if (obj_type == OBJ_REF_DELTA) {
 489                        /*
 490                         * Deltas with a base reference contain
 491                         * an additional 20 bytes for the base sha1.
 492                         */
 493                        sha1write(f, entry->delta->sha1, 20);
 494                        hdrlen += 20;
 495                }
 496                datalen = sha1write_compressed(f, buf, size);
 497                free(buf);
 498        }
 499        else {
 500                struct packed_git *p = entry->in_pack;
 501                struct pack_window *w_curs = NULL;
 502                struct revindex_entry *revidx;
 503                off_t offset;
 504
 505                if (entry->delta) {
 506                        obj_type = (allow_ofs_delta && entry->delta->offset) ?
 507                                OBJ_OFS_DELTA : OBJ_REF_DELTA;
 508                        reused_delta++;
 509                }
 510                hdrlen = encode_header(obj_type, entry->size, header);
 511                sha1write(f, header, hdrlen);
 512                if (obj_type == OBJ_OFS_DELTA) {
 513                        off_t ofs = entry->offset - entry->delta->offset;
 514                        unsigned pos = sizeof(header) - 1;
 515                        header[pos] = ofs & 127;
 516                        while (ofs >>= 7)
 517                                header[--pos] = 128 | (--ofs & 127);
 518                        sha1write(f, header + pos, sizeof(header) - pos);
 519                        hdrlen += sizeof(header) - pos;
 520                } else if (obj_type == OBJ_REF_DELTA) {
 521                        sha1write(f, entry->delta->sha1, 20);
 522                        hdrlen += 20;
 523                }
 524
 525                offset = entry->in_pack_offset;
 526                revidx = find_packed_object(p, offset);
 527                datalen = revidx[1].offset - offset;
 528                if (!pack_to_stdout && p->index_version > 1 &&
 529                    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
 530                        die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
 531                offset += entry->in_pack_header_size;
 532                datalen -= entry->in_pack_header_size;
 533                if (!pack_to_stdout && p->index_version == 1 &&
 534                    check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
 535                        die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
 536                copy_pack_data(f, p, &w_curs, offset, datalen);
 537                unuse_pack(&w_curs);
 538                reused++;
 539        }
 540        if (entry->delta)
 541                written_delta++;
 542        written++;
 543        if (!pack_to_stdout)
 544                entry->crc32 = crc32_end(f);
 545        return hdrlen + datalen;
 546}
 547
 548static off_t write_one(struct sha1file *f,
 549                               struct object_entry *e,
 550                               off_t offset)
 551{
 552        unsigned long size;
 553
 554        /* offset is non zero if object is written already. */
 555        if (e->offset || e->preferred_base)
 556                return offset;
 557
 558        /* if we are deltified, write out base object first. */
 559        if (e->delta)
 560                offset = write_one(f, e->delta, offset);
 561
 562        e->offset = offset;
 563        size = write_object(f, e);
 564
 565        /* make sure off_t is sufficiently large not to wrap */
 566        if (offset > offset + size)
 567                die("pack too large for current definition of off_t");
 568        return offset + size;
 569}
 570
 571static off_t write_pack_file(void)
 572{
 573        uint32_t i;
 574        struct sha1file *f;
 575        off_t offset, last_obj_offset = 0;
 576        struct pack_header hdr;
 577        unsigned last_percent = 999;
 578        int do_progress = progress;
 579
 580        if (pack_to_stdout) {
 581                f = sha1fd(1, "<stdout>");
 582                do_progress >>= 1;
 583        } else {
 584                int fd;
 585                snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
 586                fd = mkstemp(tmpname);
 587                if (fd < 0)
 588                        die("unable to create %s: %s\n", tmpname, strerror(errno));
 589                pack_tmp_name = xstrdup(tmpname);
 590                f = sha1fd(fd, pack_tmp_name);
 591        }
 592
 593        if (do_progress)
 594                fprintf(stderr, "Writing %u objects.\n", nr_result);
 595
 596        hdr.hdr_signature = htonl(PACK_SIGNATURE);
 597        hdr.hdr_version = htonl(PACK_VERSION);
 598        hdr.hdr_entries = htonl(nr_result);
 599        sha1write(f, &hdr, sizeof(hdr));
 600        offset = sizeof(hdr);
 601        if (!nr_result)
 602                goto done;
 603        for (i = 0; i < nr_objects; i++) {
 604                last_obj_offset = offset;
 605                offset = write_one(f, objects + i, offset);
 606                if (do_progress) {
 607                        unsigned percent = written * 100 / nr_result;
 608                        if (progress_update || percent != last_percent) {
 609                                fprintf(stderr, "%4u%% (%u/%u) done\r",
 610                                        percent, written, nr_result);
 611                                progress_update = 0;
 612                                last_percent = percent;
 613                        }
 614                }
 615        }
 616        if (do_progress)
 617                fputc('\n', stderr);
 618 done:
 619        if (written != nr_result)
 620                die("wrote %u objects while expecting %u", written, nr_result);
 621        sha1close(f, pack_file_sha1, 1);
 622
 623        return last_obj_offset;
 624}
 625
 626static int sha1_sort(const void *_a, const void *_b)
 627{
 628        const struct object_entry *a = *(struct object_entry **)_a;
 629        const struct object_entry *b = *(struct object_entry **)_b;
 630        return hashcmp(a->sha1, b->sha1);
 631}
 632
 633static uint32_t index_default_version = 1;
 634static uint32_t index_off32_limit = 0x7fffffff;
 635
 636static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 637{
 638        struct sha1file *f;
 639        struct object_entry **sorted_by_sha, **list, **last;
 640        uint32_t array[256];
 641        uint32_t i, index_version;
 642        SHA_CTX ctx;
 643        int fd;
 644
 645        snprintf(tmpname, sizeof(tmpname), "tmp_idx_XXXXXX");
 646        fd = mkstemp(tmpname);
 647        if (fd < 0)
 648                die("unable to create %s: %s\n", tmpname, strerror(errno));
 649        idx_tmp_name = xstrdup(tmpname);
 650        f = sha1fd(fd, idx_tmp_name);
 651
 652        if (nr_result) {
 653                uint32_t j = 0;
 654                sorted_by_sha =
 655                        xcalloc(nr_result, sizeof(struct object_entry *));
 656                for (i = 0; i < nr_objects; i++)
 657                        if (!objects[i].preferred_base)
 658                                sorted_by_sha[j++] = objects + i;
 659                if (j != nr_result)
 660                        die("listed %u objects while expecting %u", j, nr_result);
 661                qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
 662                list = sorted_by_sha;
 663                last = sorted_by_sha + nr_result;
 664        } else
 665                sorted_by_sha = list = last = NULL;
 666
 667        /* if last object's offset is >= 2^31 we should use index V2 */
 668        index_version = (last_obj_offset >> 31) ? 2 : index_default_version;
 669
 670        /* index versions 2 and above need a header */
 671        if (index_version >= 2) {
 672                struct pack_idx_header hdr;
 673                hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
 674                hdr.idx_version = htonl(index_version);
 675                sha1write(f, &hdr, sizeof(hdr));
 676        }
 677
 678        /*
 679         * Write the first-level table (the list is sorted,
 680         * but we use a 256-entry lookup to be able to avoid
 681         * having to do eight extra binary search iterations).
 682         */
 683        for (i = 0; i < 256; i++) {
 684                struct object_entry **next = list;
 685                while (next < last) {
 686                        struct object_entry *entry = *next;
 687                        if (entry->sha1[0] != i)
 688                                break;
 689                        next++;
 690                }
 691                array[i] = htonl(next - sorted_by_sha);
 692                list = next;
 693        }
 694        sha1write(f, array, 256 * 4);
 695
 696        /* Compute the SHA1 hash of sorted object names. */
 697        SHA1_Init(&ctx);
 698
 699        /* Write the actual SHA1 entries. */
 700        list = sorted_by_sha;
 701        for (i = 0; i < nr_result; i++) {
 702                struct object_entry *entry = *list++;
 703                if (index_version < 2) {
 704                        uint32_t offset = htonl(entry->offset);
 705                        sha1write(f, &offset, 4);
 706                }
 707                sha1write(f, entry->sha1, 20);
 708                SHA1_Update(&ctx, entry->sha1, 20);
 709        }
 710
 711        if (index_version >= 2) {
 712                unsigned int nr_large_offset = 0;
 713
 714                /* write the crc32 table */
 715                list = sorted_by_sha;
 716                for (i = 0; i < nr_objects; i++) {
 717                        struct object_entry *entry = *list++;
 718                        uint32_t crc32_val = htonl(entry->crc32);
 719                        sha1write(f, &crc32_val, 4);
 720                }
 721
 722                /* write the 32-bit offset table */
 723                list = sorted_by_sha;
 724                for (i = 0; i < nr_objects; i++) {
 725                        struct object_entry *entry = *list++;
 726                        uint32_t offset = (entry->offset <= index_off32_limit) ?
 727                                entry->offset : (0x80000000 | nr_large_offset++);
 728                        offset = htonl(offset);
 729                        sha1write(f, &offset, 4);
 730                }
 731
 732                /* write the large offset table */
 733                list = sorted_by_sha;
 734                while (nr_large_offset) {
 735                        struct object_entry *entry = *list++;
 736                        uint64_t offset = entry->offset;
 737                        if (offset > index_off32_limit) {
 738                                uint32_t split[2];
 739                                split[0]        = htonl(offset >> 32);
 740                                split[1] = htonl(offset & 0xffffffff);
 741                                sha1write(f, split, 8);
 742                                nr_large_offset--;
 743                        }
 744                }
 745        }
 746
 747        sha1write(f, pack_file_sha1, 20);
 748        sha1close(f, NULL, 1);
 749        free(sorted_by_sha);
 750        SHA1_Final(sha1, &ctx);
 751}
 752
 753static int locate_object_entry_hash(const unsigned char *sha1)
 754{
 755        int i;
 756        unsigned int ui;
 757        memcpy(&ui, sha1, sizeof(unsigned int));
 758        i = ui % object_ix_hashsz;
 759        while (0 < object_ix[i]) {
 760                if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1))
 761                        return i;
 762                if (++i == object_ix_hashsz)
 763                        i = 0;
 764        }
 765        return -1 - i;
 766}
 767
 768static struct object_entry *locate_object_entry(const unsigned char *sha1)
 769{
 770        int i;
 771
 772        if (!object_ix_hashsz)
 773                return NULL;
 774
 775        i = locate_object_entry_hash(sha1);
 776        if (0 <= i)
 777                return &objects[object_ix[i]-1];
 778        return NULL;
 779}
 780
 781static void rehash_objects(void)
 782{
 783        uint32_t i;
 784        struct object_entry *oe;
 785
 786        object_ix_hashsz = nr_objects * 3;
 787        if (object_ix_hashsz < 1024)
 788                object_ix_hashsz = 1024;
 789        object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
 790        memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
 791        for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
 792                int ix = locate_object_entry_hash(oe->sha1);
 793                if (0 <= ix)
 794                        continue;
 795                ix = -1 - ix;
 796                object_ix[ix] = i + 1;
 797        }
 798}
 799
 800static unsigned name_hash(const char *name)
 801{
 802        unsigned char c;
 803        unsigned hash = 0;
 804
 805        /*
 806         * This effectively just creates a sortable number from the
 807         * last sixteen non-whitespace characters. Last characters
 808         * count "most", so things that end in ".c" sort together.
 809         */
 810        while ((c = *name++) != 0) {
 811                if (isspace(c))
 812                        continue;
 813                hash = (hash >> 2) + (c << 24);
 814        }
 815        return hash;
 816}
 817
 818static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
 819{
 820        struct object_entry *entry;
 821        struct packed_git *p, *found_pack = NULL;
 822        off_t found_offset = 0;
 823        int ix;
 824
 825        ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
 826        if (ix >= 0) {
 827                if (exclude) {
 828                        entry = objects + object_ix[ix] - 1;
 829                        if (!entry->preferred_base)
 830                                nr_result--;
 831                        entry->preferred_base = 1;
 832                }
 833                return 0;
 834        }
 835
 836        if (!exclude) {
 837                for (p = packed_git; p; p = p->next) {
 838                        off_t offset = find_pack_entry_one(sha1, p);
 839                        if (offset) {
 840                                if (incremental)
 841                                        return 0;
 842                                if (local && !p->pack_local)
 843                                        return 0;
 844                                if (!found_pack) {
 845                                        found_offset = offset;
 846                                        found_pack = p;
 847                                }
 848                        }
 849                }
 850        }
 851
 852        if (nr_objects >= nr_alloc) {
 853                nr_alloc = (nr_alloc  + 1024) * 3 / 2;
 854                objects = xrealloc(objects, nr_alloc * sizeof(*entry));
 855        }
 856
 857        entry = objects + nr_objects++;
 858        memset(entry, 0, sizeof(*entry));
 859        hashcpy(entry->sha1, sha1);
 860        entry->hash = hash;
 861        if (exclude)
 862                entry->preferred_base = 1;
 863        else
 864                nr_result++;
 865        if (found_pack) {
 866                entry->in_pack = found_pack;
 867                entry->in_pack_offset = found_offset;
 868        }
 869
 870        if (object_ix_hashsz * 3 <= nr_objects * 4)
 871                rehash_objects();
 872        else
 873                object_ix[-1 - ix] = nr_objects;
 874
 875        if (progress_update) {
 876                fprintf(stderr, "Counting objects...%u\r", nr_objects);
 877                progress_update = 0;
 878        }
 879
 880        return 1;
 881}
 882
 883struct pbase_tree_cache {
 884        unsigned char sha1[20];
 885        int ref;
 886        int temporary;
 887        void *tree_data;
 888        unsigned long tree_size;
 889};
 890
 891static struct pbase_tree_cache *(pbase_tree_cache[256]);
 892static int pbase_tree_cache_ix(const unsigned char *sha1)
 893{
 894        return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
 895}
 896static int pbase_tree_cache_ix_incr(int ix)
 897{
 898        return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
 899}
 900
 901static struct pbase_tree {
 902        struct pbase_tree *next;
 903        /* This is a phony "cache" entry; we are not
 904         * going to evict it nor find it through _get()
 905         * mechanism -- this is for the toplevel node that
 906         * would almost always change with any commit.
 907         */
 908        struct pbase_tree_cache pcache;
 909} *pbase_tree;
 910
 911static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
 912{
 913        struct pbase_tree_cache *ent, *nent;
 914        void *data;
 915        unsigned long size;
 916        enum object_type type;
 917        int neigh;
 918        int my_ix = pbase_tree_cache_ix(sha1);
 919        int available_ix = -1;
 920
 921        /* pbase-tree-cache acts as a limited hashtable.
 922         * your object will be found at your index or within a few
 923         * slots after that slot if it is cached.
 924         */
 925        for (neigh = 0; neigh < 8; neigh++) {
 926                ent = pbase_tree_cache[my_ix];
 927                if (ent && !hashcmp(ent->sha1, sha1)) {
 928                        ent->ref++;
 929                        return ent;
 930                }
 931                else if (((available_ix < 0) && (!ent || !ent->ref)) ||
 932                         ((0 <= available_ix) &&
 933                          (!ent && pbase_tree_cache[available_ix])))
 934                        available_ix = my_ix;
 935                if (!ent)
 936                        break;
 937                my_ix = pbase_tree_cache_ix_incr(my_ix);
 938        }
 939
 940        /* Did not find one.  Either we got a bogus request or
 941         * we need to read and perhaps cache.
 942         */
 943        data = read_sha1_file(sha1, &type, &size);
 944        if (!data)
 945                return NULL;
 946        if (type != OBJ_TREE) {
 947                free(data);
 948                return NULL;
 949        }
 950
 951        /* We need to either cache or return a throwaway copy */
 952
 953        if (available_ix < 0)
 954                ent = NULL;
 955        else {
 956                ent = pbase_tree_cache[available_ix];
 957                my_ix = available_ix;
 958        }
 959
 960        if (!ent) {
 961                nent = xmalloc(sizeof(*nent));
 962                nent->temporary = (available_ix < 0);
 963        }
 964        else {
 965                /* evict and reuse */
 966                free(ent->tree_data);
 967                nent = ent;
 968        }
 969        hashcpy(nent->sha1, sha1);
 970        nent->tree_data = data;
 971        nent->tree_size = size;
 972        nent->ref = 1;
 973        if (!nent->temporary)
 974                pbase_tree_cache[my_ix] = nent;
 975        return nent;
 976}
 977
 978static void pbase_tree_put(struct pbase_tree_cache *cache)
 979{
 980        if (!cache->temporary) {
 981                cache->ref--;
 982                return;
 983        }
 984        free(cache->tree_data);
 985        free(cache);
 986}
 987
 988static int name_cmp_len(const char *name)
 989{
 990        int i;
 991        for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
 992                ;
 993        return i;
 994}
 995
 996static void add_pbase_object(struct tree_desc *tree,
 997                             const char *name,
 998                             int cmplen,
 999                             const char *fullname)
1000{
1001        struct name_entry entry;
1002        int cmp;
1003
1004        while (tree_entry(tree,&entry)) {
1005                cmp = tree_entry_len(entry.path, entry.sha1) != cmplen ? 1 :
1006                      memcmp(name, entry.path, cmplen);
1007                if (cmp > 0)
1008                        continue;
1009                if (cmp < 0)
1010                        return;
1011                if (name[cmplen] != '/') {
1012                        unsigned hash = name_hash(fullname);
1013                        add_object_entry(entry.sha1, hash, 1);
1014                        return;
1015                }
1016                if (S_ISDIR(entry.mode)) {
1017                        struct tree_desc sub;
1018                        struct pbase_tree_cache *tree;
1019                        const char *down = name+cmplen+1;
1020                        int downlen = name_cmp_len(down);
1021
1022                        tree = pbase_tree_get(entry.sha1);
1023                        if (!tree)
1024                                return;
1025                        init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1026
1027                        add_pbase_object(&sub, down, downlen, fullname);
1028                        pbase_tree_put(tree);
1029                }
1030        }
1031}
1032
1033static unsigned *done_pbase_paths;
1034static int done_pbase_paths_num;
1035static int done_pbase_paths_alloc;
1036static int done_pbase_path_pos(unsigned hash)
1037{
1038        int lo = 0;
1039        int hi = done_pbase_paths_num;
1040        while (lo < hi) {
1041                int mi = (hi + lo) / 2;
1042                if (done_pbase_paths[mi] == hash)
1043                        return mi;
1044                if (done_pbase_paths[mi] < hash)
1045                        hi = mi;
1046                else
1047                        lo = mi + 1;
1048        }
1049        return -lo-1;
1050}
1051
1052static int check_pbase_path(unsigned hash)
1053{
1054        int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
1055        if (0 <= pos)
1056                return 1;
1057        pos = -pos - 1;
1058        if (done_pbase_paths_alloc <= done_pbase_paths_num) {
1059                done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
1060                done_pbase_paths = xrealloc(done_pbase_paths,
1061                                            done_pbase_paths_alloc *
1062                                            sizeof(unsigned));
1063        }
1064        done_pbase_paths_num++;
1065        if (pos < done_pbase_paths_num)
1066                memmove(done_pbase_paths + pos + 1,
1067                        done_pbase_paths + pos,
1068                        (done_pbase_paths_num - pos - 1) * sizeof(unsigned));
1069        done_pbase_paths[pos] = hash;
1070        return 0;
1071}
1072
1073static void add_preferred_base_object(const char *name, unsigned hash)
1074{
1075        struct pbase_tree *it;
1076        int cmplen;
1077
1078        if (!num_preferred_base || check_pbase_path(hash))
1079                return;
1080
1081        cmplen = name_cmp_len(name);
1082        for (it = pbase_tree; it; it = it->next) {
1083                if (cmplen == 0) {
1084                        add_object_entry(it->pcache.sha1, 0, 1);
1085                }
1086                else {
1087                        struct tree_desc tree;
1088                        init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1089                        add_pbase_object(&tree, name, cmplen, name);
1090                }
1091        }
1092}
1093
1094static void add_preferred_base(unsigned char *sha1)
1095{
1096        struct pbase_tree *it;
1097        void *data;
1098        unsigned long size;
1099        unsigned char tree_sha1[20];
1100
1101        if (window <= num_preferred_base++)
1102                return;
1103
1104        data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
1105        if (!data)
1106                return;
1107
1108        for (it = pbase_tree; it; it = it->next) {
1109                if (!hashcmp(it->pcache.sha1, tree_sha1)) {
1110                        free(data);
1111                        return;
1112                }
1113        }
1114
1115        it = xcalloc(1, sizeof(*it));
1116        it->next = pbase_tree;
1117        pbase_tree = it;
1118
1119        hashcpy(it->pcache.sha1, tree_sha1);
1120        it->pcache.tree_data = data;
1121        it->pcache.tree_size = size;
1122}
1123
1124static void check_object(struct object_entry *entry)
1125{
1126        if (entry->in_pack && !entry->preferred_base) {
1127                struct packed_git *p = entry->in_pack;
1128                struct pack_window *w_curs = NULL;
1129                unsigned long size, used;
1130                unsigned int avail;
1131                unsigned char *buf;
1132                struct object_entry *base_entry = NULL;
1133
1134                buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1135
1136                /* We want in_pack_type even if we do not reuse delta.
1137                 * There is no point not reusing non-delta representations.
1138                 */
1139                used = unpack_object_header_gently(buf, avail,
1140                                                   &entry->in_pack_type, &size);
1141
1142                /* Check if it is delta, and the base is also an object
1143                 * we are going to pack.  If so we will reuse the existing
1144                 * delta.
1145                 */
1146                if (!no_reuse_delta) {
1147                        unsigned char c;
1148                        const unsigned char *base_name;
1149                        off_t ofs;
1150                        unsigned long used_0;
1151                        /* there is at least 20 bytes left in the pack */
1152                        switch (entry->in_pack_type) {
1153                        case OBJ_REF_DELTA:
1154                                base_name = use_pack(p, &w_curs,
1155                                        entry->in_pack_offset + used, NULL);
1156                                used += 20;
1157                                break;
1158                        case OBJ_OFS_DELTA:
1159                                buf = use_pack(p, &w_curs,
1160                                        entry->in_pack_offset + used, NULL);
1161                                used_0 = 0;
1162                                c = buf[used_0++];
1163                                ofs = c & 127;
1164                                while (c & 128) {
1165                                        ofs += 1;
1166                                        if (!ofs || MSB(ofs, 7))
1167                                                die("delta base offset overflow in pack for %s",
1168                                                    sha1_to_hex(entry->sha1));
1169                                        c = buf[used_0++];
1170                                        ofs = (ofs << 7) + (c & 127);
1171                                }
1172                                if (ofs >= entry->in_pack_offset)
1173                                        die("delta base offset out of bound for %s",
1174                                            sha1_to_hex(entry->sha1));
1175                                ofs = entry->in_pack_offset - ofs;
1176                                base_name = find_packed_object_name(p, ofs);
1177                                used += used_0;
1178                                break;
1179                        default:
1180                                base_name = NULL;
1181                        }
1182                        if (base_name)
1183                                base_entry = locate_object_entry(base_name);
1184                }
1185                unuse_pack(&w_curs);
1186                entry->in_pack_header_size = used;
1187
1188                if (base_entry) {
1189
1190                        /* Depth value does not matter - find_deltas()
1191                         * will never consider reused delta as the
1192                         * base object to deltify other objects
1193                         * against, in order to avoid circular deltas.
1194                         */
1195
1196                        /* uncompressed size of the delta data */
1197                        entry->size = size;
1198                        entry->delta = base_entry;
1199                        entry->type = entry->in_pack_type;
1200
1201                        entry->delta_sibling = base_entry->delta_child;
1202                        base_entry->delta_child = entry;
1203
1204                        return;
1205                }
1206                /* Otherwise we would do the usual */
1207        }
1208
1209        entry->type = sha1_object_info(entry->sha1, &entry->size);
1210        if (entry->type < 0)
1211                die("unable to get type of object %s",
1212                    sha1_to_hex(entry->sha1));
1213}
1214
1215static void get_object_details(void)
1216{
1217        uint32_t i;
1218        struct object_entry *entry;
1219
1220        prepare_pack_ix();
1221        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
1222                check_object(entry);
1223}
1224
1225static int type_size_sort(const void *_a, const void *_b)
1226{
1227        const struct object_entry *a = *(struct object_entry **)_a;
1228        const struct object_entry *b = *(struct object_entry **)_b;
1229
1230        if (a->type < b->type)
1231                return -1;
1232        if (a->type > b->type)
1233                return 1;
1234        if (a->hash < b->hash)
1235                return -1;
1236        if (a->hash > b->hash)
1237                return 1;
1238        if (a->preferred_base < b->preferred_base)
1239                return -1;
1240        if (a->preferred_base > b->preferred_base)
1241                return 1;
1242        if (a->size < b->size)
1243                return -1;
1244        if (a->size > b->size)
1245                return 1;
1246        return a > b ? -1 : (a < b);  /* newest last */
1247}
1248
1249struct unpacked {
1250        struct object_entry *entry;
1251        void *data;
1252        struct delta_index *index;
1253};
1254
1255/*
1256 * We search for deltas _backwards_ in a list sorted by type and
1257 * by size, so that we see progressively smaller and smaller files.
1258 * That's because we prefer deltas to be from the bigger file
1259 * to the smaller - deletes are potentially cheaper, but perhaps
1260 * more importantly, the bigger file is likely the more recent
1261 * one.
1262 */
1263static int try_delta(struct unpacked *trg, struct unpacked *src,
1264                     unsigned max_depth)
1265{
1266        struct object_entry *trg_entry = trg->entry;
1267        struct object_entry *src_entry = src->entry;
1268        unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1269        enum object_type type;
1270        void *delta_buf;
1271
1272        /* Don't bother doing diffs between different types */
1273        if (trg_entry->type != src_entry->type)
1274                return -1;
1275
1276        /* We do not compute delta to *create* objects we are not
1277         * going to pack.
1278         */
1279        if (trg_entry->preferred_base)
1280                return -1;
1281
1282        /*
1283         * We do not bother to try a delta that we discarded
1284         * on an earlier try, but only when reusing delta data.
1285         */
1286        if (!no_reuse_delta && trg_entry->in_pack &&
1287            trg_entry->in_pack == src_entry->in_pack &&
1288            trg_entry->in_pack_type != OBJ_REF_DELTA &&
1289            trg_entry->in_pack_type != OBJ_OFS_DELTA)
1290                return 0;
1291
1292        /* Let's not bust the allowed depth. */
1293        if (src_entry->depth >= max_depth)
1294                return 0;
1295
1296        /* Now some size filtering heuristics. */
1297        trg_size = trg_entry->size;
1298        max_size = trg_size/2 - 20;
1299        max_size = max_size * (max_depth - src_entry->depth) / max_depth;
1300        if (max_size == 0)
1301                return 0;
1302        if (trg_entry->delta && trg_entry->delta_size <= max_size)
1303                max_size = trg_entry->delta_size-1;
1304        src_size = src_entry->size;
1305        sizediff = src_size < trg_size ? trg_size - src_size : 0;
1306        if (sizediff >= max_size)
1307                return 0;
1308
1309        /* Load data if not already done */
1310        if (!trg->data) {
1311                trg->data = read_sha1_file(trg_entry->sha1, &type, &sz);
1312                if (sz != trg_size)
1313                        die("object %s inconsistent object length (%lu vs %lu)",
1314                            sha1_to_hex(trg_entry->sha1), sz, trg_size);
1315        }
1316        if (!src->data) {
1317                src->data = read_sha1_file(src_entry->sha1, &type, &sz);
1318                if (sz != src_size)
1319                        die("object %s inconsistent object length (%lu vs %lu)",
1320                            sha1_to_hex(src_entry->sha1), sz, src_size);
1321        }
1322        if (!src->index) {
1323                src->index = create_delta_index(src->data, src_size);
1324                if (!src->index)
1325                        die("out of memory");
1326        }
1327
1328        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1329        if (!delta_buf)
1330                return 0;
1331
1332        trg_entry->delta = src_entry;
1333        trg_entry->delta_size = delta_size;
1334        trg_entry->depth = src_entry->depth + 1;
1335        free(delta_buf);
1336        return 1;
1337}
1338
1339static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1340{
1341        struct object_entry *child = me->delta_child;
1342        unsigned int m = n;
1343        while (child) {
1344                unsigned int c = check_delta_limit(child, n + 1);
1345                if (m < c)
1346                        m = c;
1347                child = child->delta_sibling;
1348        }
1349        return m;
1350}
1351
1352static void find_deltas(struct object_entry **list, int window, int depth)
1353{
1354        uint32_t i = nr_objects, idx = 0, processed = 0;
1355        unsigned int array_size = window * sizeof(struct unpacked);
1356        struct unpacked *array;
1357        unsigned last_percent = 999;
1358        int max_depth;
1359
1360        if (!nr_objects)
1361                return;
1362        array = xmalloc(array_size);
1363        memset(array, 0, array_size);
1364        if (progress)
1365                fprintf(stderr, "Deltifying %u objects.\n", nr_result);
1366
1367        do {
1368                struct object_entry *entry = list[--i];
1369                struct unpacked *n = array + idx;
1370                int j;
1371
1372                if (!entry->preferred_base)
1373                        processed++;
1374
1375                if (progress) {
1376                        unsigned percent = processed * 100 / nr_result;
1377                        if (percent != last_percent || progress_update) {
1378                                fprintf(stderr, "%4u%% (%u/%u) done\r",
1379                                        percent, processed, nr_result);
1380                                progress_update = 0;
1381                                last_percent = percent;
1382                        }
1383                }
1384
1385                if (entry->delta)
1386                        /* This happens if we decided to reuse existing
1387                         * delta from a pack.  "!no_reuse_delta &&" is implied.
1388                         */
1389                        continue;
1390
1391                if (entry->size < 50)
1392                        continue;
1393                free_delta_index(n->index);
1394                n->index = NULL;
1395                free(n->data);
1396                n->data = NULL;
1397                n->entry = entry;
1398
1399                /*
1400                 * If the current object is at pack edge, take the depth the
1401                 * objects that depend on the current object into account
1402                 * otherwise they would become too deep.
1403                 */
1404                max_depth = depth;
1405                if (entry->delta_child) {
1406                        max_depth -= check_delta_limit(entry, 0);
1407                        if (max_depth <= 0)
1408                                goto next;
1409                }
1410
1411                j = window;
1412                while (--j > 0) {
1413                        uint32_t other_idx = idx + j;
1414                        struct unpacked *m;
1415                        if (other_idx >= window)
1416                                other_idx -= window;
1417                        m = array + other_idx;
1418                        if (!m->entry)
1419                                break;
1420                        if (try_delta(n, m, max_depth) < 0)
1421                                break;
1422                }
1423
1424                /* if we made n a delta, and if n is already at max
1425                 * depth, leaving it in the window is pointless.  we
1426                 * should evict it first.
1427                 */
1428                if (entry->delta && depth <= entry->depth)
1429                        continue;
1430
1431                next:
1432                idx++;
1433                if (idx >= window)
1434                        idx = 0;
1435        } while (i > 0);
1436
1437        if (progress)
1438                fputc('\n', stderr);
1439
1440        for (i = 0; i < window; ++i) {
1441                free_delta_index(array[i].index);
1442                free(array[i].data);
1443        }
1444        free(array);
1445}
1446
1447static void prepare_pack(int window, int depth)
1448{
1449        struct object_entry **delta_list;
1450        uint32_t i;
1451
1452        get_object_details();
1453
1454        if (!window || !depth)
1455                return;
1456
1457        delta_list = xmalloc(nr_objects * sizeof(*delta_list));
1458        for (i = 0; i < nr_objects; i++)
1459                delta_list[i] = objects + i;
1460        qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort);
1461        find_deltas(delta_list, window+1, depth);
1462        free(delta_list);
1463}
1464
1465static void progress_interval(int signum)
1466{
1467        progress_update = 1;
1468}
1469
1470static void setup_progress_signal(void)
1471{
1472        struct sigaction sa;
1473        struct itimerval v;
1474
1475        memset(&sa, 0, sizeof(sa));
1476        sa.sa_handler = progress_interval;
1477        sigemptyset(&sa.sa_mask);
1478        sa.sa_flags = SA_RESTART;
1479        sigaction(SIGALRM, &sa, NULL);
1480
1481        v.it_interval.tv_sec = 1;
1482        v.it_interval.tv_usec = 0;
1483        v.it_value = v.it_interval;
1484        setitimer(ITIMER_REAL, &v, NULL);
1485}
1486
1487static int git_pack_config(const char *k, const char *v)
1488{
1489        if(!strcmp(k, "pack.window")) {
1490                window = git_config_int(k, v);
1491                return 0;
1492        }
1493        return git_default_config(k, v);
1494}
1495
1496static void read_object_list_from_stdin(void)
1497{
1498        char line[40 + 1 + PATH_MAX + 2];
1499        unsigned char sha1[20];
1500        unsigned hash;
1501
1502        for (;;) {
1503                if (!fgets(line, sizeof(line), stdin)) {
1504                        if (feof(stdin))
1505                                break;
1506                        if (!ferror(stdin))
1507                                die("fgets returned NULL, not EOF, not error!");
1508                        if (errno != EINTR)
1509                                die("fgets: %s", strerror(errno));
1510                        clearerr(stdin);
1511                        continue;
1512                }
1513                if (line[0] == '-') {
1514                        if (get_sha1_hex(line+1, sha1))
1515                                die("expected edge sha1, got garbage:\n %s",
1516                                    line);
1517                        add_preferred_base(sha1);
1518                        continue;
1519                }
1520                if (get_sha1_hex(line, sha1))
1521                        die("expected sha1, got garbage:\n %s", line);
1522
1523                hash = name_hash(line+41);
1524                add_preferred_base_object(line+41, hash);
1525                add_object_entry(sha1, hash, 0);
1526        }
1527}
1528
1529static void show_commit(struct commit *commit)
1530{
1531        add_object_entry(commit->object.sha1, 0, 0);
1532}
1533
1534static void show_object(struct object_array_entry *p)
1535{
1536        unsigned hash = name_hash(p->name);
1537        add_preferred_base_object(p->name, hash);
1538        add_object_entry(p->item->sha1, hash, 0);
1539}
1540
1541static void show_edge(struct commit *commit)
1542{
1543        add_preferred_base(commit->object.sha1);
1544}
1545
1546static void get_object_list(int ac, const char **av)
1547{
1548        struct rev_info revs;
1549        char line[1000];
1550        int flags = 0;
1551
1552        init_revisions(&revs, NULL);
1553        save_commit_buffer = 0;
1554        track_object_refs = 0;
1555        setup_revisions(ac, av, &revs, NULL);
1556
1557        while (fgets(line, sizeof(line), stdin) != NULL) {
1558                int len = strlen(line);
1559                if (line[len - 1] == '\n')
1560                        line[--len] = 0;
1561                if (!len)
1562                        break;
1563                if (*line == '-') {
1564                        if (!strcmp(line, "--not")) {
1565                                flags ^= UNINTERESTING;
1566                                continue;
1567                        }
1568                        die("not a rev '%s'", line);
1569                }
1570                if (handle_revision_arg(line, &revs, flags, 1))
1571                        die("bad revision '%s'", line);
1572        }
1573
1574        prepare_revision_walk(&revs);
1575        mark_edges_uninteresting(revs.commits, &revs, show_edge);
1576        traverse_commit_list(&revs, show_commit, show_object);
1577}
1578
1579int cmd_pack_objects(int argc, const char **argv, const char *prefix)
1580{
1581        int depth = 10;
1582        int use_internal_rev_list = 0;
1583        int thin = 0;
1584        uint32_t i;
1585        off_t last_obj_offset;
1586        const char *base_name = NULL;
1587        const char **rp_av;
1588        int rp_ac_alloc = 64;
1589        int rp_ac;
1590
1591        rp_av = xcalloc(rp_ac_alloc, sizeof(*rp_av));
1592
1593        rp_av[0] = "pack-objects";
1594        rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
1595        rp_ac = 2;
1596
1597        git_config(git_pack_config);
1598
1599        progress = isatty(2);
1600        for (i = 1; i < argc; i++) {
1601                const char *arg = argv[i];
1602
1603                if (*arg != '-')
1604                        break;
1605
1606                if (!strcmp("--non-empty", arg)) {
1607                        non_empty = 1;
1608                        continue;
1609                }
1610                if (!strcmp("--local", arg)) {
1611                        local = 1;
1612                        continue;
1613                }
1614                if (!strcmp("--incremental", arg)) {
1615                        incremental = 1;
1616                        continue;
1617                }
1618                if (!prefixcmp(arg, "--window=")) {
1619                        char *end;
1620                        window = strtoul(arg+9, &end, 0);
1621                        if (!arg[9] || *end)
1622                                usage(pack_usage);
1623                        continue;
1624                }
1625                if (!prefixcmp(arg, "--depth=")) {
1626                        char *end;
1627                        depth = strtoul(arg+8, &end, 0);
1628                        if (!arg[8] || *end)
1629                                usage(pack_usage);
1630                        continue;
1631                }
1632                if (!strcmp("--progress", arg)) {
1633                        progress = 1;
1634                        continue;
1635                }
1636                if (!strcmp("--all-progress", arg)) {
1637                        progress = 2;
1638                        continue;
1639                }
1640                if (!strcmp("-q", arg)) {
1641                        progress = 0;
1642                        continue;
1643                }
1644                if (!strcmp("--no-reuse-delta", arg)) {
1645                        no_reuse_delta = 1;
1646                        continue;
1647                }
1648                if (!strcmp("--delta-base-offset", arg)) {
1649                        allow_ofs_delta = 1;
1650                        continue;
1651                }
1652                if (!strcmp("--stdout", arg)) {
1653                        pack_to_stdout = 1;
1654                        continue;
1655                }
1656                if (!strcmp("--revs", arg)) {
1657                        use_internal_rev_list = 1;
1658                        continue;
1659                }
1660                if (!strcmp("--unpacked", arg) ||
1661                    !prefixcmp(arg, "--unpacked=") ||
1662                    !strcmp("--reflog", arg) ||
1663                    !strcmp("--all", arg)) {
1664                        use_internal_rev_list = 1;
1665                        if (rp_ac >= rp_ac_alloc - 1) {
1666                                rp_ac_alloc = alloc_nr(rp_ac_alloc);
1667                                rp_av = xrealloc(rp_av,
1668                                                 rp_ac_alloc * sizeof(*rp_av));
1669                        }
1670                        rp_av[rp_ac++] = arg;
1671                        continue;
1672                }
1673                if (!strcmp("--thin", arg)) {
1674                        use_internal_rev_list = 1;
1675                        thin = 1;
1676                        rp_av[1] = "--objects-edge";
1677                        continue;
1678                }
1679                if (!prefixcmp(arg, "--index-version=")) {
1680                        char *c;
1681                        index_default_version = strtoul(arg + 16, &c, 10);
1682                        if (index_default_version > 2)
1683                                die("bad %s", arg);
1684                        if (*c == ',')
1685                                index_off32_limit = strtoul(c+1, &c, 0);
1686                        if (*c || index_off32_limit & 0x80000000)
1687                                die("bad %s", arg);
1688                        continue;
1689                }
1690                usage(pack_usage);
1691        }
1692
1693        /* Traditionally "pack-objects [options] base extra" failed;
1694         * we would however want to take refs parameter that would
1695         * have been given to upstream rev-list ourselves, which means
1696         * we somehow want to say what the base name is.  So the
1697         * syntax would be:
1698         *
1699         * pack-objects [options] base <refs...>
1700         *
1701         * in other words, we would treat the first non-option as the
1702         * base_name and send everything else to the internal revision
1703         * walker.
1704         */
1705
1706        if (!pack_to_stdout)
1707                base_name = argv[i++];
1708
1709        if (pack_to_stdout != !base_name)
1710                usage(pack_usage);
1711
1712        if (!pack_to_stdout && thin)
1713                die("--thin cannot be used to build an indexable pack.");
1714
1715        prepare_packed_git();
1716
1717        if (progress) {
1718                fprintf(stderr, "Generating pack...\n");
1719                setup_progress_signal();
1720        }
1721
1722        if (!use_internal_rev_list)
1723                read_object_list_from_stdin();
1724        else {
1725                rp_av[rp_ac] = NULL;
1726                get_object_list(rp_ac, rp_av);
1727        }
1728
1729        if (progress)
1730                fprintf(stderr, "Done counting %u objects.\n", nr_objects);
1731        if (non_empty && !nr_result)
1732                return 0;
1733        if (progress && (nr_objects != nr_result))
1734                fprintf(stderr, "Result has %u objects.\n", nr_result);
1735        if (nr_result)
1736                prepare_pack(window, depth);
1737        if (progress == 1 && pack_to_stdout) {
1738                /* the other end usually displays progress itself */
1739                struct itimerval v = {{0,},};
1740                setitimer(ITIMER_REAL, &v, NULL);
1741                signal(SIGALRM, SIG_IGN );
1742                progress_update = 0;
1743        }
1744        last_obj_offset = write_pack_file();
1745        if (!pack_to_stdout) {
1746                unsigned char object_list_sha1[20];
1747                write_index_file(last_obj_offset, object_list_sha1);
1748                snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
1749                         base_name, sha1_to_hex(object_list_sha1));
1750                if (rename(pack_tmp_name, tmpname))
1751                        die("unable to rename temporary pack file: %s",
1752                            strerror(errno));
1753                snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
1754                         base_name, sha1_to_hex(object_list_sha1));
1755                if (rename(idx_tmp_name, tmpname))
1756                        die("unable to rename temporary index file: %s",
1757                            strerror(errno));
1758                puts(sha1_to_hex(object_list_sha1));
1759        }
1760        if (progress)
1761                fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
1762                        written, written_delta, reused, reused_delta);
1763        return 0;
1764}