builtin-pack-objects.con commit fast-import: Fix crash when referencing already existing objects (20f546a)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "commit.h"
   6#include "tag.h"
   7#include "tree.h"
   8#include "delta.h"
   9#include "pack.h"
  10#include "csum-file.h"
  11#include "tree-walk.h"
  12#include "diff.h"
  13#include "revision.h"
  14#include "list-objects.h"
  15#include "progress.h"
  16
  17static const char pack_usage[] = "\
  18git-pack-objects [{ -q | --progress | --all-progress }] \n\
  19        [--local] [--incremental] [--window=N] [--depth=N] \n\
  20        [--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\
  21        [--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\
  22        [<ref-list | <object-list]";
  23
  24struct object_entry {
  25        unsigned char sha1[20];
  26        uint32_t crc32;         /* crc of raw pack data for this object */
  27        off_t offset;           /* offset into the final pack file */
  28        unsigned long size;     /* uncompressed size */
  29        unsigned int hash;      /* name hint hash */
  30        unsigned int depth;     /* delta depth */
  31        struct packed_git *in_pack;     /* already in pack */
  32        off_t in_pack_offset;
  33        struct object_entry *delta;     /* delta base object */
  34        struct object_entry *delta_child; /* deltified objects who bases me */
  35        struct object_entry *delta_sibling; /* other deltified objects who
  36                                             * uses the same base as me
  37                                             */
  38        unsigned long delta_size;       /* delta data size (uncompressed) */
  39        enum object_type type;
  40        enum object_type in_pack_type;  /* could be delta */
  41        unsigned char in_pack_header_size;
  42        unsigned char preferred_base; /* we do not pack this, but is available
  43                                       * to be used as the base objectto delta
  44                                       * objects against.
  45                                       */
  46};
  47
  48/*
  49 * Objects we are going to pack are collected in objects array (dynamically
  50 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
  51 * in the order we see -- typically rev-list --objects order that gives us
  52 * nice "minimum seek" order.
  53 */
  54static struct object_entry *objects;
  55static uint32_t nr_objects, nr_alloc, nr_result;
  56
  57static int non_empty;
  58static int no_reuse_delta;
  59static int local;
  60static int incremental;
  61static int allow_ofs_delta;
  62static const char *pack_tmp_name, *idx_tmp_name;
  63static char tmpname[PATH_MAX];
  64static unsigned char pack_file_sha1[20];
  65static int progress = 1;
  66static int window = 10;
  67static int depth = 50;
  68static int pack_to_stdout;
  69static int num_preferred_base;
  70static struct progress progress_state;
  71
  72/*
  73 * The object names in objects array are hashed with this hashtable,
  74 * to help looking up the entry by object name.
  75 * This hashtable is built after all the objects are seen.
  76 */
  77static int *object_ix;
  78static int object_ix_hashsz;
  79
  80/*
  81 * Pack index for existing packs give us easy access to the offsets into
  82 * corresponding pack file where each object's data starts, but the entries
  83 * do not store the size of the compressed representation (uncompressed
  84 * size is easily available by examining the pack entry header).  It is
  85 * also rather expensive to find the sha1 for an object given its offset.
  86 *
  87 * We build a hashtable of existing packs (pack_revindex), and keep reverse
  88 * index here -- pack index file is sorted by object name mapping to offset;
  89 * this pack_revindex[].revindex array is a list of offset/index_nr pairs
  90 * ordered by offset, so if you know the offset of an object, next offset
  91 * is where its packed representation ends and the index_nr can be used to
  92 * get the object sha1 from the main index.
  93 */
  94struct revindex_entry {
  95        off_t offset;
  96        unsigned int nr;
  97};
  98struct pack_revindex {
  99        struct packed_git *p;
 100        struct revindex_entry *revindex;
 101};
 102static struct  pack_revindex *pack_revindex;
 103static int pack_revindex_hashsz;
 104
 105/*
 106 * stats
 107 */
 108static uint32_t written, written_delta;
 109static uint32_t reused, reused_delta;
 110
 111static int pack_revindex_ix(struct packed_git *p)
 112{
 113        unsigned long ui = (unsigned long)p;
 114        int i;
 115
 116        ui = ui ^ (ui >> 16); /* defeat structure alignment */
 117        i = (int)(ui % pack_revindex_hashsz);
 118        while (pack_revindex[i].p) {
 119                if (pack_revindex[i].p == p)
 120                        return i;
 121                if (++i == pack_revindex_hashsz)
 122                        i = 0;
 123        }
 124        return -1 - i;
 125}
 126
 127static void prepare_pack_ix(void)
 128{
 129        int num;
 130        struct packed_git *p;
 131        for (num = 0, p = packed_git; p; p = p->next)
 132                num++;
 133        if (!num)
 134                return;
 135        pack_revindex_hashsz = num * 11;
 136        pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
 137        for (p = packed_git; p; p = p->next) {
 138                num = pack_revindex_ix(p);
 139                num = - 1 - num;
 140                pack_revindex[num].p = p;
 141        }
 142        /* revindex elements are lazily initialized */
 143}
 144
 145static int cmp_offset(const void *a_, const void *b_)
 146{
 147        const struct revindex_entry *a = a_;
 148        const struct revindex_entry *b = b_;
 149        return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
 150}
 151
 152/*
 153 * Ordered list of offsets of objects in the pack.
 154 */
 155static void prepare_pack_revindex(struct pack_revindex *rix)
 156{
 157        struct packed_git *p = rix->p;
 158        int num_ent = p->num_objects;
 159        int i;
 160        const char *index = p->index_data;
 161
 162        rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
 163        index += 4 * 256;
 164
 165        if (p->index_version > 1) {
 166                const uint32_t *off_32 =
 167                        (uint32_t *)(index + 8 + p->num_objects * (20 + 4));
 168                const uint32_t *off_64 = off_32 + p->num_objects;
 169                for (i = 0; i < num_ent; i++) {
 170                        uint32_t off = ntohl(*off_32++);
 171                        if (!(off & 0x80000000)) {
 172                                rix->revindex[i].offset = off;
 173                        } else {
 174                                rix->revindex[i].offset =
 175                                        ((uint64_t)ntohl(*off_64++)) << 32;
 176                                rix->revindex[i].offset |=
 177                                        ntohl(*off_64++);
 178                        }
 179                        rix->revindex[i].nr = i;
 180                }
 181        } else {
 182                for (i = 0; i < num_ent; i++) {
 183                        uint32_t hl = *((uint32_t *)(index + 24 * i));
 184                        rix->revindex[i].offset = ntohl(hl);
 185                        rix->revindex[i].nr = i;
 186                }
 187        }
 188
 189        /* This knows the pack format -- the 20-byte trailer
 190         * follows immediately after the last object data.
 191         */
 192        rix->revindex[num_ent].offset = p->pack_size - 20;
 193        rix->revindex[num_ent].nr = -1;
 194        qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
 195}
 196
 197static struct revindex_entry * find_packed_object(struct packed_git *p,
 198                                                  off_t ofs)
 199{
 200        int num;
 201        int lo, hi;
 202        struct pack_revindex *rix;
 203        struct revindex_entry *revindex;
 204        num = pack_revindex_ix(p);
 205        if (num < 0)
 206                die("internal error: pack revindex uninitialized");
 207        rix = &pack_revindex[num];
 208        if (!rix->revindex)
 209                prepare_pack_revindex(rix);
 210        revindex = rix->revindex;
 211        lo = 0;
 212        hi = p->num_objects + 1;
 213        do {
 214                int mi = (lo + hi) / 2;
 215                if (revindex[mi].offset == ofs) {
 216                        return revindex + mi;
 217                }
 218                else if (ofs < revindex[mi].offset)
 219                        hi = mi;
 220                else
 221                        lo = mi + 1;
 222        } while (lo < hi);
 223        die("internal error: pack revindex corrupt");
 224}
 225
 226static const unsigned char *find_packed_object_name(struct packed_git *p,
 227                                                    off_t ofs)
 228{
 229        struct revindex_entry *entry = find_packed_object(p, ofs);
 230        return nth_packed_object_sha1(p, entry->nr);
 231}
 232
 233static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 234{
 235        unsigned long othersize, delta_size;
 236        enum object_type type;
 237        void *otherbuf = read_sha1_file(entry->delta->sha1, &type, &othersize);
 238        void *delta_buf;
 239
 240        if (!otherbuf)
 241                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
 242        delta_buf = diff_delta(otherbuf, othersize,
 243                               buf, size, &delta_size, 0);
 244        if (!delta_buf || delta_size != entry->delta_size)
 245                die("delta size changed");
 246        free(buf);
 247        free(otherbuf);
 248        return delta_buf;
 249}
 250
 251/*
 252 * The per-object header is a pretty dense thing, which is
 253 *  - first byte: low four bits are "size", then three bits of "type",
 254 *    and the high bit is "size continues".
 255 *  - each byte afterwards: low seven bits are size continuation,
 256 *    with the high bit being "size continues"
 257 */
 258static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
 259{
 260        int n = 1;
 261        unsigned char c;
 262
 263        if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
 264                die("bad type %d", type);
 265
 266        c = (type << 4) | (size & 15);
 267        size >>= 4;
 268        while (size) {
 269                *hdr++ = c | 0x80;
 270                c = size & 0x7f;
 271                size >>= 7;
 272                n++;
 273        }
 274        *hdr = c;
 275        return n;
 276}
 277
 278/*
 279 * we are going to reuse the existing object data as is.  make
 280 * sure it is not corrupt.
 281 */
 282static int check_pack_inflate(struct packed_git *p,
 283                struct pack_window **w_curs,
 284                off_t offset,
 285                off_t len,
 286                unsigned long expect)
 287{
 288        z_stream stream;
 289        unsigned char fakebuf[4096], *in;
 290        int st;
 291
 292        memset(&stream, 0, sizeof(stream));
 293        inflateInit(&stream);
 294        do {
 295                in = use_pack(p, w_curs, offset, &stream.avail_in);
 296                stream.next_in = in;
 297                stream.next_out = fakebuf;
 298                stream.avail_out = sizeof(fakebuf);
 299                st = inflate(&stream, Z_FINISH);
 300                offset += stream.next_in - in;
 301        } while (st == Z_OK || st == Z_BUF_ERROR);
 302        inflateEnd(&stream);
 303        return (st == Z_STREAM_END &&
 304                stream.total_out == expect &&
 305                stream.total_in == len) ? 0 : -1;
 306}
 307
 308static int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
 309                          off_t offset, off_t len, unsigned int nr)
 310{
 311        const uint32_t *index_crc;
 312        uint32_t data_crc = crc32(0, Z_NULL, 0);
 313
 314        do {
 315                unsigned int avail;
 316                void *data = use_pack(p, w_curs, offset, &avail);
 317                if (avail > len)
 318                        avail = len;
 319                data_crc = crc32(data_crc, data, avail);
 320                offset += avail;
 321                len -= avail;
 322        } while (len);
 323
 324        index_crc = p->index_data;
 325        index_crc += 2 + 256 + p->num_objects * (20/4) + nr;
 326
 327        return data_crc != ntohl(*index_crc);
 328}
 329
 330static void copy_pack_data(struct sha1file *f,
 331                struct packed_git *p,
 332                struct pack_window **w_curs,
 333                off_t offset,
 334                off_t len)
 335{
 336        unsigned char *in;
 337        unsigned int avail;
 338
 339        while (len) {
 340                in = use_pack(p, w_curs, offset, &avail);
 341                if (avail > len)
 342                        avail = (unsigned int)len;
 343                sha1write(f, in, avail);
 344                offset += avail;
 345                len -= avail;
 346        }
 347}
 348
 349static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
 350{
 351        z_stream stream;
 352        unsigned char fakebuf[4096];
 353        int st;
 354
 355        memset(&stream, 0, sizeof(stream));
 356        stream.next_in = data;
 357        stream.avail_in = len;
 358        stream.next_out = fakebuf;
 359        stream.avail_out = sizeof(fakebuf);
 360        inflateInit(&stream);
 361
 362        while (1) {
 363                st = inflate(&stream, Z_FINISH);
 364                if (st == Z_STREAM_END || st == Z_OK) {
 365                        st = (stream.total_out == expect &&
 366                              stream.total_in == len) ? 0 : -1;
 367                        break;
 368                }
 369                if (st != Z_BUF_ERROR) {
 370                        st = -1;
 371                        break;
 372                }
 373                stream.next_out = fakebuf;
 374                stream.avail_out = sizeof(fakebuf);
 375        }
 376        inflateEnd(&stream);
 377        return st;
 378}
 379
 380static int revalidate_loose_object(struct object_entry *entry,
 381                                   unsigned char *map,
 382                                   unsigned long mapsize)
 383{
 384        /* we already know this is a loose object with new type header. */
 385        enum object_type type;
 386        unsigned long size, used;
 387
 388        if (pack_to_stdout)
 389                return 0;
 390
 391        used = unpack_object_header_gently(map, mapsize, &type, &size);
 392        if (!used)
 393                return -1;
 394        map += used;
 395        mapsize -= used;
 396        return check_loose_inflate(map, mapsize, size);
 397}
 398
 399static unsigned long write_object(struct sha1file *f,
 400                                  struct object_entry *entry)
 401{
 402        unsigned long size;
 403        enum object_type type;
 404        void *buf;
 405        unsigned char header[10];
 406        unsigned hdrlen;
 407        off_t datalen;
 408        enum object_type obj_type;
 409        int to_reuse = 0;
 410
 411        if (!pack_to_stdout)
 412                crc32_begin(f);
 413
 414        obj_type = entry->type;
 415        if (! entry->in_pack)
 416                to_reuse = 0;   /* can't reuse what we don't have */
 417        else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
 418                to_reuse = 1;   /* check_object() decided it for us */
 419        else if (obj_type != entry->in_pack_type)
 420                to_reuse = 0;   /* pack has delta which is unusable */
 421        else if (entry->delta)
 422                to_reuse = 0;   /* we want to pack afresh */
 423        else
 424                to_reuse = 1;   /* we have it in-pack undeltified,
 425                                 * and we do not need to deltify it.
 426                                 */
 427
 428        if (!entry->in_pack && !entry->delta) {
 429                unsigned char *map;
 430                unsigned long mapsize;
 431                map = map_sha1_file(entry->sha1, &mapsize);
 432                if (map && !legacy_loose_object(map)) {
 433                        /* We can copy straight into the pack file */
 434                        if (revalidate_loose_object(entry, map, mapsize))
 435                                die("corrupt loose object %s",
 436                                    sha1_to_hex(entry->sha1));
 437                        sha1write(f, map, mapsize);
 438                        munmap(map, mapsize);
 439                        written++;
 440                        reused++;
 441                        return mapsize;
 442                }
 443                if (map)
 444                        munmap(map, mapsize);
 445        }
 446
 447        if (!to_reuse) {
 448                buf = read_sha1_file(entry->sha1, &type, &size);
 449                if (!buf)
 450                        die("unable to read %s", sha1_to_hex(entry->sha1));
 451                if (size != entry->size)
 452                        die("object %s size inconsistency (%lu vs %lu)",
 453                            sha1_to_hex(entry->sha1), size, entry->size);
 454                if (entry->delta) {
 455                        buf = delta_against(buf, size, entry);
 456                        size = entry->delta_size;
 457                        obj_type = (allow_ofs_delta && entry->delta->offset) ?
 458                                OBJ_OFS_DELTA : OBJ_REF_DELTA;
 459                }
 460                /*
 461                 * The object header is a byte of 'type' followed by zero or
 462                 * more bytes of length.
 463                 */
 464                hdrlen = encode_header(obj_type, size, header);
 465                sha1write(f, header, hdrlen);
 466
 467                if (obj_type == OBJ_OFS_DELTA) {
 468                        /*
 469                         * Deltas with relative base contain an additional
 470                         * encoding of the relative offset for the delta
 471                         * base from this object's position in the pack.
 472                         */
 473                        off_t ofs = entry->offset - entry->delta->offset;
 474                        unsigned pos = sizeof(header) - 1;
 475                        header[pos] = ofs & 127;
 476                        while (ofs >>= 7)
 477                                header[--pos] = 128 | (--ofs & 127);
 478                        sha1write(f, header + pos, sizeof(header) - pos);
 479                        hdrlen += sizeof(header) - pos;
 480                } else if (obj_type == OBJ_REF_DELTA) {
 481                        /*
 482                         * Deltas with a base reference contain
 483                         * an additional 20 bytes for the base sha1.
 484                         */
 485                        sha1write(f, entry->delta->sha1, 20);
 486                        hdrlen += 20;
 487                }
 488                datalen = sha1write_compressed(f, buf, size);
 489                free(buf);
 490        }
 491        else {
 492                struct packed_git *p = entry->in_pack;
 493                struct pack_window *w_curs = NULL;
 494                struct revindex_entry *revidx;
 495                off_t offset;
 496
 497                if (entry->delta) {
 498                        obj_type = (allow_ofs_delta && entry->delta->offset) ?
 499                                OBJ_OFS_DELTA : OBJ_REF_DELTA;
 500                        reused_delta++;
 501                }
 502                hdrlen = encode_header(obj_type, entry->size, header);
 503                sha1write(f, header, hdrlen);
 504                if (obj_type == OBJ_OFS_DELTA) {
 505                        off_t ofs = entry->offset - entry->delta->offset;
 506                        unsigned pos = sizeof(header) - 1;
 507                        header[pos] = ofs & 127;
 508                        while (ofs >>= 7)
 509                                header[--pos] = 128 | (--ofs & 127);
 510                        sha1write(f, header + pos, sizeof(header) - pos);
 511                        hdrlen += sizeof(header) - pos;
 512                } else if (obj_type == OBJ_REF_DELTA) {
 513                        sha1write(f, entry->delta->sha1, 20);
 514                        hdrlen += 20;
 515                }
 516
 517                offset = entry->in_pack_offset;
 518                revidx = find_packed_object(p, offset);
 519                datalen = revidx[1].offset - offset;
 520                if (!pack_to_stdout && p->index_version > 1 &&
 521                    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
 522                        die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
 523                offset += entry->in_pack_header_size;
 524                datalen -= entry->in_pack_header_size;
 525                if (!pack_to_stdout && p->index_version == 1 &&
 526                    check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
 527                        die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
 528                copy_pack_data(f, p, &w_curs, offset, datalen);
 529                unuse_pack(&w_curs);
 530                reused++;
 531        }
 532        if (entry->delta)
 533                written_delta++;
 534        written++;
 535        if (!pack_to_stdout)
 536                entry->crc32 = crc32_end(f);
 537        return hdrlen + datalen;
 538}
 539
 540static off_t write_one(struct sha1file *f,
 541                               struct object_entry *e,
 542                               off_t offset)
 543{
 544        unsigned long size;
 545
 546        /* offset is non zero if object is written already. */
 547        if (e->offset || e->preferred_base)
 548                return offset;
 549
 550        /* if we are deltified, write out base object first. */
 551        if (e->delta)
 552                offset = write_one(f, e->delta, offset);
 553
 554        e->offset = offset;
 555        size = write_object(f, e);
 556
 557        /* make sure off_t is sufficiently large not to wrap */
 558        if (offset > offset + size)
 559                die("pack too large for current definition of off_t");
 560        return offset + size;
 561}
 562
 563static int open_object_dir_tmp(const char *path)
 564{
 565    snprintf(tmpname, sizeof(tmpname), "%s/%s", get_object_directory(), path);
 566    return mkstemp(tmpname);
 567}
 568
 569static off_t write_pack_file(void)
 570{
 571        uint32_t i;
 572        struct sha1file *f;
 573        off_t offset, last_obj_offset = 0;
 574        struct pack_header hdr;
 575        int do_progress = progress;
 576
 577        if (pack_to_stdout) {
 578                f = sha1fd(1, "<stdout>");
 579                do_progress >>= 1;
 580        } else {
 581                int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
 582                if (fd < 0)
 583                        die("unable to create %s: %s\n", tmpname, strerror(errno));
 584                pack_tmp_name = xstrdup(tmpname);
 585                f = sha1fd(fd, pack_tmp_name);
 586        }
 587
 588        if (do_progress)
 589                start_progress(&progress_state, "Writing %u objects...", "", nr_result);
 590
 591        hdr.hdr_signature = htonl(PACK_SIGNATURE);
 592        hdr.hdr_version = htonl(PACK_VERSION);
 593        hdr.hdr_entries = htonl(nr_result);
 594        sha1write(f, &hdr, sizeof(hdr));
 595        offset = sizeof(hdr);
 596        if (!nr_result)
 597                goto done;
 598        for (i = 0; i < nr_objects; i++) {
 599                last_obj_offset = offset;
 600                offset = write_one(f, objects + i, offset);
 601                if (do_progress)
 602                        display_progress(&progress_state, written);
 603        }
 604        if (do_progress)
 605                stop_progress(&progress_state);
 606 done:
 607        if (written != nr_result)
 608                die("wrote %u objects while expecting %u", written, nr_result);
 609        sha1close(f, pack_file_sha1, 1);
 610
 611        return last_obj_offset;
 612}
 613
 614static int sha1_sort(const void *_a, const void *_b)
 615{
 616        const struct object_entry *a = *(struct object_entry **)_a;
 617        const struct object_entry *b = *(struct object_entry **)_b;
 618        return hashcmp(a->sha1, b->sha1);
 619}
 620
 621static uint32_t index_default_version = 1;
 622static uint32_t index_off32_limit = 0x7fffffff;
 623
 624static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 625{
 626        struct sha1file *f;
 627        struct object_entry **sorted_by_sha, **list, **last;
 628        uint32_t array[256];
 629        uint32_t i, index_version;
 630        SHA_CTX ctx;
 631
 632        int fd = open_object_dir_tmp("tmp_idx_XXXXXX");
 633        if (fd < 0)
 634                die("unable to create %s: %s\n", tmpname, strerror(errno));
 635        idx_tmp_name = xstrdup(tmpname);
 636        f = sha1fd(fd, idx_tmp_name);
 637
 638        if (nr_result) {
 639                uint32_t j = 0;
 640                sorted_by_sha =
 641                        xcalloc(nr_result, sizeof(struct object_entry *));
 642                for (i = 0; i < nr_objects; i++)
 643                        if (!objects[i].preferred_base)
 644                                sorted_by_sha[j++] = objects + i;
 645                if (j != nr_result)
 646                        die("listed %u objects while expecting %u", j, nr_result);
 647                qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
 648                list = sorted_by_sha;
 649                last = sorted_by_sha + nr_result;
 650        } else
 651                sorted_by_sha = list = last = NULL;
 652
 653        /* if last object's offset is >= 2^31 we should use index V2 */
 654        index_version = (last_obj_offset >> 31) ? 2 : index_default_version;
 655
 656        /* index versions 2 and above need a header */
 657        if (index_version >= 2) {
 658                struct pack_idx_header hdr;
 659                hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
 660                hdr.idx_version = htonl(index_version);
 661                sha1write(f, &hdr, sizeof(hdr));
 662        }
 663
 664        /*
 665         * Write the first-level table (the list is sorted,
 666         * but we use a 256-entry lookup to be able to avoid
 667         * having to do eight extra binary search iterations).
 668         */
 669        for (i = 0; i < 256; i++) {
 670                struct object_entry **next = list;
 671                while (next < last) {
 672                        struct object_entry *entry = *next;
 673                        if (entry->sha1[0] != i)
 674                                break;
 675                        next++;
 676                }
 677                array[i] = htonl(next - sorted_by_sha);
 678                list = next;
 679        }
 680        sha1write(f, array, 256 * 4);
 681
 682        /* Compute the SHA1 hash of sorted object names. */
 683        SHA1_Init(&ctx);
 684
 685        /* Write the actual SHA1 entries. */
 686        list = sorted_by_sha;
 687        for (i = 0; i < nr_result; i++) {
 688                struct object_entry *entry = *list++;
 689                if (index_version < 2) {
 690                        uint32_t offset = htonl(entry->offset);
 691                        sha1write(f, &offset, 4);
 692                }
 693                sha1write(f, entry->sha1, 20);
 694                SHA1_Update(&ctx, entry->sha1, 20);
 695        }
 696
 697        if (index_version >= 2) {
 698                unsigned int nr_large_offset = 0;
 699
 700                /* write the crc32 table */
 701                list = sorted_by_sha;
 702                for (i = 0; i < nr_objects; i++) {
 703                        struct object_entry *entry = *list++;
 704                        uint32_t crc32_val = htonl(entry->crc32);
 705                        sha1write(f, &crc32_val, 4);
 706                }
 707
 708                /* write the 32-bit offset table */
 709                list = sorted_by_sha;
 710                for (i = 0; i < nr_objects; i++) {
 711                        struct object_entry *entry = *list++;
 712                        uint32_t offset = (entry->offset <= index_off32_limit) ?
 713                                entry->offset : (0x80000000 | nr_large_offset++);
 714                        offset = htonl(offset);
 715                        sha1write(f, &offset, 4);
 716                }
 717
 718                /* write the large offset table */
 719                list = sorted_by_sha;
 720                while (nr_large_offset) {
 721                        struct object_entry *entry = *list++;
 722                        uint64_t offset = entry->offset;
 723                        if (offset > index_off32_limit) {
 724                                uint32_t split[2];
 725                                split[0]        = htonl(offset >> 32);
 726                                split[1] = htonl(offset & 0xffffffff);
 727                                sha1write(f, split, 8);
 728                                nr_large_offset--;
 729                        }
 730                }
 731        }
 732
 733        sha1write(f, pack_file_sha1, 20);
 734        sha1close(f, NULL, 1);
 735        free(sorted_by_sha);
 736        SHA1_Final(sha1, &ctx);
 737}
 738
 739static int locate_object_entry_hash(const unsigned char *sha1)
 740{
 741        int i;
 742        unsigned int ui;
 743        memcpy(&ui, sha1, sizeof(unsigned int));
 744        i = ui % object_ix_hashsz;
 745        while (0 < object_ix[i]) {
 746                if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1))
 747                        return i;
 748                if (++i == object_ix_hashsz)
 749                        i = 0;
 750        }
 751        return -1 - i;
 752}
 753
 754static struct object_entry *locate_object_entry(const unsigned char *sha1)
 755{
 756        int i;
 757
 758        if (!object_ix_hashsz)
 759                return NULL;
 760
 761        i = locate_object_entry_hash(sha1);
 762        if (0 <= i)
 763                return &objects[object_ix[i]-1];
 764        return NULL;
 765}
 766
 767static void rehash_objects(void)
 768{
 769        uint32_t i;
 770        struct object_entry *oe;
 771
 772        object_ix_hashsz = nr_objects * 3;
 773        if (object_ix_hashsz < 1024)
 774                object_ix_hashsz = 1024;
 775        object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
 776        memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
 777        for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
 778                int ix = locate_object_entry_hash(oe->sha1);
 779                if (0 <= ix)
 780                        continue;
 781                ix = -1 - ix;
 782                object_ix[ix] = i + 1;
 783        }
 784}
 785
 786static unsigned name_hash(const char *name)
 787{
 788        unsigned char c;
 789        unsigned hash = 0;
 790
 791        /*
 792         * This effectively just creates a sortable number from the
 793         * last sixteen non-whitespace characters. Last characters
 794         * count "most", so things that end in ".c" sort together.
 795         */
 796        while ((c = *name++) != 0) {
 797                if (isspace(c))
 798                        continue;
 799                hash = (hash >> 2) + (c << 24);
 800        }
 801        return hash;
 802}
 803
 804static int add_object_entry(const unsigned char *sha1, enum object_type type,
 805                            unsigned hash, int exclude)
 806{
 807        struct object_entry *entry;
 808        struct packed_git *p, *found_pack = NULL;
 809        off_t found_offset = 0;
 810        int ix;
 811
 812        ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
 813        if (ix >= 0) {
 814                if (exclude) {
 815                        entry = objects + object_ix[ix] - 1;
 816                        if (!entry->preferred_base)
 817                                nr_result--;
 818                        entry->preferred_base = 1;
 819                }
 820                return 0;
 821        }
 822
 823        for (p = packed_git; p; p = p->next) {
 824                off_t offset = find_pack_entry_one(sha1, p);
 825                if (offset) {
 826                        if (!found_pack) {
 827                                found_offset = offset;
 828                                found_pack = p;
 829                        }
 830                        if (exclude)
 831                                break;
 832                        if (incremental)
 833                                return 0;
 834                        if (local && !p->pack_local)
 835                                return 0;
 836                }
 837        }
 838
 839        if (nr_objects >= nr_alloc) {
 840                nr_alloc = (nr_alloc  + 1024) * 3 / 2;
 841                objects = xrealloc(objects, nr_alloc * sizeof(*entry));
 842        }
 843
 844        entry = objects + nr_objects++;
 845        memset(entry, 0, sizeof(*entry));
 846        hashcpy(entry->sha1, sha1);
 847        entry->hash = hash;
 848        if (type)
 849                entry->type = type;
 850        if (exclude)
 851                entry->preferred_base = 1;
 852        else
 853                nr_result++;
 854        if (found_pack) {
 855                entry->in_pack = found_pack;
 856                entry->in_pack_offset = found_offset;
 857        }
 858
 859        if (object_ix_hashsz * 3 <= nr_objects * 4)
 860                rehash_objects();
 861        else
 862                object_ix[-1 - ix] = nr_objects;
 863
 864        if (progress)
 865                display_progress(&progress_state, nr_objects);
 866
 867        return 1;
 868}
 869
 870struct pbase_tree_cache {
 871        unsigned char sha1[20];
 872        int ref;
 873        int temporary;
 874        void *tree_data;
 875        unsigned long tree_size;
 876};
 877
 878static struct pbase_tree_cache *(pbase_tree_cache[256]);
 879static int pbase_tree_cache_ix(const unsigned char *sha1)
 880{
 881        return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
 882}
 883static int pbase_tree_cache_ix_incr(int ix)
 884{
 885        return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
 886}
 887
 888static struct pbase_tree {
 889        struct pbase_tree *next;
 890        /* This is a phony "cache" entry; we are not
 891         * going to evict it nor find it through _get()
 892         * mechanism -- this is for the toplevel node that
 893         * would almost always change with any commit.
 894         */
 895        struct pbase_tree_cache pcache;
 896} *pbase_tree;
 897
 898static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
 899{
 900        struct pbase_tree_cache *ent, *nent;
 901        void *data;
 902        unsigned long size;
 903        enum object_type type;
 904        int neigh;
 905        int my_ix = pbase_tree_cache_ix(sha1);
 906        int available_ix = -1;
 907
 908        /* pbase-tree-cache acts as a limited hashtable.
 909         * your object will be found at your index or within a few
 910         * slots after that slot if it is cached.
 911         */
 912        for (neigh = 0; neigh < 8; neigh++) {
 913                ent = pbase_tree_cache[my_ix];
 914                if (ent && !hashcmp(ent->sha1, sha1)) {
 915                        ent->ref++;
 916                        return ent;
 917                }
 918                else if (((available_ix < 0) && (!ent || !ent->ref)) ||
 919                         ((0 <= available_ix) &&
 920                          (!ent && pbase_tree_cache[available_ix])))
 921                        available_ix = my_ix;
 922                if (!ent)
 923                        break;
 924                my_ix = pbase_tree_cache_ix_incr(my_ix);
 925        }
 926
 927        /* Did not find one.  Either we got a bogus request or
 928         * we need to read and perhaps cache.
 929         */
 930        data = read_sha1_file(sha1, &type, &size);
 931        if (!data)
 932                return NULL;
 933        if (type != OBJ_TREE) {
 934                free(data);
 935                return NULL;
 936        }
 937
 938        /* We need to either cache or return a throwaway copy */
 939
 940        if (available_ix < 0)
 941                ent = NULL;
 942        else {
 943                ent = pbase_tree_cache[available_ix];
 944                my_ix = available_ix;
 945        }
 946
 947        if (!ent) {
 948                nent = xmalloc(sizeof(*nent));
 949                nent->temporary = (available_ix < 0);
 950        }
 951        else {
 952                /* evict and reuse */
 953                free(ent->tree_data);
 954                nent = ent;
 955        }
 956        hashcpy(nent->sha1, sha1);
 957        nent->tree_data = data;
 958        nent->tree_size = size;
 959        nent->ref = 1;
 960        if (!nent->temporary)
 961                pbase_tree_cache[my_ix] = nent;
 962        return nent;
 963}
 964
 965static void pbase_tree_put(struct pbase_tree_cache *cache)
 966{
 967        if (!cache->temporary) {
 968                cache->ref--;
 969                return;
 970        }
 971        free(cache->tree_data);
 972        free(cache);
 973}
 974
 975static int name_cmp_len(const char *name)
 976{
 977        int i;
 978        for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
 979                ;
 980        return i;
 981}
 982
 983static void add_pbase_object(struct tree_desc *tree,
 984                             const char *name,
 985                             int cmplen,
 986                             const char *fullname)
 987{
 988        struct name_entry entry;
 989        int cmp;
 990
 991        while (tree_entry(tree,&entry)) {
 992                cmp = tree_entry_len(entry.path, entry.sha1) != cmplen ? 1 :
 993                      memcmp(name, entry.path, cmplen);
 994                if (cmp > 0)
 995                        continue;
 996                if (cmp < 0)
 997                        return;
 998                if (name[cmplen] != '/') {
 999                        unsigned hash = name_hash(fullname);
1000                        add_object_entry(entry.sha1,
1001                                         S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB,
1002                                         hash, 1);
1003                        return;
1004                }
1005                if (S_ISDIR(entry.mode)) {
1006                        struct tree_desc sub;
1007                        struct pbase_tree_cache *tree;
1008                        const char *down = name+cmplen+1;
1009                        int downlen = name_cmp_len(down);
1010
1011                        tree = pbase_tree_get(entry.sha1);
1012                        if (!tree)
1013                                return;
1014                        init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1015
1016                        add_pbase_object(&sub, down, downlen, fullname);
1017                        pbase_tree_put(tree);
1018                }
1019        }
1020}
1021
1022static unsigned *done_pbase_paths;
1023static int done_pbase_paths_num;
1024static int done_pbase_paths_alloc;
1025static int done_pbase_path_pos(unsigned hash)
1026{
1027        int lo = 0;
1028        int hi = done_pbase_paths_num;
1029        while (lo < hi) {
1030                int mi = (hi + lo) / 2;
1031                if (done_pbase_paths[mi] == hash)
1032                        return mi;
1033                if (done_pbase_paths[mi] < hash)
1034                        hi = mi;
1035                else
1036                        lo = mi + 1;
1037        }
1038        return -lo-1;
1039}
1040
1041static int check_pbase_path(unsigned hash)
1042{
1043        int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
1044        if (0 <= pos)
1045                return 1;
1046        pos = -pos - 1;
1047        if (done_pbase_paths_alloc <= done_pbase_paths_num) {
1048                done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
1049                done_pbase_paths = xrealloc(done_pbase_paths,
1050                                            done_pbase_paths_alloc *
1051                                            sizeof(unsigned));
1052        }
1053        done_pbase_paths_num++;
1054        if (pos < done_pbase_paths_num)
1055                memmove(done_pbase_paths + pos + 1,
1056                        done_pbase_paths + pos,
1057                        (done_pbase_paths_num - pos - 1) * sizeof(unsigned));
1058        done_pbase_paths[pos] = hash;
1059        return 0;
1060}
1061
1062static void add_preferred_base_object(const char *name, unsigned hash)
1063{
1064        struct pbase_tree *it;
1065        int cmplen;
1066
1067        if (!num_preferred_base || check_pbase_path(hash))
1068                return;
1069
1070        cmplen = name_cmp_len(name);
1071        for (it = pbase_tree; it; it = it->next) {
1072                if (cmplen == 0) {
1073                        add_object_entry(it->pcache.sha1, OBJ_TREE, 0, 1);
1074                }
1075                else {
1076                        struct tree_desc tree;
1077                        init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1078                        add_pbase_object(&tree, name, cmplen, name);
1079                }
1080        }
1081}
1082
1083static void add_preferred_base(unsigned char *sha1)
1084{
1085        struct pbase_tree *it;
1086        void *data;
1087        unsigned long size;
1088        unsigned char tree_sha1[20];
1089
1090        if (window <= num_preferred_base++)
1091                return;
1092
1093        data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
1094        if (!data)
1095                return;
1096
1097        for (it = pbase_tree; it; it = it->next) {
1098                if (!hashcmp(it->pcache.sha1, tree_sha1)) {
1099                        free(data);
1100                        return;
1101                }
1102        }
1103
1104        it = xcalloc(1, sizeof(*it));
1105        it->next = pbase_tree;
1106        pbase_tree = it;
1107
1108        hashcpy(it->pcache.sha1, tree_sha1);
1109        it->pcache.tree_data = data;
1110        it->pcache.tree_size = size;
1111}
1112
1113static void check_object(struct object_entry *entry)
1114{
1115        if (entry->in_pack) {
1116                struct packed_git *p = entry->in_pack;
1117                struct pack_window *w_curs = NULL;
1118                const unsigned char *base_ref = NULL;
1119                struct object_entry *base_entry;
1120                unsigned long used, used_0;
1121                unsigned int avail;
1122                off_t ofs;
1123                unsigned char *buf, c;
1124
1125                buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1126
1127                /*
1128                 * We want in_pack_type even if we do not reuse delta.
1129                 * There is no point not reusing non-delta representations.
1130                 */
1131                used = unpack_object_header_gently(buf, avail,
1132                                                   &entry->in_pack_type,
1133                                                   &entry->size);
1134
1135                /*
1136                 * Determine if this is a delta and if so whether we can
1137                 * reuse it or not.  Otherwise let's find out as cheaply as
1138                 * possible what the actual type and size for this object is.
1139                 */
1140                switch (entry->in_pack_type) {
1141                default:
1142                        /* Not a delta hence we've already got all we need. */
1143                        entry->type = entry->in_pack_type;
1144                        entry->in_pack_header_size = used;
1145                        unuse_pack(&w_curs);
1146                        return;
1147                case OBJ_REF_DELTA:
1148                        if (!no_reuse_delta && !entry->preferred_base)
1149                                base_ref = use_pack(p, &w_curs,
1150                                                entry->in_pack_offset + used, NULL);
1151                        entry->in_pack_header_size = used + 20;
1152                        break;
1153                case OBJ_OFS_DELTA:
1154                        buf = use_pack(p, &w_curs,
1155                                       entry->in_pack_offset + used, NULL);
1156                        used_0 = 0;
1157                        c = buf[used_0++];
1158                        ofs = c & 127;
1159                        while (c & 128) {
1160                                ofs += 1;
1161                                if (!ofs || MSB(ofs, 7))
1162                                        die("delta base offset overflow in pack for %s",
1163                                            sha1_to_hex(entry->sha1));
1164                                c = buf[used_0++];
1165                                ofs = (ofs << 7) + (c & 127);
1166                        }
1167                        if (ofs >= entry->in_pack_offset)
1168                                die("delta base offset out of bound for %s",
1169                                    sha1_to_hex(entry->sha1));
1170                        ofs = entry->in_pack_offset - ofs;
1171                        if (!no_reuse_delta && !entry->preferred_base)
1172                                base_ref = find_packed_object_name(p, ofs);
1173                        entry->in_pack_header_size = used + used_0;
1174                        break;
1175                }
1176
1177                if (base_ref && (base_entry = locate_object_entry(base_ref))) {
1178                        /*
1179                         * If base_ref was set above that means we wish to
1180                         * reuse delta data, and we even found that base
1181                         * in the list of objects we want to pack. Goodie!
1182                         *
1183                         * Depth value does not matter - find_deltas() will
1184                         * never consider reused delta as the base object to
1185                         * deltify other objects against, in order to avoid
1186                         * circular deltas.
1187                         */
1188                        entry->type = entry->in_pack_type;
1189                        entry->delta = base_entry;
1190                        entry->delta_sibling = base_entry->delta_child;
1191                        base_entry->delta_child = entry;
1192                        unuse_pack(&w_curs);
1193                        return;
1194                }
1195
1196                if (entry->type) {
1197                        /*
1198                         * This must be a delta and we already know what the
1199                         * final object type is.  Let's extract the actual
1200                         * object size from the delta header.
1201                         */
1202                        entry->size = get_size_from_delta(p, &w_curs,
1203                                        entry->in_pack_offset + entry->in_pack_header_size);
1204                        unuse_pack(&w_curs);
1205                        return;
1206                }
1207
1208                /*
1209                 * No choice but to fall back to the recursive delta walk
1210                 * with sha1_object_info() to find about the object type
1211                 * at this point...
1212                 */
1213                unuse_pack(&w_curs);
1214        }
1215
1216        entry->type = sha1_object_info(entry->sha1, &entry->size);
1217        if (entry->type < 0)
1218                die("unable to get type of object %s",
1219                    sha1_to_hex(entry->sha1));
1220}
1221
1222static int pack_offset_sort(const void *_a, const void *_b)
1223{
1224        const struct object_entry *a = *(struct object_entry **)_a;
1225        const struct object_entry *b = *(struct object_entry **)_b;
1226
1227        /* avoid filesystem trashing with loose objects */
1228        if (!a->in_pack && !b->in_pack)
1229                return hashcmp(a->sha1, b->sha1);
1230
1231        if (a->in_pack < b->in_pack)
1232                return -1;
1233        if (a->in_pack > b->in_pack)
1234                return 1;
1235        return a->in_pack_offset < b->in_pack_offset ? -1 :
1236                        (a->in_pack_offset > b->in_pack_offset);
1237}
1238
1239static void get_object_details(void)
1240{
1241        uint32_t i;
1242        struct object_entry **sorted_by_offset;
1243
1244        sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
1245        for (i = 0; i < nr_objects; i++)
1246                sorted_by_offset[i] = objects + i;
1247        qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
1248
1249        prepare_pack_ix();
1250        for (i = 0; i < nr_objects; i++)
1251                check_object(sorted_by_offset[i]);
1252        free(sorted_by_offset);
1253}
1254
1255static int type_size_sort(const void *_a, const void *_b)
1256{
1257        const struct object_entry *a = *(struct object_entry **)_a;
1258        const struct object_entry *b = *(struct object_entry **)_b;
1259
1260        if (a->type < b->type)
1261                return -1;
1262        if (a->type > b->type)
1263                return 1;
1264        if (a->hash < b->hash)
1265                return -1;
1266        if (a->hash > b->hash)
1267                return 1;
1268        if (a->preferred_base < b->preferred_base)
1269                return -1;
1270        if (a->preferred_base > b->preferred_base)
1271                return 1;
1272        if (a->size < b->size)
1273                return -1;
1274        if (a->size > b->size)
1275                return 1;
1276        return a > b ? -1 : (a < b);  /* newest last */
1277}
1278
1279struct unpacked {
1280        struct object_entry *entry;
1281        void *data;
1282        struct delta_index *index;
1283};
1284
1285/*
1286 * We search for deltas _backwards_ in a list sorted by type and
1287 * by size, so that we see progressively smaller and smaller files.
1288 * That's because we prefer deltas to be from the bigger file
1289 * to the smaller - deletes are potentially cheaper, but perhaps
1290 * more importantly, the bigger file is likely the more recent
1291 * one.
1292 */
1293static int try_delta(struct unpacked *trg, struct unpacked *src,
1294                     unsigned max_depth)
1295{
1296        struct object_entry *trg_entry = trg->entry;
1297        struct object_entry *src_entry = src->entry;
1298        unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1299        enum object_type type;
1300        void *delta_buf;
1301
1302        /* Don't bother doing diffs between different types */
1303        if (trg_entry->type != src_entry->type)
1304                return -1;
1305
1306        /* We do not compute delta to *create* objects we are not
1307         * going to pack.
1308         */
1309        if (trg_entry->preferred_base)
1310                return -1;
1311
1312        /*
1313         * We do not bother to try a delta that we discarded
1314         * on an earlier try, but only when reusing delta data.
1315         */
1316        if (!no_reuse_delta && trg_entry->in_pack &&
1317            trg_entry->in_pack == src_entry->in_pack &&
1318            trg_entry->in_pack_type != OBJ_REF_DELTA &&
1319            trg_entry->in_pack_type != OBJ_OFS_DELTA)
1320                return 0;
1321
1322        /* Let's not bust the allowed depth. */
1323        if (src_entry->depth >= max_depth)
1324                return 0;
1325
1326        /* Now some size filtering heuristics. */
1327        trg_size = trg_entry->size;
1328        max_size = trg_size/2 - 20;
1329        max_size = max_size * (max_depth - src_entry->depth) / max_depth;
1330        if (max_size == 0)
1331                return 0;
1332        if (trg_entry->delta && trg_entry->delta_size <= max_size)
1333                max_size = trg_entry->delta_size-1;
1334        src_size = src_entry->size;
1335        sizediff = src_size < trg_size ? trg_size - src_size : 0;
1336        if (sizediff >= max_size)
1337                return 0;
1338
1339        /* Load data if not already done */
1340        if (!trg->data) {
1341                trg->data = read_sha1_file(trg_entry->sha1, &type, &sz);
1342                if (sz != trg_size)
1343                        die("object %s inconsistent object length (%lu vs %lu)",
1344                            sha1_to_hex(trg_entry->sha1), sz, trg_size);
1345        }
1346        if (!src->data) {
1347                src->data = read_sha1_file(src_entry->sha1, &type, &sz);
1348                if (sz != src_size)
1349                        die("object %s inconsistent object length (%lu vs %lu)",
1350                            sha1_to_hex(src_entry->sha1), sz, src_size);
1351        }
1352        if (!src->index) {
1353                src->index = create_delta_index(src->data, src_size);
1354                if (!src->index)
1355                        die("out of memory");
1356        }
1357
1358        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1359        if (!delta_buf)
1360                return 0;
1361
1362        trg_entry->delta = src_entry;
1363        trg_entry->delta_size = delta_size;
1364        trg_entry->depth = src_entry->depth + 1;
1365        free(delta_buf);
1366        return 1;
1367}
1368
1369static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1370{
1371        struct object_entry *child = me->delta_child;
1372        unsigned int m = n;
1373        while (child) {
1374                unsigned int c = check_delta_limit(child, n + 1);
1375                if (m < c)
1376                        m = c;
1377                child = child->delta_sibling;
1378        }
1379        return m;
1380}
1381
1382static void find_deltas(struct object_entry **list, int window, int depth)
1383{
1384        uint32_t i = nr_objects, idx = 0, processed = 0;
1385        unsigned int array_size = window * sizeof(struct unpacked);
1386        struct unpacked *array;
1387        int max_depth;
1388
1389        if (!nr_objects)
1390                return;
1391        array = xmalloc(array_size);
1392        memset(array, 0, array_size);
1393        if (progress)
1394                start_progress(&progress_state, "Deltifying %u objects...", "", nr_result);
1395
1396        do {
1397                struct object_entry *entry = list[--i];
1398                struct unpacked *n = array + idx;
1399                int j;
1400
1401                if (!entry->preferred_base)
1402                        processed++;
1403
1404                if (progress)
1405                        display_progress(&progress_state, processed);
1406
1407                if (entry->delta)
1408                        /* This happens if we decided to reuse existing
1409                         * delta from a pack.  "!no_reuse_delta &&" is implied.
1410                         */
1411                        continue;
1412
1413                if (entry->size < 50)
1414                        continue;
1415                free_delta_index(n->index);
1416                n->index = NULL;
1417                free(n->data);
1418                n->data = NULL;
1419                n->entry = entry;
1420
1421                /*
1422                 * If the current object is at pack edge, take the depth the
1423                 * objects that depend on the current object into account
1424                 * otherwise they would become too deep.
1425                 */
1426                max_depth = depth;
1427                if (entry->delta_child) {
1428                        max_depth -= check_delta_limit(entry, 0);
1429                        if (max_depth <= 0)
1430                                goto next;
1431                }
1432
1433                j = window;
1434                while (--j > 0) {
1435                        uint32_t other_idx = idx + j;
1436                        struct unpacked *m;
1437                        if (other_idx >= window)
1438                                other_idx -= window;
1439                        m = array + other_idx;
1440                        if (!m->entry)
1441                                break;
1442                        if (try_delta(n, m, max_depth) < 0)
1443                                break;
1444                }
1445
1446                /* if we made n a delta, and if n is already at max
1447                 * depth, leaving it in the window is pointless.  we
1448                 * should evict it first.
1449                 */
1450                if (entry->delta && depth <= entry->depth)
1451                        continue;
1452
1453                next:
1454                idx++;
1455                if (idx >= window)
1456                        idx = 0;
1457        } while (i > 0);
1458
1459        if (progress)
1460                stop_progress(&progress_state);
1461
1462        for (i = 0; i < window; ++i) {
1463                free_delta_index(array[i].index);
1464                free(array[i].data);
1465        }
1466        free(array);
1467}
1468
1469static void prepare_pack(int window, int depth)
1470{
1471        struct object_entry **delta_list;
1472        uint32_t i;
1473
1474        get_object_details();
1475
1476        if (!window || !depth)
1477                return;
1478
1479        delta_list = xmalloc(nr_objects * sizeof(*delta_list));
1480        for (i = 0; i < nr_objects; i++)
1481                delta_list[i] = objects + i;
1482        qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort);
1483        find_deltas(delta_list, window+1, depth);
1484        free(delta_list);
1485}
1486
1487static int git_pack_config(const char *k, const char *v)
1488{
1489        if(!strcmp(k, "pack.window")) {
1490                window = git_config_int(k, v);
1491                return 0;
1492        }
1493        if(!strcmp(k, "pack.depth")) {
1494                depth = git_config_int(k, v);
1495                return 0;
1496        }
1497        return git_default_config(k, v);
1498}
1499
1500static void read_object_list_from_stdin(void)
1501{
1502        char line[40 + 1 + PATH_MAX + 2];
1503        unsigned char sha1[20];
1504        unsigned hash;
1505
1506        for (;;) {
1507                if (!fgets(line, sizeof(line), stdin)) {
1508                        if (feof(stdin))
1509                                break;
1510                        if (!ferror(stdin))
1511                                die("fgets returned NULL, not EOF, not error!");
1512                        if (errno != EINTR)
1513                                die("fgets: %s", strerror(errno));
1514                        clearerr(stdin);
1515                        continue;
1516                }
1517                if (line[0] == '-') {
1518                        if (get_sha1_hex(line+1, sha1))
1519                                die("expected edge sha1, got garbage:\n %s",
1520                                    line);
1521                        add_preferred_base(sha1);
1522                        continue;
1523                }
1524                if (get_sha1_hex(line, sha1))
1525                        die("expected sha1, got garbage:\n %s", line);
1526
1527                hash = name_hash(line+41);
1528                add_preferred_base_object(line+41, hash);
1529                add_object_entry(sha1, 0, hash, 0);
1530        }
1531}
1532
1533static void show_commit(struct commit *commit)
1534{
1535        add_object_entry(commit->object.sha1, OBJ_COMMIT, 0, 0);
1536}
1537
1538static void show_object(struct object_array_entry *p)
1539{
1540        unsigned hash = name_hash(p->name);
1541        add_preferred_base_object(p->name, hash);
1542        add_object_entry(p->item->sha1, p->item->type, hash, 0);
1543}
1544
1545static void show_edge(struct commit *commit)
1546{
1547        add_preferred_base(commit->object.sha1);
1548}
1549
1550static void get_object_list(int ac, const char **av)
1551{
1552        struct rev_info revs;
1553        char line[1000];
1554        int flags = 0;
1555
1556        init_revisions(&revs, NULL);
1557        save_commit_buffer = 0;
1558        track_object_refs = 0;
1559        setup_revisions(ac, av, &revs, NULL);
1560
1561        while (fgets(line, sizeof(line), stdin) != NULL) {
1562                int len = strlen(line);
1563                if (line[len - 1] == '\n')
1564                        line[--len] = 0;
1565                if (!len)
1566                        break;
1567                if (*line == '-') {
1568                        if (!strcmp(line, "--not")) {
1569                                flags ^= UNINTERESTING;
1570                                continue;
1571                        }
1572                        die("not a rev '%s'", line);
1573                }
1574                if (handle_revision_arg(line, &revs, flags, 1))
1575                        die("bad revision '%s'", line);
1576        }
1577
1578        prepare_revision_walk(&revs);
1579        mark_edges_uninteresting(revs.commits, &revs, show_edge);
1580        traverse_commit_list(&revs, show_commit, show_object);
1581}
1582
1583static int adjust_perm(const char *path, mode_t mode)
1584{
1585        if (chmod(path, mode))
1586                return -1;
1587        return adjust_shared_perm(path);
1588}
1589
1590int cmd_pack_objects(int argc, const char **argv, const char *prefix)
1591{
1592        int use_internal_rev_list = 0;
1593        int thin = 0;
1594        uint32_t i;
1595        off_t last_obj_offset;
1596        const char *base_name = NULL;
1597        const char **rp_av;
1598        int rp_ac_alloc = 64;
1599        int rp_ac;
1600
1601        rp_av = xcalloc(rp_ac_alloc, sizeof(*rp_av));
1602
1603        rp_av[0] = "pack-objects";
1604        rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
1605        rp_ac = 2;
1606
1607        git_config(git_pack_config);
1608
1609        progress = isatty(2);
1610        for (i = 1; i < argc; i++) {
1611                const char *arg = argv[i];
1612
1613                if (*arg != '-')
1614                        break;
1615
1616                if (!strcmp("--non-empty", arg)) {
1617                        non_empty = 1;
1618                        continue;
1619                }
1620                if (!strcmp("--local", arg)) {
1621                        local = 1;
1622                        continue;
1623                }
1624                if (!strcmp("--incremental", arg)) {
1625                        incremental = 1;
1626                        continue;
1627                }
1628                if (!prefixcmp(arg, "--window=")) {
1629                        char *end;
1630                        window = strtoul(arg+9, &end, 0);
1631                        if (!arg[9] || *end)
1632                                usage(pack_usage);
1633                        continue;
1634                }
1635                if (!prefixcmp(arg, "--depth=")) {
1636                        char *end;
1637                        depth = strtoul(arg+8, &end, 0);
1638                        if (!arg[8] || *end)
1639                                usage(pack_usage);
1640                        continue;
1641                }
1642                if (!strcmp("--progress", arg)) {
1643                        progress = 1;
1644                        continue;
1645                }
1646                if (!strcmp("--all-progress", arg)) {
1647                        progress = 2;
1648                        continue;
1649                }
1650                if (!strcmp("-q", arg)) {
1651                        progress = 0;
1652                        continue;
1653                }
1654                if (!strcmp("--no-reuse-delta", arg)) {
1655                        no_reuse_delta = 1;
1656                        continue;
1657                }
1658                if (!strcmp("--delta-base-offset", arg)) {
1659                        allow_ofs_delta = 1;
1660                        continue;
1661                }
1662                if (!strcmp("--stdout", arg)) {
1663                        pack_to_stdout = 1;
1664                        continue;
1665                }
1666                if (!strcmp("--revs", arg)) {
1667                        use_internal_rev_list = 1;
1668                        continue;
1669                }
1670                if (!strcmp("--unpacked", arg) ||
1671                    !prefixcmp(arg, "--unpacked=") ||
1672                    !strcmp("--reflog", arg) ||
1673                    !strcmp("--all", arg)) {
1674                        use_internal_rev_list = 1;
1675                        if (rp_ac >= rp_ac_alloc - 1) {
1676                                rp_ac_alloc = alloc_nr(rp_ac_alloc);
1677                                rp_av = xrealloc(rp_av,
1678                                                 rp_ac_alloc * sizeof(*rp_av));
1679                        }
1680                        rp_av[rp_ac++] = arg;
1681                        continue;
1682                }
1683                if (!strcmp("--thin", arg)) {
1684                        use_internal_rev_list = 1;
1685                        thin = 1;
1686                        rp_av[1] = "--objects-edge";
1687                        continue;
1688                }
1689                if (!prefixcmp(arg, "--index-version=")) {
1690                        char *c;
1691                        index_default_version = strtoul(arg + 16, &c, 10);
1692                        if (index_default_version > 2)
1693                                die("bad %s", arg);
1694                        if (*c == ',')
1695                                index_off32_limit = strtoul(c+1, &c, 0);
1696                        if (*c || index_off32_limit & 0x80000000)
1697                                die("bad %s", arg);
1698                        continue;
1699                }
1700                usage(pack_usage);
1701        }
1702
1703        /* Traditionally "pack-objects [options] base extra" failed;
1704         * we would however want to take refs parameter that would
1705         * have been given to upstream rev-list ourselves, which means
1706         * we somehow want to say what the base name is.  So the
1707         * syntax would be:
1708         *
1709         * pack-objects [options] base <refs...>
1710         *
1711         * in other words, we would treat the first non-option as the
1712         * base_name and send everything else to the internal revision
1713         * walker.
1714         */
1715
1716        if (!pack_to_stdout)
1717                base_name = argv[i++];
1718
1719        if (pack_to_stdout != !base_name)
1720                usage(pack_usage);
1721
1722        if (!pack_to_stdout && thin)
1723                die("--thin cannot be used to build an indexable pack.");
1724
1725        prepare_packed_git();
1726
1727        if (progress)
1728                start_progress(&progress_state, "Generating pack...",
1729                               "Counting objects: ", 0);
1730        if (!use_internal_rev_list)
1731                read_object_list_from_stdin();
1732        else {
1733                rp_av[rp_ac] = NULL;
1734                get_object_list(rp_ac, rp_av);
1735        }
1736        if (progress) {
1737                stop_progress(&progress_state);
1738                fprintf(stderr, "Done counting %u objects.\n", nr_objects);
1739        }
1740
1741        if (non_empty && !nr_result)
1742                return 0;
1743        if (progress && (nr_objects != nr_result))
1744                fprintf(stderr, "Result has %u objects.\n", nr_result);
1745        if (nr_result)
1746                prepare_pack(window, depth);
1747        last_obj_offset = write_pack_file();
1748        if (!pack_to_stdout) {
1749                unsigned char object_list_sha1[20];
1750                mode_t mode = umask(0);
1751
1752                umask(mode);
1753                mode = 0444 & ~mode;
1754
1755                write_index_file(last_obj_offset, object_list_sha1);
1756                snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
1757                         base_name, sha1_to_hex(object_list_sha1));
1758                if (adjust_perm(pack_tmp_name, mode))
1759                        die("unable to make temporary pack file readable: %s",
1760                            strerror(errno));
1761                if (rename(pack_tmp_name, tmpname))
1762                        die("unable to rename temporary pack file: %s",
1763                            strerror(errno));
1764                snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
1765                         base_name, sha1_to_hex(object_list_sha1));
1766                if (adjust_perm(idx_tmp_name, mode))
1767                        die("unable to make temporary index file readable: %s",
1768                            strerror(errno));
1769                if (rename(idx_tmp_name, tmpname))
1770                        die("unable to rename temporary index file: %s",
1771                            strerror(errno));
1772                puts(sha1_to_hex(object_list_sha1));
1773        }
1774        if (progress)
1775                fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
1776                        written, written_delta, reused, reused_delta);
1777        return 0;
1778}