/*
 * builtin/pack-objects.c, as of commit aa32939 ("rev-list: add bitmap
 * mode to speed up object lists").
 */
#include "builtin.h"
#include "cache.h"
#include "attr.h"
#include "object.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "delta.h"
#include "pack.h"
#include "pack-revindex.h"
#include "csum-file.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"

static const char *pack_usage[] = {
        N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
        N_("git pack-objects [options...] base-name [< ref-list | < object-list]"),
        NULL
};

/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
 */
static struct packing_data to_pack;

static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written;

static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static unsigned long unpack_unreachable_expiration;
static int local;
static int incremental;
static int ignore_packed_keep;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int num_preferred_base;
static struct progress *progress_state;
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
static int pack_compression_seen;

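/*
 * State for the pack-reuse optimization: when a bitmap walk determines
 * that a leading chunk of an existing packfile can be copied into the
 * output verbatim, these record which pack that is, how many objects it
 * contributes, and where the reusable region ends (see
 * write_reused_pack() below).
 */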
static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static off_t reuse_packfile_offset;

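/*
 * Whether a reachability bitmap index (pack-bitmap.h) may be used to
 * enumerate the objects to pack; presumably cleared during option
 * parsing when the invocation is incompatible with bitmaps.
 */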
static int use_bitmap_index = 1;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

/*
 * stats
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
static void *get_delta(struct object_entry *entry)
{
        unsigned long size, base_size, delta_size;
        void *buf, *base_buf, *delta_buf;
        enum object_type type;

        buf = read_sha1_file(entry->idx.sha1, &type, &size);
        if (!buf)
                die("unable to read %s", sha1_to_hex(entry->idx.sha1));
        base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size);
        if (!base_buf)
                die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1));
        delta_buf = diff_delta(base_buf, base_size,
                               buf, size, &delta_size, 0);
        if (!delta_buf || delta_size != entry->delta_size)
                die("delta size changed");
        free(buf);
        free(base_buf);
        return delta_buf;
}

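/*
 * Deflate a buffer in place: on return, *pptr points to a freshly
 * allocated buffer holding the compressed bytes, the input buffer has
 * been freed, and the compressed length is returned.
 */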
static unsigned long do_compress(void **pptr, unsigned long size)
{
        git_zstream stream;
        void *in, *out;
        unsigned long maxsize;

        memset(&stream, 0, sizeof(stream));
        git_deflate_init(&stream, pack_compression_level);
        maxsize = git_deflate_bound(&stream, size);

        in = *pptr;
        out = xmalloc(maxsize);
        *pptr = out;

        stream.next_in = in;
        stream.avail_in = size;
        stream.next_out = out;
        stream.avail_out = maxsize;
        while (git_deflate(&stream, Z_FINISH) == Z_OK)
                ; /* nothing */
        git_deflate_end(&stream);

        free(in);
        return stream.total_out;
}

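/*
 * Stream a large blob into the pack in fixed-size chunks, deflating as
 * we go, so the whole blob never has to sit in memory at once.  Returns
 * the number of compressed bytes written.
 */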
static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f,
                                           const unsigned char *sha1)
{
        git_zstream stream;
        unsigned char ibuf[1024 * 16];
        unsigned char obuf[1024 * 16];
        unsigned long olen = 0;

        memset(&stream, 0, sizeof(stream));
        git_deflate_init(&stream, pack_compression_level);

        for (;;) {
                ssize_t readlen;
                int zret = Z_OK;
                readlen = read_istream(st, ibuf, sizeof(ibuf));
                if (readlen == -1)
                        die(_("unable to read %s"), sha1_to_hex(sha1));

                stream.next_in = ibuf;
                stream.avail_in = readlen;
                while ((stream.avail_in || readlen == 0) &&
                       (zret == Z_OK || zret == Z_BUF_ERROR)) {
                        stream.next_out = obuf;
                        stream.avail_out = sizeof(obuf);
                        zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
                        sha1write(f, obuf, stream.next_out - obuf);
                        olen += stream.next_out - obuf;
                }
                if (stream.avail_in)
                        die(_("deflate error (%d)"), zret);
                if (readlen == 0) {
                        if (zret != Z_STREAM_END)
                                die(_("deflate error (%d)"), zret);
                        break;
                }
        }
        git_deflate_end(&stream);
        return olen;
}

/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
                struct pack_window **w_curs,
                off_t offset,
                off_t len,
                unsigned long expect)
{
        git_zstream stream;
        unsigned char fakebuf[4096], *in;
        int st;

        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
        do {
                in = use_pack(p, w_curs, offset, &stream.avail_in);
                stream.next_in = in;
                stream.next_out = fakebuf;
                stream.avail_out = sizeof(fakebuf);
                st = git_inflate(&stream, Z_FINISH);
                offset += stream.next_in - in;
        } while (st == Z_OK || st == Z_BUF_ERROR);
        git_inflate_end(&stream);
        return (st == Z_STREAM_END &&
                stream.total_out == expect &&
                stream.total_in == len) ? 0 : -1;
}

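/*
 * Copy `len` bytes of raw (still deflated) data from an existing pack
 * into the output, going through the windowed pack access so we never
 * map more than a window at a time.
 */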
static void copy_pack_data(struct sha1file *f,
                struct packed_git *p,
                struct pack_window **w_curs,
                off_t offset,
                off_t len)
{
        unsigned char *in;
        unsigned long avail;

        while (len) {
                in = use_pack(p, w_curs, offset, &avail);
                if (avail > len)
                        avail = (unsigned long)len;
                sha1write(f, in, avail);
                offset += avail;
                len -= avail;
        }
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
                                           unsigned long limit, int usable_delta)
{
        unsigned long size, datalen;
        unsigned char header[10], dheader[10];
        unsigned hdrlen;
        enum object_type type;
        void *buf;
        struct git_istream *st = NULL;

        if (!usable_delta) {
                if (entry->type == OBJ_BLOB &&
                    entry->size > big_file_threshold &&
                    (st = open_istream(entry->idx.sha1, &type, &size, NULL)) != NULL)
                        buf = NULL;
                else {
                        buf = read_sha1_file(entry->idx.sha1, &type, &size);
                        if (!buf)
                                die(_("unable to read %s"), sha1_to_hex(entry->idx.sha1));
                }
                /*
                 * make sure no cached delta data remains from a
                 * previous attempt before a pack split occurred.
                 */
                free(entry->delta_data);
                entry->delta_data = NULL;
                entry->z_delta_size = 0;
        } else if (entry->delta_data) {
                size = entry->delta_size;
                buf = entry->delta_data;
                entry->delta_data = NULL;
                type = (allow_ofs_delta && entry->delta->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        } else {
                buf = get_delta(entry);
                size = entry->delta_size;
                type = (allow_ofs_delta && entry->delta->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        }

        if (st) /* large blob case, just assume we don't compress well */
                datalen = size;
        else if (entry->z_delta_size)
                datalen = entry->z_delta_size;
        else
                datalen = do_compress(&buf, size);

        /*
         * The object header is a byte of 'type' followed by zero or
         * more bytes of length.
         */
        hdrlen = encode_in_pack_object_header(type, size, header);

        if (type == OBJ_OFS_DELTA) {
                /*
                 * Deltas with relative base contain an additional
                 * encoding of the relative offset for the delta
                 * base from this object's position in the pack.
                 */
                off_t ofs = entry->idx.offset - entry->delta->idx.offset;
                unsigned pos = sizeof(dheader) - 1;
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
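                /*
                 * Each 7-bit group is emitted most significant first,
                 * with the high bit set on all but the last byte, and
                 * one subtracted from every group beyond the least
                 * significant so shorter offsets get strictly shorter
                 * encodings; e.g. ofs = 130 becomes the two bytes
                 * 0x80 0x02.  The matching decoder is in check_object()
                 * below.
                 */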
                if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                sha1write(f, header, hdrlen);
                sha1write(f, dheader + pos, sizeof(dheader) - pos);
                hdrlen += sizeof(dheader) - pos;
        } else if (type == OBJ_REF_DELTA) {
                /*
                 * Deltas with a base reference contain
                 * an additional 20 bytes for the base sha1.
                 */
                if (limit && hdrlen + 20 + datalen + 20 >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                sha1write(f, header, hdrlen);
                sha1write(f, entry->delta->idx.sha1, 20);
                hdrlen += 20;
        } else {
                if (limit && hdrlen + datalen + 20 >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                sha1write(f, header, hdrlen);
        }
        if (st) {
                datalen = write_large_blob_data(st, f, entry->idx.sha1);
                close_istream(st);
        } else {
                sha1write(f, buf, datalen);
                free(buf);
        }

        return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
                                        unsigned long limit, int usable_delta)
{
        struct packed_git *p = entry->in_pack;
        struct pack_window *w_curs = NULL;
        struct revindex_entry *revidx;
        off_t offset;
        enum object_type type = entry->type;
        unsigned long datalen;
        unsigned char header[10], dheader[10];
        unsigned hdrlen;

        if (entry->delta)
                type = (allow_ofs_delta && entry->delta->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        hdrlen = encode_in_pack_object_header(type, entry->size, header);

        offset = entry->in_pack_offset;
        revidx = find_pack_revindex(p, offset);
        datalen = revidx[1].offset - offset;
        if (!pack_to_stdout && p->index_version > 1 &&
            check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
                error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
        }

        offset += entry->in_pack_header_size;
        datalen -= entry->in_pack_header_size;

        if (!pack_to_stdout && p->index_version == 1 &&
            check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
                error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
        }

        if (type == OBJ_OFS_DELTA) {
                off_t ofs = entry->idx.offset - entry->delta->idx.offset;
                unsigned pos = sizeof(dheader) - 1;
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
                if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                sha1write(f, header, hdrlen);
                sha1write(f, dheader + pos, sizeof(dheader) - pos);
                hdrlen += sizeof(dheader) - pos;
                reused_delta++;
        } else if (type == OBJ_REF_DELTA) {
                if (limit && hdrlen + 20 + datalen + 20 >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                sha1write(f, header, hdrlen);
                sha1write(f, entry->delta->idx.sha1, 20);
                hdrlen += 20;
                reused_delta++;
        } else {
                if (limit && hdrlen + datalen + 20 >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                sha1write(f, header, hdrlen);
        }
        copy_pack_data(f, p, &w_curs, offset, datalen);
        unuse_pack(&w_curs);
        reused++;
        return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_object(struct sha1file *f,
                                  struct object_entry *entry,
                                  off_t write_offset)
{
        unsigned long limit, len;
        int usable_delta, to_reuse;

        if (!pack_to_stdout)
                crc32_begin(f);

        /* apply size limit if limited packsize and not first object */
        if (!pack_size_limit || !nr_written)
                limit = 0;
        else if (pack_size_limit <= write_offset)
                /*
                 * the earlier object did not fit the limit; avoid
                 * mistaking this with unlimited (i.e. limit = 0).
                 */
                limit = 1;
        else
                limit = pack_size_limit - write_offset;

        if (!entry->delta)
                usable_delta = 0;       /* no delta */
        else if (!pack_size_limit)
                usable_delta = 1;       /* unlimited packfile */
        else if (entry->delta->idx.offset == (off_t)-1)
                usable_delta = 0;       /* base was written to another pack */
        else if (entry->delta->idx.offset)
                usable_delta = 1;       /* base already exists in this pack */
        else
                usable_delta = 0;       /* base could end up in another pack */

        if (!reuse_object)
                to_reuse = 0;   /* explicit */
        else if (!entry->in_pack)
                to_reuse = 0;   /* can't reuse what we don't have */
        else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
                                /* check_object() decided it for us ... */
                to_reuse = usable_delta;
                                /* ... but pack split may override that */
        else if (entry->type != entry->in_pack_type)
                to_reuse = 0;   /* pack has delta which is unusable */
        else if (entry->delta)
                to_reuse = 0;   /* we want to pack afresh */
        else
                to_reuse = 1;   /* we have it in-pack undeltified,
                                 * and we do not need to deltify it.
                                 */

        if (!to_reuse)
                len = write_no_reuse_object(f, entry, limit, usable_delta);
        else
                len = write_reuse_object(f, entry, limit, usable_delta);
        if (!len)
                return 0;

        if (usable_delta)
                written_delta++;
        written++;
        if (!pack_to_stdout)
                entry->idx.crc32 = crc32_end(f);
        return len;
}

enum write_one_status {
        WRITE_ONE_SKIP = -1, /* already written */
        WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
        WRITE_ONE_WRITTEN = 1, /* normal */
        WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

static enum write_one_status write_one(struct sha1file *f,
                                       struct object_entry *e,
                                       off_t *offset)
{
        unsigned long size;
        int recursing;

        /*
         * we set offset to 1 (which is an impossible value) to mark
         * the fact that this object is involved in "write its base
         * first before writing a deltified object" recursion.
         */
        recursing = (e->idx.offset == 1);
        if (recursing) {
                warning("recursive delta detected for object %s",
                        sha1_to_hex(e->idx.sha1));
                return WRITE_ONE_RECURSIVE;
        } else if (e->idx.offset || e->preferred_base) {
                /* offset is non zero if object is written already. */
                return WRITE_ONE_SKIP;
        }

        /* if we are deltified, write out base object first. */
        if (e->delta) {
                e->idx.offset = 1; /* now recurse */
                switch (write_one(f, e->delta, offset)) {
                case WRITE_ONE_RECURSIVE:
                        /* we cannot depend on this one */
                        e->delta = NULL;
                        break;
                default:
                        break;
                case WRITE_ONE_BREAK:
                        e->idx.offset = recursing;
                        return WRITE_ONE_BREAK;
                }
        }

        e->idx.offset = *offset;
        size = write_object(f, e, *offset);
        if (!size) {
                e->idx.offset = recursing;
                return WRITE_ONE_BREAK;
        }
        written_list[nr_written++] = &e->idx;

        /* make sure off_t is sufficiently large not to wrap */
        if (signed_add_overflows(*offset, size))
                die("pack too large for current definition of off_t");
        *offset += size;
        return WRITE_ONE_WRITTEN;
}

static int mark_tagged(const char *path, const unsigned char *sha1, int flag,
                       void *cb_data)
{
        unsigned char peeled[20];
        struct object_entry *entry = packlist_find(&to_pack, sha1, NULL);

        if (entry)
                entry->tagged = 1;
        if (!peel_ref(path, peeled)) {
                entry = packlist_find(&to_pack, peeled, NULL);
                if (entry)
                        entry->tagged = 1;
        }
        return 0;
}

static inline void add_to_write_order(struct object_entry **wo,
                               unsigned int *endp,
                               struct object_entry *e)
{
        if (e->filled)
                return;
        wo[(*endp)++] = e;
        e->filled = 1;
}

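/*
 * Emit `e` and everything that deltas against it, walking the delta
 * tree iteratively in preorder: delta_child points at the first child,
 * delta_sibling chains the rest, and the `delta` (parent) pointer is
 * used to climb back up once a subtree is exhausted.
 */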
static void add_descendants_to_write_order(struct object_entry **wo,
                                           unsigned int *endp,
                                           struct object_entry *e)
{
        int add_to_order = 1;
        while (e) {
                if (add_to_order) {
                        struct object_entry *s;
                        /* add this node... */
                        add_to_write_order(wo, endp, e);
                        /* all its siblings... */
                        for (s = e->delta_sibling; s; s = s->delta_sibling) {
                                add_to_write_order(wo, endp, s);
                        }
                }
                /* drop down a level to add left subtree nodes if possible */
                if (e->delta_child) {
                        add_to_order = 1;
                        e = e->delta_child;
                } else {
                        add_to_order = 0;
                        /* our sibling might have some children, it is next */
                        if (e->delta_sibling) {
                                e = e->delta_sibling;
                                continue;
                        }
                        /* go back to our parent node */
                        e = e->delta;
                        while (e && !e->delta_sibling) {
                                /* we're on the right side of a subtree, keep
                                 * going up until we can go right again */
                                e = e->delta;
                        }
                        if (!e) {
                                /* done -- we hit our original root node */
                                return;
                        }
                        /* pass it off to sibling at this level */
                        e = e->delta_sibling;
                }
        }
}

static void add_family_to_write_order(struct object_entry **wo,
                                      unsigned int *endp,
                                      struct object_entry *e)
{
        struct object_entry *root;

        for (root = e; root->delta; root = root->delta)
                ; /* nothing */
        add_descendants_to_write_order(wo, endp, root);
}

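/*
 * Decide the order objects are written out: original recency order up
 * to the first tagged tip, then the tagged tips themselves, then the
 * remaining commits and tags, then trees, and finally whole delta
 * families for everything not yet placed.
 */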
static struct object_entry **compute_write_order(void)
{
        unsigned int i, wo_end, last_untagged;

        struct object_entry **wo = xmalloc(to_pack.nr_objects * sizeof(*wo));
        struct object_entry *objects = to_pack.objects;

        for (i = 0; i < to_pack.nr_objects; i++) {
                objects[i].tagged = 0;
                objects[i].filled = 0;
                objects[i].delta_child = NULL;
                objects[i].delta_sibling = NULL;
        }

        /*
         * Fully connect delta_child/delta_sibling network.
         * Make sure delta_sibling is sorted in the original
         * recency order.
         */
        for (i = to_pack.nr_objects; i > 0;) {
                struct object_entry *e = &objects[--i];
                if (!e->delta)
                        continue;
                /* Mark me as the first child */
                e->delta_sibling = e->delta->delta_child;
                e->delta->delta_child = e;
        }

        /*
         * Mark objects that are at the tip of tags.
         */
        for_each_tag_ref(mark_tagged, NULL);

        /*
         * Give the objects in the original recency order until
         * we see a tagged tip.
         */
        for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
                if (objects[i].tagged)
                        break;
                add_to_write_order(wo, &wo_end, &objects[i]);
        }
        last_untagged = i;

        /*
         * Then fill all the tagged tips.
         */
        for (; i < to_pack.nr_objects; i++) {
                if (objects[i].tagged)
                        add_to_write_order(wo, &wo_end, &objects[i]);
        }

        /*
         * And then all remaining commits and tags.
         */
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
                if (objects[i].type != OBJ_COMMIT &&
                    objects[i].type != OBJ_TAG)
                        continue;
                add_to_write_order(wo, &wo_end, &objects[i]);
        }

        /*
         * And then all the trees.
         */
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
                if (objects[i].type != OBJ_TREE)
                        continue;
                add_to_write_order(wo, &wo_end, &objects[i]);
        }

        /*
         * Finally all the rest in really tight order
         */
        for (i = last_untagged; i < to_pack.nr_objects; i++) {
                if (!objects[i].filled)
                        add_family_to_write_order(wo, &wo_end, &objects[i]);
        }

        if (wo_end != to_pack.nr_objects)
                die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);

        return wo;
}

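/*
 * Splice the reusable leading part of an existing packfile into our
 * output, skipping its header (we have already written our own).  A
 * negative reuse_packfile_offset means "everything up to the trailing
 * 20-byte SHA-1"; otherwise it marks the end of the reusable region.
 */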
static off_t write_reused_pack(struct sha1file *f)
{
        unsigned char buffer[8192];
        off_t to_write;
        int fd;

        if (!is_pack_valid(reuse_packfile))
                die("packfile is invalid: %s", reuse_packfile->pack_name);

        fd = git_open_noatime(reuse_packfile->pack_name);
        if (fd < 0)
                die_errno("unable to open packfile for reuse: %s",
                          reuse_packfile->pack_name);

        if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
                die_errno("unable to seek in reused packfile");

        if (reuse_packfile_offset < 0)
                reuse_packfile_offset = reuse_packfile->pack_size - 20;

        to_write = reuse_packfile_offset - sizeof(struct pack_header);

        while (to_write) {
                int read_pack = xread(fd, buffer, sizeof(buffer));

                if (read_pack <= 0)
                        die_errno("unable to read from reused packfile");

                if (read_pack > to_write)
                        read_pack = to_write;

                sha1write(f, buffer, read_pack);
                to_write -= read_pack;
        }

        close(fd);
        written += reuse_packfile_objects;
        return reuse_packfile_offset - sizeof(struct pack_header);
}

static void write_pack_file(void)
{
        uint32_t i = 0, j;
        struct sha1file *f;
        off_t offset;
        uint32_t nr_remaining = nr_result;
        time_t last_mtime = 0;
        struct object_entry **write_order;

        if (progress > pack_to_stdout)
                progress_state = start_progress("Writing objects", nr_result);
        written_list = xmalloc(to_pack.nr_objects * sizeof(*written_list));
        write_order = compute_write_order();

        do {
                unsigned char sha1[20];
                char *pack_tmp_name = NULL;

                if (pack_to_stdout)
                        f = sha1fd_throughput(1, "<stdout>", progress_state);
                else
                        f = create_tmp_packfile(&pack_tmp_name);

                offset = write_pack_header(f, nr_remaining);
                if (!offset)
                        die_errno("unable to write pack header");

                if (reuse_packfile) {
                        off_t packfile_size;
                        assert(pack_to_stdout);

                        packfile_size = write_reused_pack(f);
                        offset += packfile_size;
                }

                nr_written = 0;
                for (; i < to_pack.nr_objects; i++) {
                        struct object_entry *e = write_order[i];
                        if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
                                break;
                        display_progress(progress_state, written);
                }

                /*
                 * Did we write the wrong # entries in the header?
                 * If so, rewrite it like in fast-import
                 */
                if (pack_to_stdout) {
                        sha1close(f, sha1, CSUM_CLOSE);
                } else if (nr_written == nr_remaining) {
                        sha1close(f, sha1, CSUM_FSYNC);
                } else {
                        int fd = sha1close(f, sha1, 0);
                        fixup_pack_header_footer(fd, sha1, pack_tmp_name,
                                                 nr_written, sha1, offset);
                        close(fd);
                }

                if (!pack_to_stdout) {
                        struct stat st;
                        char tmpname[PATH_MAX];

                        /*
                         * Packs are runtime accessed in their mtime
                         * order since newer packs are more likely to contain
                         * younger objects.  So if we are creating multiple
                         * packs then we should modify the mtime of later ones
                         * to preserve this property.
                         */
                        if (stat(pack_tmp_name, &st) < 0) {
                                warning("failed to stat %s: %s",
                                        pack_tmp_name, strerror(errno));
                        } else if (!last_mtime) {
                                last_mtime = st.st_mtime;
                        } else {
                                struct utimbuf utb;
                                utb.actime = st.st_atime;
                                utb.modtime = --last_mtime;
                                if (utime(pack_tmp_name, &utb) < 0)
                                        warning("failed utime() on %s: %s",
                                                pack_tmp_name, strerror(errno));
                        }

                        /* Enough space for "-<sha-1>.pack"? */
                        if (sizeof(tmpname) <= strlen(base_name) + 50)
                                die("pack base name '%s' too long", base_name);
                        snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
                        finish_tmp_packfile(tmpname, pack_tmp_name,
                                            written_list, nr_written,
                                            &pack_idx_opts, sha1);
                        free(pack_tmp_name);
                        puts(sha1_to_hex(sha1));
                }

                /* mark written objects as written to previous pack */
                for (j = 0; j < nr_written; j++) {
                        written_list[j]->offset = (off_t)-1;
                }
                nr_remaining -= nr_written;
        } while (nr_remaining && i < to_pack.nr_objects);

        free(written_list);
        free(write_order);
        stop_progress(&progress_state);
        if (written != nr_result)
                die("wrote %"PRIu32" objects while expecting %"PRIu32,
                        written, nr_result);
}

static void setup_delta_attr_check(struct git_attr_check *check)
{
        static struct git_attr *attr_delta;

        if (!attr_delta)
                attr_delta = git_attr("delta");

        check[0].attr = attr_delta;
}

static int no_try_delta(const char *path)
{
        struct git_attr_check check[1];

        setup_delta_attr_check(check);
        if (git_check_attr(path, ARRAY_SIZE(check), check))
                return 0;
        if (ATTR_FALSE(check->value))
                return 1;
        return 0;
}

/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const unsigned char *sha1,
                                int exclude,
                                uint32_t *index_pos)
{
        struct object_entry *entry;

        entry = packlist_find(&to_pack, sha1, index_pos);
        if (!entry)
                return 0;

        if (exclude) {
                if (!entry->preferred_base)
                        nr_result--;
                entry->preferred_base = 1;
        }

        return 1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * As a side effect of this check, we will find the packed version of this
 * object, if any. We therefore pass out the pack information to avoid having
 * to look it up again later.
 */
static int want_object_in_pack(const unsigned char *sha1,
                               int exclude,
                               struct packed_git **found_pack,
                               off_t *found_offset)
{
        struct packed_git *p;

        if (!exclude && local && has_loose_object_nonlocal(sha1))
                return 0;

        *found_pack = NULL;
        *found_offset = 0;

        for (p = packed_git; p; p = p->next) {
                off_t offset = find_pack_entry_one(sha1, p);
                if (offset) {
                        if (!*found_pack) {
                                if (!is_pack_valid(p)) {
                                        warning("packfile %s cannot be accessed", p->pack_name);
                                        continue;
                                }
                                *found_offset = offset;
                                *found_pack = p;
                        }
                        if (exclude)
                                return 1;
                        if (incremental)
                                return 0;
                        if (local && !p->pack_local)
                                return 0;
                        if (ignore_packed_keep && p->pack_local && p->pack_keep)
                                return 0;
                }
        }

        return 1;
}

static void create_object_entry(const unsigned char *sha1,
                                enum object_type type,
                                uint32_t hash,
                                int exclude,
                                int no_try_delta,
                                uint32_t index_pos,
                                struct packed_git *found_pack,
                                off_t found_offset)
{
        struct object_entry *entry;

        entry = packlist_alloc(&to_pack, sha1, index_pos);
        entry->hash = hash;
        if (type)
                entry->type = type;
        if (exclude)
                entry->preferred_base = 1;
        else
                nr_result++;
        if (found_pack) {
                entry->in_pack = found_pack;
                entry->in_pack_offset = found_offset;
        }

        entry->no_try_delta = no_try_delta;
}

static int add_object_entry(const unsigned char *sha1, enum object_type type,
                            const char *name, int exclude)
{
        struct packed_git *found_pack;
        off_t found_offset;
        uint32_t index_pos;

        if (have_duplicate_entry(sha1, exclude, &index_pos))
                return 0;

        if (!want_object_in_pack(sha1, exclude, &found_pack, &found_offset))
                return 0;

        create_object_entry(sha1, type, pack_name_hash(name),
                            exclude, name && no_try_delta(name),
                            index_pos, found_pack, found_offset);

        display_progress(progress_state, to_pack.nr_objects);
        return 1;
}

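/*
 * Like add_object_entry(), but for objects discovered through a bitmap
 * walk: the bitmap has already decided that the object is wanted and
 * tells us directly which pack and offset to reuse, so only the
 * duplicate check remains.
 */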
static int add_object_entry_from_bitmap(const unsigned char *sha1,
                                        enum object_type type,
                                        int flags, uint32_t name_hash,
                                        struct packed_git *pack, off_t offset)
{
        uint32_t index_pos;

        if (have_duplicate_entry(sha1, 0, &index_pos))
                return 0;

        create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset);

        display_progress(progress_state, to_pack.nr_objects);
        return 1;
}

struct pbase_tree_cache {
        unsigned char sha1[20];
        int ref;
        int temporary;
        void *tree_data;
        unsigned long tree_size;
};

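/*
 * A small open-addressed cache of trees read while picking preferred
 * bases: slots are keyed on the first byte of the tree's SHA-1 and
 * probed linearly for a handful of neighbors (see pbase_tree_get()).
 */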
static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
        return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
        return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
        struct pbase_tree *next;
        /* This is a phony "cache" entry; we are not
         * going to evict it nor find it through _get()
         * mechanism -- this is for the toplevel node that
         * would almost always change with any commit.
         */
        struct pbase_tree_cache pcache;
} *pbase_tree;

static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
{
        struct pbase_tree_cache *ent, *nent;
        void *data;
        unsigned long size;
        enum object_type type;
        int neigh;
        int my_ix = pbase_tree_cache_ix(sha1);
        int available_ix = -1;

        /* pbase-tree-cache acts as a limited hashtable.
         * your object will be found at your index or within a few
         * slots after that slot if it is cached.
         */
        for (neigh = 0; neigh < 8; neigh++) {
                ent = pbase_tree_cache[my_ix];
                if (ent && !hashcmp(ent->sha1, sha1)) {
                        ent->ref++;
                        return ent;
                }
                else if (((available_ix < 0) && (!ent || !ent->ref)) ||
                         ((0 <= available_ix) &&
                          (!ent && pbase_tree_cache[available_ix])))
                        available_ix = my_ix;
                if (!ent)
                        break;
                my_ix = pbase_tree_cache_ix_incr(my_ix);
        }

        /* Did not find one.  Either we got a bogus request or
         * we need to read and perhaps cache.
         */
        data = read_sha1_file(sha1, &type, &size);
        if (!data)
                return NULL;
        if (type != OBJ_TREE) {
                free(data);
                return NULL;
        }

        /* We need to either cache or return a throwaway copy */

        if (available_ix < 0)
                ent = NULL;
        else {
                ent = pbase_tree_cache[available_ix];
                my_ix = available_ix;
        }

        if (!ent) {
                nent = xmalloc(sizeof(*nent));
                nent->temporary = (available_ix < 0);
        }
        else {
                /* evict and reuse */
                free(ent->tree_data);
                nent = ent;
        }
        hashcpy(nent->sha1, sha1);
        nent->tree_data = data;
        nent->tree_size = size;
        nent->ref = 1;
        if (!nent->temporary)
                pbase_tree_cache[my_ix] = nent;
        return nent;
}

static void pbase_tree_put(struct pbase_tree_cache *cache)
{
        if (!cache->temporary) {
                cache->ref--;
                return;
        }
        free(cache->tree_data);
        free(cache);
}

static int name_cmp_len(const char *name)
{
        int i;
        for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
                ;
        return i;
}

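/*
 * Walk one tree level looking for the entry that matches the leading
 * path component of `name`.  Tree entries arrive sorted, so we skip
 * entries that sort earlier and stop once we have passed where a match
 * would be; a full match is added as a preferred-base object, or, for
 * a directory component, recursed into.
 */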
static void add_pbase_object(struct tree_desc *tree,
                             const char *name,
                             int cmplen,
                             const char *fullname)
{
        struct name_entry entry;
        int cmp;

        while (tree_entry(tree, &entry)) {
                if (S_ISGITLINK(entry.mode))
                        continue;
                cmp = tree_entry_len(&entry) != cmplen ? 1 :
                      memcmp(name, entry.path, cmplen);
                if (cmp > 0)
                        continue;
                if (cmp < 0)
                        return;
                if (name[cmplen] != '/') {
                        add_object_entry(entry.sha1,
                                         object_type(entry.mode),
                                         fullname, 1);
                        return;
                }
                if (S_ISDIR(entry.mode)) {
                        struct tree_desc sub;
                        struct pbase_tree_cache *tree;
                        const char *down = name+cmplen+1;
                        int downlen = name_cmp_len(down);

                        tree = pbase_tree_get(entry.sha1);
                        if (!tree)
                                return;
                        init_tree_desc(&sub, tree->tree_data, tree->tree_size);

                        add_pbase_object(&sub, down, downlen, fullname);
                        pbase_tree_put(tree);
                }
        }
}

static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;
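
/*
 * Hashes of path names we have already considered for preferred-base
 * objects, kept sorted in descending order so done_pbase_path_pos()
 * can binary-search them; it returns the index when found, and
 * -(insertion point)-1 otherwise, mirroring the usual bsearch idiom.
 */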
static int done_pbase_path_pos(unsigned hash)
{
        int lo = 0;
        int hi = done_pbase_paths_num;
        while (lo < hi) {
                int mi = (hi + lo) / 2;
                if (done_pbase_paths[mi] == hash)
                        return mi;
                if (done_pbase_paths[mi] < hash)
                        hi = mi;
                else
                        lo = mi + 1;
        }
        return -lo-1;
}

static int check_pbase_path(unsigned hash)
{
        int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
        if (0 <= pos)
                return 1;
        pos = -pos - 1;
        if (done_pbase_paths_alloc <= done_pbase_paths_num) {
                done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
                done_pbase_paths = xrealloc(done_pbase_paths,
                                            done_pbase_paths_alloc *
                                            sizeof(unsigned));
        }
        done_pbase_paths_num++;
        if (pos < done_pbase_paths_num)
                memmove(done_pbase_paths + pos + 1,
                        done_pbase_paths + pos,
                        (done_pbase_paths_num - pos - 1) * sizeof(unsigned));
        done_pbase_paths[pos] = hash;
        return 0;
}

static void add_preferred_base_object(const char *name)
{
        struct pbase_tree *it;
        int cmplen;
        unsigned hash = pack_name_hash(name);

        if (!num_preferred_base || check_pbase_path(hash))
                return;

        cmplen = name_cmp_len(name);
        for (it = pbase_tree; it; it = it->next) {
                if (cmplen == 0) {
                        add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1);
                }
                else {
                        struct tree_desc tree;
                        init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
                        add_pbase_object(&tree, name, cmplen, name);
                }
        }
}

static void add_preferred_base(unsigned char *sha1)
{
        struct pbase_tree *it;
        void *data;
        unsigned long size;
        unsigned char tree_sha1[20];

        if (window <= num_preferred_base++)
                return;

        data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
        if (!data)
                return;

        for (it = pbase_tree; it; it = it->next) {
                if (!hashcmp(it->pcache.sha1, tree_sha1)) {
                        free(data);
                        return;
                }
        }

        it = xcalloc(1, sizeof(*it));
        it->next = pbase_tree;
        pbase_tree = it;

        hashcpy(it->pcache.sha1, tree_sha1);
        it->pcache.tree_data = data;
        it->pcache.tree_size = size;
}

static void cleanup_preferred_base(void)
{
        struct pbase_tree *it;
        unsigned i;

        it = pbase_tree;
        pbase_tree = NULL;
        while (it) {
                struct pbase_tree *this = it;
                it = this->next;
                free(this->pcache.tree_data);
                free(this);
        }

        for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
                if (!pbase_tree_cache[i])
                        continue;
                free(pbase_tree_cache[i]->tree_data);
                free(pbase_tree_cache[i]);
                pbase_tree_cache[i] = NULL;
        }

        free(done_pbase_paths);
        done_pbase_paths = NULL;
        done_pbase_paths_num = done_pbase_paths_alloc = 0;
}

static void check_object(struct object_entry *entry)
{
        if (entry->in_pack) {
                struct packed_git *p = entry->in_pack;
                struct pack_window *w_curs = NULL;
                const unsigned char *base_ref = NULL;
                struct object_entry *base_entry;
                unsigned long used, used_0;
                unsigned long avail;
                off_t ofs;
                unsigned char *buf, c;

                buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

                /*
                 * We want in_pack_type even if we do not reuse delta
                 * since non-delta representations could still be reused.
                 */
                used = unpack_object_header_buffer(buf, avail,
                                                   &entry->in_pack_type,
                                                   &entry->size);
                if (used == 0)
                        goto give_up;

                /*
                 * Determine if this is a delta and if so whether we can
                 * reuse it or not.  Otherwise let's find out as cheaply as
                 * possible what the actual type and size for this object is.
                 */
                switch (entry->in_pack_type) {
                default:
                        /* Not a delta hence we've already got all we need. */
                        entry->type = entry->in_pack_type;
                        entry->in_pack_header_size = used;
                        if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
                                goto give_up;
                        unuse_pack(&w_curs);
                        return;
                case OBJ_REF_DELTA:
                        if (reuse_delta && !entry->preferred_base)
                                base_ref = use_pack(p, &w_curs,
                                                entry->in_pack_offset + used, NULL);
                        entry->in_pack_header_size = used + 20;
                        break;
                case OBJ_OFS_DELTA:
                        buf = use_pack(p, &w_curs,
                                       entry->in_pack_offset + used, NULL);
                        used_0 = 0;
                        c = buf[used_0++];
                        ofs = c & 127;
                        while (c & 128) {
                                ofs += 1;
                                if (!ofs || MSB(ofs, 7)) {
                                        error("delta base offset overflow in pack for %s",
                                              sha1_to_hex(entry->idx.sha1));
                                        goto give_up;
                                }
                                c = buf[used_0++];
                                ofs = (ofs << 7) + (c & 127);
                        }
                        ofs = entry->in_pack_offset - ofs;
                        if (ofs <= 0 || ofs >= entry->in_pack_offset) {
                                error("delta base offset out of bound for %s",
                                      sha1_to_hex(entry->idx.sha1));
                                goto give_up;
                        }
                        if (reuse_delta && !entry->preferred_base) {
                                struct revindex_entry *revidx;
                                revidx = find_pack_revindex(p, ofs);
                                if (!revidx)
                                        goto give_up;
                                base_ref = nth_packed_object_sha1(p, revidx->nr);
                        }
                        entry->in_pack_header_size = used + used_0;
                        break;
                }

                if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
                        /*
                         * If base_ref was set above that means we wish to
                         * reuse delta data, and we even found that base
                         * in the list of objects we want to pack. Goodie!
                         *
                         * Depth value does not matter - find_deltas() will
                         * never consider reused delta as the base object to
                         * deltify other objects against, in order to avoid
                         * circular deltas.
                         */
                        entry->type = entry->in_pack_type;
                        entry->delta = base_entry;
                        entry->delta_size = entry->size;
                        entry->delta_sibling = base_entry->delta_child;
                        base_entry->delta_child = entry;
                        unuse_pack(&w_curs);
                        return;
                }

                if (entry->type) {
                        /*
                         * This must be a delta and we already know what the
                         * final object type is.  Let's extract the actual
                         * object size from the delta header.
                         */
                        entry->size = get_size_from_delta(p, &w_curs,
                                        entry->in_pack_offset + entry->in_pack_header_size);
                        if (entry->size == 0)
                                goto give_up;
                        unuse_pack(&w_curs);
                        return;
                }

                /*
                 * No choice but to fall back to the recursive delta walk
                 * with sha1_object_info() to find about the object type
                 * at this point...
                 */
                give_up:
                unuse_pack(&w_curs);
        }

        entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
        /*
         * The error condition is checked in prepare_pack().  This is
         * to permit a missing preferred base object to be ignored
         * as a preferred base.  Doing so can result in a larger
         * pack file, but the transfer will still take place.
         */
}

static int pack_offset_sort(const void *_a, const void *_b)
{
        const struct object_entry *a = *(struct object_entry **)_a;
        const struct object_entry *b = *(struct object_entry **)_b;

        /* avoid filesystem thrashing with loose objects */
        if (!a->in_pack && !b->in_pack)
                return hashcmp(a->idx.sha1, b->idx.sha1);

        if (a->in_pack < b->in_pack)
                return -1;
        if (a->in_pack > b->in_pack)
                return 1;
        return a->in_pack_offset < b->in_pack_offset ? -1 :
                        (a->in_pack_offset > b->in_pack_offset);
}

static void get_object_details(void)
{
        uint32_t i;
        struct object_entry **sorted_by_offset;

        sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
        for (i = 0; i < to_pack.nr_objects; i++)
                sorted_by_offset[i] = to_pack.objects + i;
        qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);

        for (i = 0; i < to_pack.nr_objects; i++) {
                struct object_entry *entry = sorted_by_offset[i];
                check_object(entry);
                if (big_file_threshold < entry->size)
                        entry->no_try_delta = 1;
        }

        free(sorted_by_offset);
}

1436/*
1437 * We search for deltas in a list sorted by type, by filename hash, and then
1438 * by size, so that we see progressively smaller and smaller files.
1439 * That's because we prefer deltas to be from the bigger file
1440 * to the smaller -- deletes are potentially cheaper, but perhaps
1441 * more importantly, the bigger file is likely the more recent
1442 * one.  The deepest deltas are therefore the oldest objects, which are
1443 * less likely to be accessed often.
1444 */
1445static int type_size_sort(const void *_a, const void *_b)
1446{
1447        const struct object_entry *a = *(struct object_entry **)_a;
1448        const struct object_entry *b = *(struct object_entry **)_b;
1449
1450        if (a->type > b->type)
1451                return -1;
1452        if (a->type < b->type)
1453                return 1;
1454        if (a->hash > b->hash)
1455                return -1;
1456        if (a->hash < b->hash)
1457                return 1;
1458        if (a->preferred_base > b->preferred_base)
1459                return -1;
1460        if (a->preferred_base < b->preferred_base)
1461                return 1;
1462        if (a->size > b->size)
1463                return -1;
1464        if (a->size < b->size)
1465                return 1;
1466        return a < b ? -1 : (a > b);  /* newest first */
1467}
1468
1469struct unpacked {
1470        struct object_entry *entry;
1471        void *data;
1472        struct delta_index *index;
1473        unsigned depth;
1474};
1475
1476static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
1477                           unsigned long delta_size)
1478{
1479        if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
1480                return 0;
1481
1482        if (delta_size < cache_max_small_delta_size)
1483                return 1;
1484
1485        /* cache delta, if objects are large enough compared to delta size */
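            /*
             * The shifts approximate src_size/1MiB + trg_size/2MiB >
             * delta_size/1KiB: e.g. a 30KiB delta between a 32MiB source
             * and a 16MiB target gives 32 + 8 = 40 > 30, so it is cached.
             */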
1486        if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
1487                return 1;
1488
1489        return 0;
1490}
1491
1492#ifndef NO_PTHREADS
1493
1494static pthread_mutex_t read_mutex;
1495#define read_lock()             pthread_mutex_lock(&read_mutex)
1496#define read_unlock()           pthread_mutex_unlock(&read_mutex)
1497
1498static pthread_mutex_t cache_mutex;
1499#define cache_lock()            pthread_mutex_lock(&cache_mutex)
1500#define cache_unlock()          pthread_mutex_unlock(&cache_mutex)
1501
1502static pthread_mutex_t progress_mutex;
1503#define progress_lock()         pthread_mutex_lock(&progress_mutex)
1504#define progress_unlock()       pthread_mutex_unlock(&progress_mutex)
1505
1506#else
1507
1508#define read_lock()             (void)0
1509#define read_unlock()           (void)0
1510#define cache_lock()            (void)0
1511#define cache_unlock()          (void)0
1512#define progress_lock()         (void)0
1513#define progress_unlock()       (void)0
1514
1515#endif
1516
1517static int try_delta(struct unpacked *trg, struct unpacked *src,
1518                     unsigned max_depth, unsigned long *mem_usage)
1519{
1520        struct object_entry *trg_entry = trg->entry;
1521        struct object_entry *src_entry = src->entry;
1522        unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1523        unsigned ref_depth;
1524        enum object_type type;
1525        void *delta_buf;
1526
1527        /* Don't bother doing diffs between different types */
1528        if (trg_entry->type != src_entry->type)
1529                return -1;
1530
1531        /*
1532         * When reusing delta data, we do not bother to retry a delta
1533         * that we discarded on an earlier try.  Note that
1534         * src_entry that is marked as the preferred_base should always
1535         * be considered, as even if we produce a suboptimal delta against
1536         * it, we will still save the transfer cost, as we already know
1537         * the other side has it and we won't send src_entry at all.
1538         */
1539        if (reuse_delta && trg_entry->in_pack &&
1540            trg_entry->in_pack == src_entry->in_pack &&
1541            !src_entry->preferred_base &&
1542            trg_entry->in_pack_type != OBJ_REF_DELTA &&
1543            trg_entry->in_pack_type != OBJ_OFS_DELTA)
1544                return 0;
1545
1546        /* Let's not bust the allowed depth. */
1547        if (src->depth >= max_depth)
1548                return 0;
1549
1550        /* Now some size filtering heuristics. */
1551        trg_size = trg_entry->size;
1552        if (!trg_entry->delta) {
1553                max_size = trg_size/2 - 20;
1554                ref_depth = 1;
1555        } else {
1556                max_size = trg_entry->delta_size;
1557                ref_depth = trg->depth;
1558        }
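            /*
             * With no delta yet, only a delta smaller than half the target
             * (less the 20 bytes a REF_DELTA spends naming its base) is
             * acceptable.  The budget then shrinks for deeper bases: with
             * the default max_depth of 50 and ref_depth 1, a base at depth
             * 0 keeps the full budget (50/50), while a base already at
             * depth 25 gets only half (25/50).
             */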
1559        max_size = (uint64_t)max_size * (max_depth - src->depth) /
1560                                                (max_depth - ref_depth + 1);
1561        if (max_size == 0)
1562                return 0;
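            /*
             * Heuristic: a delta usually has to carry at least the bytes
             * by which the target outgrows the source, so skip pairs where
             * that difference alone exceeds the budget, and give up when
             * the source is more than 32 times larger than the target.
             */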
1563        src_size = src_entry->size;
1564        sizediff = src_size < trg_size ? trg_size - src_size : 0;
1565        if (sizediff >= max_size)
1566                return 0;
1567        if (trg_size < src_size / 32)
1568                return 0;
1569
1570        /* Load data if not already done */
1571        if (!trg->data) {
1572                read_lock();
1573                trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);
1574                read_unlock();
1575                if (!trg->data)
1576                        die("object %s cannot be read",
1577                            sha1_to_hex(trg_entry->idx.sha1));
1578                if (sz != trg_size)
1579                        die("object %s inconsistent object length (%lu vs %lu)",
1580                            sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);
1581                *mem_usage += sz;
1582        }
1583        if (!src->data) {
1584                read_lock();
1585                src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);
1586                read_unlock();
1587                if (!src->data) {
1588                        if (src_entry->preferred_base) {
1589                                static int warned = 0;
1590                                if (!warned++)
1591                                        warning("object %s cannot be read",
1592                                                sha1_to_hex(src_entry->idx.sha1));
1593                                /*
1594                                 * Those objects are not included in the
1595                                 * resulting pack.  Be resilient and ignore
1596                                 * them if they can't be read, so that the
1597                                 * pack can be created nevertheless.
1598                                 */
1599                                return 0;
1600                        }
1601                        die("object %s cannot be read",
1602                            sha1_to_hex(src_entry->idx.sha1));
1603                }
1604                if (sz != src_size)
1605                        die("object %s inconsistent object length (%lu vs %lu)",
1606                            sha1_to_hex(src_entry->idx.sha1), sz, src_size);
1607                *mem_usage += sz;
1608        }
1609        if (!src->index) {
1610                src->index = create_delta_index(src->data, src_size);
1611                if (!src->index) {
1612                        static int warned = 0;
1613                        if (!warned++)
1614                                warning("suboptimal pack - out of memory");
1615                        return 0;
1616                }
1617                *mem_usage += sizeof_delta_index(src->index);
1618        }
1619
1620        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1621        if (!delta_buf)
1622                return 0;
1623
1624        if (trg_entry->delta) {
1625                /* Prefer only shallower same-sized deltas. */
1626                if (delta_size == trg_entry->delta_size &&
1627                    src->depth + 1 >= trg->depth) {
1628                        free(delta_buf);
1629                        return 0;
1630                }
1631        }
1632
1633        /*
1634         * Handle memory allocation outside of the cache
1635         * accounting lock.  The compiler will optimize the strangeness
1636         * away when NO_PTHREADS is defined.
1637         */
1638        free(trg_entry->delta_data);
1639        cache_lock();
1640        if (trg_entry->delta_data) {
1641                delta_cache_size -= trg_entry->delta_size;
1642                trg_entry->delta_data = NULL;
1643        }
1644        if (delta_cacheable(src_size, trg_size, delta_size)) {
1645                delta_cache_size += delta_size;
1646                cache_unlock();
1647                trg_entry->delta_data = xrealloc(delta_buf, delta_size);
1648        } else {
1649                cache_unlock();
1650                free(delta_buf);
1651        }
1652
1653        trg_entry->delta = src_entry;
1654        trg_entry->delta_size = delta_size;
1655        trg->depth = src->depth + 1;
1656
1657        return 1;
1658}
1659
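    /*
     * Return the length of the longest delta chain hanging off of "me",
     * counted in edges starting from "n": recurse over every
     * delta_child/delta_sibling path and keep the maximum.
     */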
1660static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1661{
1662        struct object_entry *child = me->delta_child;
1663        unsigned int m = n;
1664        while (child) {
1665                unsigned int c = check_delta_limit(child, n + 1);
1666                if (m < c)
1667                        m = c;
1668                child = child->delta_sibling;
1669        }
1670        return m;
1671}
1672
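    /*
     * Drop the delta index and object data held by a window slot and
     * return the number of bytes freed, for window memory accounting.
     */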
1673static unsigned long free_unpacked(struct unpacked *n)
1674{
1675        unsigned long freed_mem = sizeof_delta_index(n->index);
1676        free_delta_index(n->index);
1677        n->index = NULL;
1678        if (n->data) {
1679                freed_mem += n->entry->size;
1680                free(n->data);
1681                n->data = NULL;
1682        }
1683        n->entry = NULL;
1684        n->depth = 0;
1685        return freed_mem;
1686}
1687
1688static void find_deltas(struct object_entry **list, unsigned *list_size,
1689                        int window, int depth, unsigned *processed)
1690{
1691        uint32_t i, idx = 0, count = 0;
1692        struct unpacked *array;
1693        unsigned long mem_usage = 0;
1694
1695        array = xcalloc(window, sizeof(struct unpacked));
1696
1697        for (;;) {
1698                struct object_entry *entry;
1699                struct unpacked *n = array + idx;
1700                int j, max_depth, best_base = -1;
1701
1702                progress_lock();
1703                if (!*list_size) {
1704                        progress_unlock();
1705                        break;
1706                }
1707                entry = *list++;
1708                (*list_size)--;
1709                if (!entry->preferred_base) {
1710                        (*processed)++;
1711                        display_progress(progress_state, *processed);
1712                }
1713                progress_unlock();
1714
1715                mem_usage -= free_unpacked(n);
1716                n->entry = entry;
1717
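                    /*
                     * Enforce the window memory budget by evicting the
                     * oldest entries first: (idx + window - count) % window
                     * is the slot least recently filled.
                     */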
1718                while (window_memory_limit &&
1719                       mem_usage > window_memory_limit &&
1720                       count > 1) {
1721                        uint32_t tail = (idx + window - count) % window;
1722                        mem_usage -= free_unpacked(array + tail);
1723                        count--;
1724                }
1725
1726                /* We do not compute deltas to *create* objects we are not
1727                 * going to pack.
1728                 */
1729                if (entry->preferred_base)
1730                        goto next;
1731
1732                /*
1733                 * If the current object is at the pack edge, take the depth
1734                 * of the objects that depend on it into account; otherwise
1735                 * they would become too deep.
1736                 */
1737                max_depth = depth;
1738                if (entry->delta_child) {
1739                        max_depth -= check_delta_limit(entry, 0);
1740                        if (max_depth <= 0)
1741                                goto next;
1742                }
1743
1744                j = window;
1745                while (--j > 0) {
1746                        int ret;
1747                        uint32_t other_idx = idx + j;
1748                        struct unpacked *m;
1749                        if (other_idx >= window)
1750                                other_idx -= window;
1751                        m = array + other_idx;
1752                        if (!m->entry)
1753                                break;
1754                        ret = try_delta(n, m, max_depth, &mem_usage);
1755                        if (ret < 0)
1756                                break;
1757                        else if (ret > 0)
1758                                best_base = other_idx;
1759                }
1760
1761                /*
1762                 * If we decided to cache the delta data, then it is best
1763                 * to compress it right away.  First because we have to do
1764                 * it anyway, and doing it here while we're threaded will
1765                 * save a lot of time in the non threaded write phase,
1766                 * as well as allow for caching more deltas within
1767                 * the same cache size limit.
1768                 * ...
1769                 * But only if not writing to stdout, since in that case
1770                 * the network is most likely throttling writes anyway,
1771                 * and therefore it is best to go to the write phase ASAP
1772                 * instead, as we can afford spending more time compressing
1773                 * between writes at that moment.
1774                 */
1775                if (entry->delta_data && !pack_to_stdout) {
1776                        entry->z_delta_size = do_compress(&entry->delta_data,
1777                                                          entry->delta_size);
1778                        cache_lock();
1779                        delta_cache_size -= entry->delta_size;
1780                        delta_cache_size += entry->z_delta_size;
1781                        cache_unlock();
1782                }
1783
1784                /* If we made n a delta, and if n is already at max
1785                 * depth, leaving it in the window is pointless.  We
1786                 * should evict it first.
1787                 */
1788                if (entry->delta && max_depth <= n->depth)
1789                        continue;
1790
1791                /*
1792                 * Move the best delta base up in the window, after the
1793                 * currently deltified object, to keep it longer.  It will
1794                 * be the first base object to be attempted next.
1795                 */
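                    /*
                     * E.g. with window = 10, idx = 7 and best_base = 3,
                     * dist = (10 + 7 - 3) % 10 = 4: slots 4..7 each shift
                     * down one and the old best base lands in slot 7, right
                     * behind the current object, so it is tried first.
                     */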
1796                if (entry->delta) {
1797                        struct unpacked swap = array[best_base];
1798                        int dist = (window + idx - best_base) % window;
1799                        int dst = best_base;
1800                        while (dist--) {
1801                                int src = (dst + 1) % window;
1802                                array[dst] = array[src];
1803                                dst = src;
1804                        }
1805                        array[dst] = swap;
1806                }
1807
1808                next:
1809                idx++;
1810                if (count + 1 < window)
1811                        count++;
1812                if (idx >= window)
1813                        idx = 0;
1814        }
1815
1816        for (i = 0; i < window; ++i) {
1817                free_delta_index(array[i].index);
1818                free(array[i].data);
1819        }
1820        free(array);
1821}
1822
1823#ifndef NO_PTHREADS
1824
1825static void try_to_free_from_threads(size_t size)
1826{
1827        read_lock();
1828        release_pack_memory(size);
1829        read_unlock();
1830}
1831
1832static try_to_free_t old_try_to_free_routine;
1833
1834/*
1835 * The main thread waits on the condition that (at least) one of the workers
1836 * has stopped working (which is indicated in the .working member of
1837 * struct thread_params).
1838 * When a work thread has completed its work, it sets .working to 0 and
1839 * When a work thread has completed its work, it sets .working to 0,
1840 * signals the main thread, and waits on the condition that .data_ready
1841 */
1842
1843struct thread_params {
1844        pthread_t thread;
1845        struct object_entry **list;
1846        unsigned list_size;
1847        unsigned remaining;
1848        int window;
1849        int depth;
1850        int working;
1851        int data_ready;
1852        pthread_mutex_t mutex;
1853        pthread_cond_t cond;
1854        unsigned *processed;
1855};
1856
1857static pthread_cond_t progress_cond;
1858
1859/*
1860 * Mutexes and condition variables can't be statically initialized on Windows.
1861 */
1862static void init_threaded_search(void)
1863{
1864        init_recursive_mutex(&read_mutex);
1865        pthread_mutex_init(&cache_mutex, NULL);
1866        pthread_mutex_init(&progress_mutex, NULL);
1867        pthread_cond_init(&progress_cond, NULL);
1868        old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
1869}
1870
1871static void cleanup_threaded_search(void)
1872{
1873        set_try_to_free_routine(old_try_to_free_routine);
1874        pthread_cond_destroy(&progress_cond);
1875        pthread_mutex_destroy(&read_mutex);
1876        pthread_mutex_destroy(&cache_mutex);
1877        pthread_mutex_destroy(&progress_mutex);
1878}
1879
1880static void *threaded_find_deltas(void *arg)
1881{
1882        struct thread_params *me = arg;
1883
1884        while (me->remaining) {
1885                find_deltas(me->list, &me->remaining,
1886                            me->window, me->depth, me->processed);
1887
1888                progress_lock();
1889                me->working = 0;
1890                pthread_cond_signal(&progress_cond);
1891                progress_unlock();
1892
1893                /*
1894                 * We must not set ->data_ready before we wait on the
1895                 * condition because the main thread may have set it to 1
1896                 * before we get here. In order to be sure that new
1897                 * work is available if we see 1 in ->data_ready, it
1898                 * was initialized to 0 before this thread was spawned
1899                 * and we reset it to 0 right away.
1900                 */
1901                pthread_mutex_lock(&me->mutex);
1902                while (!me->data_ready)
1903                        pthread_cond_wait(&me->cond, &me->mutex);
1904                me->data_ready = 0;
1905                pthread_mutex_unlock(&me->mutex);
1906        }
1907        /* leave ->working 1 so that this doesn't get more work assigned */
1908        return NULL;
1909}
1910
1911static void ll_find_deltas(struct object_entry **list, unsigned list_size,
1912                           int window, int depth, unsigned *processed)
1913{
1914        struct thread_params *p;
1915        int i, ret, active_threads = 0;
1916
1917        init_threaded_search();
1918
1919        if (!delta_search_threads)      /* --threads=0 means autodetect */
1920                delta_search_threads = online_cpus();
1921        if (delta_search_threads <= 1) {
1922                find_deltas(list, &list_size, window, depth, processed);
1923                cleanup_threaded_search();
1924                return;
1925        }
1926        if (progress > pack_to_stdout)
1927                fprintf(stderr, "Delta compression using up to %d threads.\n",
1928                                delta_search_threads);
1929        p = xcalloc(delta_search_threads, sizeof(*p));
1930
1931        /* Partition the work amongst work threads. */
1932        for (i = 0; i < delta_search_threads; i++) {
1933                unsigned sub_size = list_size / (delta_search_threads - i);
1934
1935                /* don't use too small segments or no deltas will be found */
1936                if (sub_size < 2*window && i+1 < delta_search_threads)
1937                        sub_size = 0;
1938
1939                p[i].window = window;
1940                p[i].depth = depth;
1941                p[i].processed = processed;
1942                p[i].working = 1;
1943                p[i].data_ready = 0;
1944
1945                /* try to split chunks on "path" boundaries */
1946                while (sub_size && sub_size < list_size &&
1947                       list[sub_size]->hash &&
1948                       list[sub_size]->hash == list[sub_size-1]->hash)
1949                        sub_size++;
1950
1951                p[i].list = list;
1952                p[i].list_size = sub_size;
1953                p[i].remaining = sub_size;
1954
1955                list += sub_size;
1956                list_size -= sub_size;
1957        }
1958
1959        /* Start work threads. */
1960        for (i = 0; i < delta_search_threads; i++) {
1961                if (!p[i].list_size)
1962                        continue;
1963                pthread_mutex_init(&p[i].mutex, NULL);
1964                pthread_cond_init(&p[i].cond, NULL);
1965                ret = pthread_create(&p[i].thread, NULL,
1966                                     threaded_find_deltas, &p[i]);
1967                if (ret)
1968                        die("unable to create thread: %s", strerror(ret));
1969                active_threads++;
1970        }
1971
1972        /*
1973         * Now let's wait for work completion.  Each time a thread is done
1974         * with its work, we steal half of the remaining work from the
1975         * thread with the largest number of unprocessed objects and give
1976         * it to that newly idle thread.  This ensures good load balancing
1977         * until the remaining object list segments are simply too short
1978         * to be worth splitting anymore.
1979         */
1980        while (active_threads) {
1981                struct thread_params *target = NULL;
1982                struct thread_params *victim = NULL;
1983                unsigned sub_size = 0;
1984
1985                progress_lock();
1986                for (;;) {
1987                        for (i = 0; !target && i < delta_search_threads; i++)
1988                                if (!p[i].working)
1989                                        target = &p[i];
1990                        if (target)
1991                                break;
1992                        pthread_cond_wait(&progress_cond, &progress_mutex);
1993                }
1994
1995                for (i = 0; i < delta_search_threads; i++)
1996                        if (p[i].remaining > 2*window &&
1997                            (!victim || victim->remaining < p[i].remaining))
1998                                victim = &p[i];
1999                if (victim) {
2000                        sub_size = victim->remaining / 2;
2001                        list = victim->list + victim->list_size - sub_size;
2002                        while (sub_size && list[0]->hash &&
2003                               list[0]->hash == list[-1]->hash) {
2004                                list++;
2005                                sub_size--;
2006                        }
2007                        if (!sub_size) {
2008                                /*
2009                                 * It is possible for some "paths" to have
2010                                 * so many objects that no hash boundary
2011                                 * can be found.  Let's just steal the
2012                                 * exact half in that case.
2013                                 */
2014                                sub_size = victim->remaining / 2;
2015                                list -= sub_size;
2016                        }
2017                        target->list = list;
2018                        victim->list_size -= sub_size;
2019                        victim->remaining -= sub_size;
2020                }
2021                target->list_size = sub_size;
2022                target->remaining = sub_size;
2023                target->working = 1;
2024                progress_unlock();
2025
2026                pthread_mutex_lock(&target->mutex);
2027                target->data_ready = 1;
2028                pthread_cond_signal(&target->cond);
2029                pthread_mutex_unlock(&target->mutex);
2030
2031                if (!sub_size) {
2032                        pthread_join(target->thread, NULL);
2033                        pthread_cond_destroy(&target->cond);
2034                        pthread_mutex_destroy(&target->mutex);
2035                        active_threads--;
2036                }
2037        }
2038        cleanup_threaded_search();
2039        free(p);
2040}
2041
2042#else
2043#define ll_find_deltas(l, s, w, d, p)   find_deltas(l, &s, w, d, p)
2044#endif
2045
2046static int add_ref_tag(const char *path, const unsigned char *sha1, int flag, void *cb_data)
2047{
2048        unsigned char peeled[20];
2049
2050        if (!prefixcmp(path, "refs/tags/") && /* is a tag? */
2051            !peel_ref(path, peeled)        && /* peelable? */
2052            packlist_find(&to_pack, peeled, NULL))      /* object packed? */
2053                add_object_entry(sha1, OBJ_TAG, NULL, 0);
2054        return 0;
2055}
2056
2057static void prepare_pack(int window, int depth)
2058{
2059        struct object_entry **delta_list;
2060        uint32_t i, nr_deltas;
2061        unsigned n;
2062
2063        get_object_details();
2064
2065        /*
2066         * If we're locally repacking then we need to be doubly careful
2067         * from now on in order to make sure no stealth corruption gets
2068         * propagated to the new pack.  Clients receiving streamed packs
2069         * should validate everything they get anyway so no need to incur
2070         * the additional cost here in that case.
2071         */
2072        if (!pack_to_stdout)
2073                do_check_packed_object_crc = 1;
2074
2075        if (!to_pack.nr_objects || !window || !depth)
2076                return;
2077
2078        delta_list = xmalloc(to_pack.nr_objects * sizeof(*delta_list));
2079        nr_deltas = n = 0;
2080
2081        for (i = 0; i < to_pack.nr_objects; i++) {
2082                struct object_entry *entry = to_pack.objects + i;
2083
2084                if (entry->delta)
2085                        /* This happens if we decided to reuse an existing
2086                         * delta from a pack.  "reuse_delta &&" is implied.
2087                         */
2088                        continue;
2089
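                    /* Objects under 50 bytes are not worth the delta overhead. */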
2090                if (entry->size < 50)
2091                        continue;
2092
2093                if (entry->no_try_delta)
2094                        continue;
2095
2096                if (!entry->preferred_base) {
2097                        nr_deltas++;
2098                        if (entry->type < 0)
2099                                die("unable to get type of object %s",
2100                                    sha1_to_hex(entry->idx.sha1));
2101                } else {
2102                        if (entry->type < 0) {
2103                                /*
2104                                 * This object is not found, but we
2105                                 * don't have to include it anyway.
2106                                 */
2107                                continue;
2108                        }
2109                }
2110
2111                delta_list[n++] = entry;
2112        }
2113
2114        if (nr_deltas && n > 1) {
2115                unsigned nr_done = 0;
2116                if (progress)
2117                        progress_state = start_progress("Compressing objects",
2118                                                        nr_deltas);
2119                qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
2120                ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
2121                stop_progress(&progress_state);
2122                if (nr_done != nr_deltas)
2123                        die("inconsistency with delta count");
2124        }
2125        free(delta_list);
2126}
2127
2128static int git_pack_config(const char *k, const char *v, void *cb)
2129{
2130        if (!strcmp(k, "pack.window")) {
2131                window = git_config_int(k, v);
2132                return 0;
2133        }
2134        if (!strcmp(k, "pack.windowmemory")) {
2135                window_memory_limit = git_config_ulong(k, v);
2136                return 0;
2137        }
2138        if (!strcmp(k, "pack.depth")) {
2139                depth = git_config_int(k, v);
2140                return 0;
2141        }
2142        if (!strcmp(k, "pack.compression")) {
2143                int level = git_config_int(k, v);
2144                if (level == -1)
2145                        level = Z_DEFAULT_COMPRESSION;
2146                else if (level < 0 || level > Z_BEST_COMPRESSION)
2147                        die("bad pack compression level %d", level);
2148                pack_compression_level = level;
2149                pack_compression_seen = 1;
2150                return 0;
2151        }
2152        if (!strcmp(k, "pack.deltacachesize")) {
2153                max_delta_cache_size = git_config_int(k, v);
2154                return 0;
2155        }
2156        if (!strcmp(k, "pack.deltacachelimit")) {
2157                cache_max_small_delta_size = git_config_int(k, v);
2158                return 0;
2159        }
2160        if (!strcmp(k, "pack.usebitmaps")) {
2161                use_bitmap_index = git_config_bool(k, v);
2162                return 0;
2163        }
2164        if (!strcmp(k, "pack.threads")) {
2165                delta_search_threads = git_config_int(k, v);
2166                if (delta_search_threads < 0)
2167                        die("invalid number of threads specified (%d)",
2168                            delta_search_threads);
2169#ifdef NO_PTHREADS
2170                if (delta_search_threads != 1)
2171                        warning("no threads support, ignoring %s", k);
2172#endif
2173                return 0;
2174        }
2175        if (!strcmp(k, "pack.indexversion")) {
2176                pack_idx_opts.version = git_config_int(k, v);
2177                if (pack_idx_opts.version > 2)
2178                        die("bad pack.indexversion=%"PRIu32,
2179                            pack_idx_opts.version);
2180                return 0;
2181        }
2182        return git_default_config(k, v, cb);
2183}
2184
2185static void read_object_list_from_stdin(void)
2186{
2187        char line[40 + 1 + PATH_MAX + 2];
2188        unsigned char sha1[20];
2189
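            /*
             * Each input line is "<40-hex sha1> <path>\n"; a leading '-'
             * instead marks an edge object used only as a preferred base.
             */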
2190        for (;;) {
2191                if (!fgets(line, sizeof(line), stdin)) {
2192                        if (feof(stdin))
2193                                break;
2194                        if (!ferror(stdin))
2195                                die("fgets returned NULL, not EOF, not error!");
2196                        if (errno != EINTR)
2197                                die_errno("fgets");
2198                        clearerr(stdin);
2199                        continue;
2200                }
2201                if (line[0] == '-') {
2202                        if (get_sha1_hex(line+1, sha1))
2203                                die("expected edge sha1, got garbage:\n %s",
2204                                    line);
2205                        add_preferred_base(sha1);
2206                        continue;
2207                }
2208                if (get_sha1_hex(line, sha1))
2209                        die("expected sha1, got garbage:\n %s", line);
2210
2211                add_preferred_base_object(line+41);
2212                add_object_entry(sha1, 0, line+41, 0);
2213        }
2214}
2215
2216#define OBJECT_ADDED (1u<<20)
2217
2218static void show_commit(struct commit *commit, void *data)
2219{
2220        add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
2221        commit->object.flags |= OBJECT_ADDED;
2222}
2223
2224static void show_object(struct object *obj,
2225                        const struct name_path *path, const char *last,
2226                        void *data)
2227{
2228        char *name = path_name(path, last);
2229
2230        add_preferred_base_object(name);
2231        add_object_entry(obj->sha1, obj->type, name, 0);
2232        obj->flags |= OBJECT_ADDED;
2233
2234        /*
2235         * We will have generated the hash from the name,
2236         * but not saved a pointer to it - we can free it
2237         */
2238        free((char *)name);
2239}
2240
2241static void show_edge(struct commit *commit)
2242{
2243        add_preferred_base(commit->object.sha1);
2244}
2245
2246struct in_pack_object {
2247        off_t offset;
2248        struct object *object;
2249};
2250
2251struct in_pack {
2252        int alloc;
2253        int nr;
2254        struct in_pack_object *array;
2255};
2256
2257static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
2258{
2259        in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->sha1, p);
2260        in_pack->array[in_pack->nr].object = object;
2261        in_pack->nr++;
2262}
2263
2264/*
2265 * Compare the objects in offset order, to emulate the
2266 * "git rev-list --objects" output that produced the pack originally.
2267 */
2268static int ofscmp(const void *a_, const void *b_)
2269{
2270        struct in_pack_object *a = (struct in_pack_object *)a_;
2271        struct in_pack_object *b = (struct in_pack_object *)b_;
2272
2273        if (a->offset < b->offset)
2274                return -1;
2275        else if (a->offset > b->offset)
2276                return 1;
2277        else
2278                return hashcmp(a->object->sha1, b->object->sha1);
2279}
2280
2281static void add_objects_in_unpacked_packs(struct rev_info *revs)
2282{
2283        struct packed_git *p;
2284        struct in_pack in_pack;
2285        uint32_t i;
2286
2287        memset(&in_pack, 0, sizeof(in_pack));
2288
2289        for (p = packed_git; p; p = p->next) {
2290                const unsigned char *sha1;
2291                struct object *o;
2292
2293                if (!p->pack_local || p->pack_keep)
2294                        continue;
2295                if (open_pack_index(p))
2296                        die("cannot open pack index");
2297
2298                ALLOC_GROW(in_pack.array,
2299                           in_pack.nr + p->num_objects,
2300                           in_pack.alloc);
2301
2302                for (i = 0; i < p->num_objects; i++) {
2303                        sha1 = nth_packed_object_sha1(p, i);
2304                        o = lookup_unknown_object(sha1);
2305                        if (!(o->flags & OBJECT_ADDED))
2306                                mark_in_pack_object(o, p, &in_pack);
2307                        o->flags |= OBJECT_ADDED;
2308                }
2309        }
2310
2311        if (in_pack.nr) {
2312                qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
2313                      ofscmp);
2314                for (i = 0; i < in_pack.nr; i++) {
2315                        struct object *o = in_pack.array[i].object;
2316                        add_object_entry(o->sha1, o->type, "", 0);
2317                }
2318        }
2319        free(in_pack.array);
2320}
2321
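    /*
     * Check whether the object is in a non-local or .keep'd pack.  Start
     * at the pack that answered last time (a likely hit), then wrap
     * around the rest of the list, taking care not to scan the starting
     * pack twice.
     */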
2322static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
2323{
2324        static struct packed_git *last_found = (void *)1;
2325        struct packed_git *p;
2326
2327        p = (last_found != (void *)1) ? last_found : packed_git;
2328
2329        while (p) {
2330                if ((!p->pack_local || p->pack_keep) &&
2331                        find_pack_entry_one(sha1, p)) {
2332                        last_found = p;
2333                        return 1;
2334                }
2335                if (p == last_found)
2336                        p = packed_git;
2337                else
2338                        p = p->next;
2339                if (p == last_found)
2340                        p = p->next;
2341        }
2342        return 0;
2343}
2344
2345static void loosen_unused_packed_objects(struct rev_info *revs)
2346{
2347        struct packed_git *p;
2348        uint32_t i;
2349        const unsigned char *sha1;
2350
2351        for (p = packed_git; p; p = p->next) {
2352                if (!p->pack_local || p->pack_keep)
2353                        continue;
2354
2355                if (unpack_unreachable_expiration &&
2356                    p->mtime < unpack_unreachable_expiration)
2357                        continue;
2358
2359                if (open_pack_index(p))
2360                        die("cannot open pack index");
2361
2362                for (i = 0; i < p->num_objects; i++) {
2363                        sha1 = nth_packed_object_sha1(p, i);
2364                        if (!packlist_find(&to_pack, sha1, NULL) &&
2365                                !has_sha1_pack_kept_or_nonlocal(sha1))
2366                                if (force_object_loose(sha1, p->mtime))
2367                                        die("unable to force loose object");
2368                }
2369        }
2370}
2371
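    /*
     * Try to fill to_pack using the bitmap index instead of a revision
     * walk: if bitmaps apply to this walk, optionally arrange to reuse a
     * verbatim chunk of an existing packfile, then enumerate the
     * remaining objects straight from the bitmap.  Returns -1 if bitmaps
     * cannot be used, in which case the caller falls back to the normal
     * object walk.
     */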
2372static int get_object_list_from_bitmap(struct rev_info *revs)
2373{
2374        if (prepare_bitmap_walk(revs) < 0)
2375                return -1;
2376
2377        if (!reuse_partial_packfile_from_bitmap(
2378                        &reuse_packfile,
2379                        &reuse_packfile_objects,
2380                        &reuse_packfile_offset)) {
2381                assert(reuse_packfile_objects);
2382                nr_result += reuse_packfile_objects;
2383
2384                if (progress) {
2385                        fprintf(stderr, "Reusing existing pack: %d, done.\n",
2386                                reuse_packfile_objects);
2387                        fflush(stderr);
2388                }
2389        }
2390
2391        traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
2392        return 0;
2393}
2394
2395static void get_object_list(int ac, const char **av)
2396{
2397        struct rev_info revs;
2398        char line[1000];
2399        int flags = 0;
2400
2401        init_revisions(&revs, NULL);
2402        save_commit_buffer = 0;
2403        setup_revisions(ac, av, &revs, NULL);
2404
2405        while (fgets(line, sizeof(line), stdin) != NULL) {
2406                int len = strlen(line);
2407                if (len && line[len - 1] == '\n')
2408                        line[--len] = 0;
2409                if (!len)
2410                        break;
2411                if (*line == '-') {
2412                        if (!strcmp(line, "--not")) {
2413                                flags ^= UNINTERESTING;
2414                                continue;
2415                        }
2416                        die("not a rev '%s'", line);
2417                }
2418                if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
2419                        die("bad revision '%s'", line);
2420        }
2421
2422        if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
2423                return;
2424
2425        if (prepare_revision_walk(&revs))
2426                die("revision walk setup failed");
2427        mark_edges_uninteresting(&revs, show_edge);
2428        traverse_commit_list(&revs, show_commit, show_object, NULL);
2429
2430        if (keep_unreachable)
2431                add_objects_in_unpacked_packs(&revs);
2432        if (unpack_unreachable)
2433                loosen_unused_packed_objects(&revs);
2434}
2435
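    /*
     * Parse "--index-version=<version>[,<offset>]"; <offset> caps the
     * pack offsets that a version 2 index may still store as plain
     * 31-bit values before switching to its 64-bit offset table.
     */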
2436static int option_parse_index_version(const struct option *opt,
2437                                      const char *arg, int unset)
2438{
2439        char *c;
2440        const char *val = arg;
2441        pack_idx_opts.version = strtoul(val, &c, 10);
2442        if (pack_idx_opts.version > 2)
2443                die(_("unsupported index version %s"), val);
2444        if (*c == ',' && c[1])
2445                pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
2446        if (*c || pack_idx_opts.off32_limit & 0x80000000)
2447                die(_("bad index version '%s'"), val);
2448        return 0;
2449}
2450
2451static int option_parse_unpack_unreachable(const struct option *opt,
2452                                           const char *arg, int unset)
2453{
2454        if (unset) {
2455                unpack_unreachable = 0;
2456                unpack_unreachable_expiration = 0;
2457        }
2458        else {
2459                unpack_unreachable = 1;
2460                if (arg)
2461                        unpack_unreachable_expiration = approxidate(arg);
2462        }
2463        return 0;
2464}
2465
2466static int option_parse_ulong(const struct option *opt,
2467                              const char *arg, int unset)
2468{
2469        if (unset)
2470                die(_("option %s does not accept negative form"),
2471                    opt->long_name);
2472
2473        if (!git_parse_ulong(arg, opt->value))
2474                die(_("unable to parse value '%s' for option %s"),
2475                    arg, opt->long_name);
2476        return 0;
2477}
2478
2479#define OPT_ULONG(s, l, v, h) \
2480        { OPTION_CALLBACK, (s), (l), (v), "n", (h),     \
2481          PARSE_OPT_NONEG, option_parse_ulong }
2482
2483int cmd_pack_objects(int argc, const char **argv, const char *prefix)
2484{
2485        int use_internal_rev_list = 0;
2486        int thin = 0;
2487        int all_progress_implied = 0;
2488        const char *rp_av[6];
2489        int rp_ac = 0;
2490        int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
2491        struct option pack_objects_options[] = {
2492                OPT_SET_INT('q', "quiet", &progress,
2493                            N_("do not show progress meter"), 0),
2494                OPT_SET_INT(0, "progress", &progress,
2495                            N_("show progress meter"), 1),
2496                OPT_SET_INT(0, "all-progress", &progress,
2497                            N_("show progress meter during object writing phase"), 2),
2498                OPT_BOOL(0, "all-progress-implied",
2499                         &all_progress_implied,
2500                         N_("similar to --all-progress when progress meter is shown")),
2501                { OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
2502                  N_("write the pack index file in the specified idx format version"),
2503                  0, option_parse_index_version },
2504                OPT_ULONG(0, "max-pack-size", &pack_size_limit,
2505                          N_("maximum size of each output pack file")),
2506                OPT_BOOL(0, "local", &local,
2507                         N_("ignore borrowed objects from alternate object store")),
2508                OPT_BOOL(0, "incremental", &incremental,
2509                         N_("ignore packed objects")),
2510                OPT_INTEGER(0, "window", &window,
2511                            N_("limit pack window by objects")),
2512                OPT_ULONG(0, "window-memory", &window_memory_limit,
2513                          N_("limit pack window by memory in addition to object limit")),
2514                OPT_INTEGER(0, "depth", &depth,
2515                            N_("maximum length of delta chain allowed in the resulting pack")),
2516                OPT_BOOL(0, "reuse-delta", &reuse_delta,
2517                         N_("reuse existing deltas")),
2518                OPT_BOOL(0, "reuse-object", &reuse_object,
2519                         N_("reuse existing objects")),
2520                OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
2521                         N_("use OFS_DELTA objects")),
2522                OPT_INTEGER(0, "threads", &delta_search_threads,
2523                            N_("use threads when searching for best delta matches")),
2524                OPT_BOOL(0, "non-empty", &non_empty,
2525                         N_("do not create an empty pack output")),
2526                OPT_BOOL(0, "revs", &use_internal_rev_list,
2527                         N_("read revision arguments from standard input")),
2528                { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
2529                  N_("limit the objects to those that are not yet packed"),
2530                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
2531                { OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
2532                  N_("include objects reachable from any reference"),
2533                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
2534                { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
2535                  N_("include objects referred by reflog entries"),
2536                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
2537                OPT_BOOL(0, "stdout", &pack_to_stdout,
2538                         N_("output pack to stdout")),
2539                OPT_BOOL(0, "include-tag", &include_tag,
2540                         N_("include tag objects that refer to objects to be packed")),
2541                OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
2542                         N_("keep unreachable objects")),
2543                { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
2544                  N_("unpack unreachable objects newer than <time>"),
2545                  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
2546                OPT_BOOL(0, "thin", &thin,
2547                         N_("create thin packs")),
2548                OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,
2549                         N_("ignore packs that have companion .keep file")),
2550                OPT_INTEGER(0, "compression", &pack_compression_level,
2551                            N_("pack compression level")),
2552                OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
2553                            N_("do not hide commits by grafts"), 0),
2554                OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
2555                         N_("use a bitmap index if available to speed up counting objects")),
2556                OPT_END(),
2557        };
2558
2559        read_replace_refs = 0;
2560
2561        reset_pack_idx_option(&pack_idx_opts);
2562        git_config(git_pack_config, NULL);
2563        if (!pack_compression_seen && core_compression_seen)
2564                pack_compression_level = core_compression_level;
2565
2566        progress = isatty(2);
2567        argc = parse_options(argc, argv, prefix, pack_objects_options,
2568                             pack_usage, 0);
2569
2570        if (argc) {
2571                base_name = argv[0];
2572                argc--;
2573        }
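            /* Exactly one of --stdout or a base-name argument must be given. */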
2574        if (pack_to_stdout != !base_name || argc)
2575                usage_with_options(pack_usage, pack_objects_options);
2576
2577        rp_av[rp_ac++] = "pack-objects";
2578        if (thin) {
2579                use_internal_rev_list = 1;
2580                rp_av[rp_ac++] = "--objects-edge";
2581        } else
2582                rp_av[rp_ac++] = "--objects";
2583
2584        if (rev_list_all) {
2585                use_internal_rev_list = 1;
2586                rp_av[rp_ac++] = "--all";
2587        }
2588        if (rev_list_reflog) {
2589                use_internal_rev_list = 1;
2590                rp_av[rp_ac++] = "--reflog";
2591        }
2592        if (rev_list_unpacked) {
2593                use_internal_rev_list = 1;
2594                rp_av[rp_ac++] = "--unpacked";
2595        }
2596
2597        if (!reuse_object)
2598                reuse_delta = 0;
2599        if (pack_compression_level == -1)
2600                pack_compression_level = Z_DEFAULT_COMPRESSION;
2601        else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
2602                die("bad pack compression level %d", pack_compression_level);
2603#ifdef NO_PTHREADS
2604        if (delta_search_threads != 1)
2605                warning("no threads support, ignoring --threads");
2606#endif
2607        if (!pack_to_stdout && !pack_size_limit)
2608                pack_size_limit = pack_size_limit_cfg;
2609        if (pack_to_stdout && pack_size_limit)
2610                die("--max-pack-size cannot be used to build a pack for transfer.");
2611        if (pack_size_limit && pack_size_limit < 1024*1024) {
2612                warning("minimum pack size limit is 1 MiB");
2613                pack_size_limit = 1024*1024;
2614        }
2615
2616        if (!pack_to_stdout && thin)
2617                die("--thin cannot be used to build an indexable pack.");
2618
2619        if (keep_unreachable && unpack_unreachable)
2620                die("--keep-unreachable and --unpack-unreachable are incompatible.");
2621
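            /*
             * Bitmaps only help when we run the revision walk ourselves
             * and stream the pack; they are also not usable in shallow
             * repositories.
             */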
2622        if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
2623                use_bitmap_index = 0;
2624
2625        if (progress && all_progress_implied)
2626                progress = 2;
2627
2628        prepare_packed_git();
2629
2630        if (progress)
2631                progress_state = start_progress("Counting objects", 0);
2632        if (!use_internal_rev_list)
2633                read_object_list_from_stdin();
2634        else {
2635                rp_av[rp_ac] = NULL;
2636                get_object_list(rp_ac, rp_av);
2637        }
2638        cleanup_preferred_base();
2639        if (include_tag && nr_result)
2640                for_each_ref(add_ref_tag, NULL);
2641        stop_progress(&progress_state);
2642
2643        if (non_empty && !nr_result)
2644                return 0;
2645        if (nr_result)
2646                prepare_pack(window, depth);
2647        write_pack_file();
2648        if (progress)
2649                fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
2650                        " reused %"PRIu32" (delta %"PRIu32")\n",
2651                        written, written_delta, reused, reused_delta);
2652        return 0;
2653}