builtin/pack-objects.c at commit ac77d0c ("pack-objects: shrink size field in struct object_entry")
   1#include "builtin.h"
   2#include "cache.h"
   3#include "repository.h"
   4#include "config.h"
   5#include "attr.h"
   6#include "object.h"
   7#include "blob.h"
   8#include "commit.h"
   9#include "tag.h"
  10#include "tree.h"
  11#include "delta.h"
  12#include "pack.h"
  13#include "pack-revindex.h"
  14#include "csum-file.h"
  15#include "tree-walk.h"
  16#include "diff.h"
  17#include "revision.h"
  18#include "list-objects.h"
  19#include "list-objects-filter.h"
  20#include "list-objects-filter-options.h"
  21#include "pack-objects.h"
  22#include "progress.h"
  23#include "refs.h"
  24#include "streaming.h"
  25#include "thread-utils.h"
  26#include "pack-bitmap.h"
  27#include "reachable.h"
  28#include "sha1-array.h"
  29#include "argv-array.h"
  30#include "list.h"
  31#include "packfile.h"
  32#include "object-store.h"
  33
  34#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
  35#define SIZE(obj) oe_size(&to_pack, obj)
  36#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
  37#define DELTA(obj) oe_delta(&to_pack, obj)
  38#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
  39#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
  40#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
  41#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
  42#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
  43
  44static const char *pack_usage[] = {
  45        N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
  46        N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
  47        NULL
  48};
  49
  50/*
  51 * Objects we are going to pack are collected in the `to_pack` structure.
  52 * It contains an array (dynamically expanded) of the object data, and a map
  53 * that can resolve SHA1s to their position in the array.
  54 */
  55static struct packing_data to_pack;
  56
  57static struct pack_idx_entry **written_list;
  58static uint32_t nr_result, nr_written;
  59
  60static int non_empty;
  61static int reuse_delta = 1, reuse_object = 1;
  62static int keep_unreachable, unpack_unreachable, include_tag;
  63static timestamp_t unpack_unreachable_expiration;
  64static int pack_loose_unreachable;
  65static int local;
  66static int have_non_local_packs;
  67static int incremental;
  68static int ignore_packed_keep;
  69static int allow_ofs_delta;
  70static struct pack_idx_option pack_idx_opts;
  71static const char *base_name;
  72static int progress = 1;
  73static int window = 10;
  74static unsigned long pack_size_limit;
  75static int depth = 50;
  76static int delta_search_threads;
  77static int pack_to_stdout;
  78static int num_preferred_base;
  79static struct progress *progress_state;
  80
  81static struct packed_git *reuse_packfile;
  82static uint32_t reuse_packfile_objects;
  83static off_t reuse_packfile_offset;
  84
  85static int use_bitmap_index_default = 1;
  86static int use_bitmap_index = -1;
  87static int write_bitmap_index;
  88static uint16_t write_bitmap_options;
  89
  90static int exclude_promisor_objects;
  91
  92static unsigned long delta_cache_size = 0;
  93static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
  94static unsigned long cache_max_small_delta_size = 1000;
  95
  96static unsigned long window_memory_limit = 0;
  97
  98static struct list_objects_filter_options filter_options;
  99
 100enum missing_action {
 101        MA_ERROR = 0,      /* fail if any missing objects are encountered */
 102        MA_ALLOW_ANY,      /* silently allow ALL missing objects */
 103        MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
 104};
 105static enum missing_action arg_missing_action;
 106static show_object_fn fn_show_object;
 107
 108/*
 109 * stats
 110 */
 111static uint32_t written, written_delta;
 112static uint32_t reused, reused_delta;
 113
 114/*
 115 * Indexed commits
 116 */
 117static struct commit **indexed_commits;
 118static unsigned int indexed_commits_nr;
 119static unsigned int indexed_commits_alloc;
 120
 121static void index_commit_for_bitmap(struct commit *commit)
 122{
 123        if (indexed_commits_nr >= indexed_commits_alloc) {
 124                indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
 125                REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
 126        }
 127
 128        indexed_commits[indexed_commits_nr++] = commit;
 129}
 130
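    /*
     * Recompute the delta for this entry against its chosen base by reading
     * both objects back and running diff_delta().  This is the slow path,
     * used when no cached delta data is available at write time.
     */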
 131static void *get_delta(struct object_entry *entry)
 132{
 133        unsigned long size, base_size, delta_size;
 134        void *buf, *base_buf, *delta_buf;
 135        enum object_type type;
 136
 137        buf = read_object_file(&entry->idx.oid, &type, &size);
 138        if (!buf)
 139                die("unable to read %s", oid_to_hex(&entry->idx.oid));
 140        base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
 141                                    &base_size);
 142        if (!base_buf)
 143                die("unable to read %s",
 144                    oid_to_hex(&DELTA(entry)->idx.oid));
 145        delta_buf = diff_delta(base_buf, base_size,
 146                               buf, size, &delta_size, 0);
 147        if (!delta_buf || delta_size != entry->delta_size)
 148                die("delta size changed");
 149        free(buf);
 150        free(base_buf);
 151        return delta_buf;
 152}
 153
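    /*
     * Deflate the buffer pointed to by *pptr at the configured pack
     * compression level.  On return, *pptr points at a freshly allocated
     * buffer holding the compressed data (the input buffer is freed) and
     * the compressed length is returned.
     */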
 154static unsigned long do_compress(void **pptr, unsigned long size)
 155{
 156        git_zstream stream;
 157        void *in, *out;
 158        unsigned long maxsize;
 159
 160        git_deflate_init(&stream, pack_compression_level);
 161        maxsize = git_deflate_bound(&stream, size);
 162
 163        in = *pptr;
 164        out = xmalloc(maxsize);
 165        *pptr = out;
 166
 167        stream.next_in = in;
 168        stream.avail_in = size;
 169        stream.next_out = out;
 170        stream.avail_out = maxsize;
 171        while (git_deflate(&stream, Z_FINISH) == Z_OK)
 172                ; /* nothing */
 173        git_deflate_end(&stream);
 174
 175        free(in);
 176        return stream.total_out;
 177}
 178
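    /*
     * Stream a blob that is too large to hold in memory straight from the
     * object store into the pack, deflating it in fixed-size chunks.
     * Returns the number of compressed bytes written to the pack.
     */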
 179static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
 180                                           const struct object_id *oid)
 181{
 182        git_zstream stream;
 183        unsigned char ibuf[1024 * 16];
 184        unsigned char obuf[1024 * 16];
 185        unsigned long olen = 0;
 186
 187        git_deflate_init(&stream, pack_compression_level);
 188
 189        for (;;) {
 190                ssize_t readlen;
 191                int zret = Z_OK;
 192                readlen = read_istream(st, ibuf, sizeof(ibuf));
 193                if (readlen == -1)
 194                        die(_("unable to read %s"), oid_to_hex(oid));
 195
 196                stream.next_in = ibuf;
 197                stream.avail_in = readlen;
 198                while ((stream.avail_in || readlen == 0) &&
 199                       (zret == Z_OK || zret == Z_BUF_ERROR)) {
 200                        stream.next_out = obuf;
 201                        stream.avail_out = sizeof(obuf);
 202                        zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
 203                        hashwrite(f, obuf, stream.next_out - obuf);
 204                        olen += stream.next_out - obuf;
 205                }
 206                if (stream.avail_in)
 207                        die(_("deflate error (%d)"), zret);
 208                if (readlen == 0) {
 209                        if (zret != Z_STREAM_END)
 210                                die(_("deflate error (%d)"), zret);
 211                        break;
 212                }
 213        }
 214        git_deflate_end(&stream);
 215        return olen;
 216}
 217
 218/*
 219 * we are going to reuse the existing object data as is.  make
 220 * sure it is not corrupt.
 221 */
 222static int check_pack_inflate(struct packed_git *p,
 223                struct pack_window **w_curs,
 224                off_t offset,
 225                off_t len,
 226                unsigned long expect)
 227{
 228        git_zstream stream;
 229        unsigned char fakebuf[4096], *in;
 230        int st;
 231
 232        memset(&stream, 0, sizeof(stream));
 233        git_inflate_init(&stream);
 234        do {
 235                in = use_pack(p, w_curs, offset, &stream.avail_in);
 236                stream.next_in = in;
 237                stream.next_out = fakebuf;
 238                stream.avail_out = sizeof(fakebuf);
 239                st = git_inflate(&stream, Z_FINISH);
 240                offset += stream.next_in - in;
 241        } while (st == Z_OK || st == Z_BUF_ERROR);
 242        git_inflate_end(&stream);
 243        return (st == Z_STREAM_END &&
 244                stream.total_out == expect &&
 245                stream.total_in == len) ? 0 : -1;
 246}
 247
 248static void copy_pack_data(struct hashfile *f,
 249                struct packed_git *p,
 250                struct pack_window **w_curs,
 251                off_t offset,
 252                off_t len)
 253{
 254        unsigned char *in;
 255        unsigned long avail;
 256
 257        while (len) {
 258                in = use_pack(p, w_curs, offset, &avail);
 259                if (avail > len)
 260                        avail = (unsigned long)len;
 261                hashwrite(f, in, avail);
 262                offset += avail;
 263                len -= avail;
 264        }
 265}
 266
 267/* Return 0 if we will bust the pack-size limit */
 268static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
 269                                           unsigned long limit, int usable_delta)
 270{
 271        unsigned long size, datalen;
 272        unsigned char header[MAX_PACK_OBJECT_HEADER],
 273                      dheader[MAX_PACK_OBJECT_HEADER];
 274        unsigned hdrlen;
 275        enum object_type type;
 276        void *buf;
 277        struct git_istream *st = NULL;
 278
 279        if (!usable_delta) {
 280                if (oe_type(entry) == OBJ_BLOB &&
 281                    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
 282                    (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL)
 283                        buf = NULL;
 284                else {
 285                        buf = read_object_file(&entry->idx.oid, &type, &size);
 286                        if (!buf)
 287                                die(_("unable to read %s"),
 288                                    oid_to_hex(&entry->idx.oid));
 289                }
 290                /*
 291                 * make sure no cached delta data remains from a
 292                 * previous attempt before a pack split occurred.
 293                 */
 294                FREE_AND_NULL(entry->delta_data);
 295                entry->z_delta_size = 0;
 296        } else if (entry->delta_data) {
 297                size = entry->delta_size;
 298                buf = entry->delta_data;
 299                entry->delta_data = NULL;
 300                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 301                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
 302        } else {
 303                buf = get_delta(entry);
 304                size = entry->delta_size;
 305                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 306                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
 307        }
 308
 309        if (st) /* large blob case, just assume we don't compress well */
 310                datalen = size;
 311        else if (entry->z_delta_size)
 312                datalen = entry->z_delta_size;
 313        else
 314                datalen = do_compress(&buf, size);
 315
 316        /*
 317         * The object header is a byte of 'type' followed by zero or
 318         * more bytes of length.
 319         */
 320        hdrlen = encode_in_pack_object_header(header, sizeof(header),
 321                                              type, size);
 322
 323        if (type == OBJ_OFS_DELTA) {
 324                /*
 325                 * Deltas with relative base contain an additional
 326                 * encoding of the relative offset for the delta
 327                 * base from this object's position in the pack.
 328                 */
 329                off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
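                /*
                 * Encode "ofs" into dheader from the end, 7 bits per byte,
                 * most significant group first.  Every byte before the last
                 * is stored with its high bit set and one subtracted
                 * ("--ofs"), matching the +1 the reader adds back per
                 * continuation byte, so each offset has a single encoding.
                 */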
 330                unsigned pos = sizeof(dheader) - 1;
 331                dheader[pos] = ofs & 127;
 332                while (ofs >>= 7)
 333                        dheader[--pos] = 128 | (--ofs & 127);
 334                if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
 335                        if (st)
 336                                close_istream(st);
 337                        free(buf);
 338                        return 0;
 339                }
 340                hashwrite(f, header, hdrlen);
 341                hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 342                hdrlen += sizeof(dheader) - pos;
 343        } else if (type == OBJ_REF_DELTA) {
 344                /*
 345                 * Deltas with a base reference contain
 346                 * an additional 20 bytes for the base sha1.
 347                 */
 348                if (limit && hdrlen + 20 + datalen + 20 >= limit) {
 349                        if (st)
 350                                close_istream(st);
 351                        free(buf);
 352                        return 0;
 353                }
 354                hashwrite(f, header, hdrlen);
 355                hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
 356                hdrlen += 20;
 357        } else {
 358                if (limit && hdrlen + datalen + 20 >= limit) {
 359                        if (st)
 360                                close_istream(st);
 361                        free(buf);
 362                        return 0;
 363                }
 364                hashwrite(f, header, hdrlen);
 365        }
 366        if (st) {
 367                datalen = write_large_blob_data(st, f, &entry->idx.oid);
 368                close_istream(st);
 369        } else {
 370                hashwrite(f, buf, datalen);
 371                free(buf);
 372        }
 373
 374        return hdrlen + datalen;
 375}
 376
 377/* Return 0 if we will bust the pack-size limit */
 378static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 379                                unsigned long limit, int usable_delta)
 380{
 381        struct packed_git *p = IN_PACK(entry);
 382        struct pack_window *w_curs = NULL;
 383        struct revindex_entry *revidx;
 384        off_t offset;
 385        enum object_type type = oe_type(entry);
 386        off_t datalen;
 387        unsigned char header[MAX_PACK_OBJECT_HEADER],
 388                      dheader[MAX_PACK_OBJECT_HEADER];
 389        unsigned hdrlen;
 390        unsigned long entry_size = SIZE(entry);
 391
 392        if (DELTA(entry))
 393                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 394                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
 395        hdrlen = encode_in_pack_object_header(header, sizeof(header),
 396                                              type, entry_size);
 397
 398        offset = entry->in_pack_offset;
 399        revidx = find_pack_revindex(p, offset);
 400        datalen = revidx[1].offset - offset;
 401        if (!pack_to_stdout && p->index_version > 1 &&
 402            check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
 403                error("bad packed object CRC for %s",
 404                      oid_to_hex(&entry->idx.oid));
 405                unuse_pack(&w_curs);
 406                return write_no_reuse_object(f, entry, limit, usable_delta);
 407        }
 408
 409        offset += entry->in_pack_header_size;
 410        datalen -= entry->in_pack_header_size;
 411
 412        if (!pack_to_stdout && p->index_version == 1 &&
 413            check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 414                error("corrupt packed object for %s",
 415                      oid_to_hex(&entry->idx.oid));
 416                unuse_pack(&w_curs);
 417                return write_no_reuse_object(f, entry, limit, usable_delta);
 418        }
 419
 420        if (type == OBJ_OFS_DELTA) {
 421                off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 422                unsigned pos = sizeof(dheader) - 1;
 423                dheader[pos] = ofs & 127;
 424                while (ofs >>= 7)
 425                        dheader[--pos] = 128 | (--ofs & 127);
 426                if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
 427                        unuse_pack(&w_curs);
 428                        return 0;
 429                }
 430                hashwrite(f, header, hdrlen);
 431                hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 432                hdrlen += sizeof(dheader) - pos;
 433                reused_delta++;
 434        } else if (type == OBJ_REF_DELTA) {
 435                if (limit && hdrlen + 20 + datalen + 20 >= limit) {
 436                        unuse_pack(&w_curs);
 437                        return 0;
 438                }
 439                hashwrite(f, header, hdrlen);
 440                hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
 441                hdrlen += 20;
 442                reused_delta++;
 443        } else {
 444                if (limit && hdrlen + datalen + 20 >= limit) {
 445                        unuse_pack(&w_curs);
 446                        return 0;
 447                }
 448                hashwrite(f, header, hdrlen);
 449        }
 450        copy_pack_data(f, p, &w_curs, offset, datalen);
 451        unuse_pack(&w_curs);
 452        reused++;
 453        return hdrlen + datalen;
 454}
 455
 456/* Return 0 if we will bust the pack-size limit */
 457static off_t write_object(struct hashfile *f,
 458                          struct object_entry *entry,
 459                          off_t write_offset)
 460{
 461        unsigned long limit;
 462        off_t len;
 463        int usable_delta, to_reuse;
 464
 465        if (!pack_to_stdout)
 466                crc32_begin(f);
 467
 468        /* apply size limit if limited packsize and not first object */
 469        if (!pack_size_limit || !nr_written)
 470                limit = 0;
 471        else if (pack_size_limit <= write_offset)
 472                /*
 473                 * the earlier object did not fit the limit; avoid
  474                 * mistaking this for unlimited (i.e. limit = 0).
 475                 */
 476                limit = 1;
 477        else
 478                limit = pack_size_limit - write_offset;
 479
 480        if (!DELTA(entry))
 481                usable_delta = 0;       /* no delta */
 482        else if (!pack_size_limit)
 483               usable_delta = 1;        /* unlimited packfile */
 484        else if (DELTA(entry)->idx.offset == (off_t)-1)
 485                usable_delta = 0;       /* base was written to another pack */
 486        else if (DELTA(entry)->idx.offset)
 487                usable_delta = 1;       /* base already exists in this pack */
 488        else
 489                usable_delta = 0;       /* base could end up in another pack */
 490
 491        if (!reuse_object)
 492                to_reuse = 0;   /* explicit */
 493        else if (!IN_PACK(entry))
 494                to_reuse = 0;   /* can't reuse what we don't have */
 495        else if (oe_type(entry) == OBJ_REF_DELTA ||
 496                 oe_type(entry) == OBJ_OFS_DELTA)
 497                                /* check_object() decided it for us ... */
 498                to_reuse = usable_delta;
 499                                /* ... but pack split may override that */
 500        else if (oe_type(entry) != entry->in_pack_type)
 501                to_reuse = 0;   /* pack has delta which is unusable */
 502        else if (DELTA(entry))
 503                to_reuse = 0;   /* we want to pack afresh */
 504        else
 505                to_reuse = 1;   /* we have it in-pack undeltified,
 506                                 * and we do not need to deltify it.
 507                                 */
 508
 509        if (!to_reuse)
 510                len = write_no_reuse_object(f, entry, limit, usable_delta);
 511        else
 512                len = write_reuse_object(f, entry, limit, usable_delta);
 513        if (!len)
 514                return 0;
 515
 516        if (usable_delta)
 517                written_delta++;
 518        written++;
 519        if (!pack_to_stdout)
 520                entry->idx.crc32 = crc32_end(f);
 521        return len;
 522}
 523
 524enum write_one_status {
 525        WRITE_ONE_SKIP = -1, /* already written */
 526        WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
 527        WRITE_ONE_WRITTEN = 1, /* normal */
 528        WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
 529};
 530
 531static enum write_one_status write_one(struct hashfile *f,
 532                                       struct object_entry *e,
 533                                       off_t *offset)
 534{
 535        off_t size;
 536        int recursing;
 537
 538        /*
 539         * we set offset to 1 (which is an impossible value) to mark
 540         * the fact that this object is involved in "write its base
 541         * first before writing a deltified object" recursion.
 542         */
 543        recursing = (e->idx.offset == 1);
 544        if (recursing) {
 545                warning("recursive delta detected for object %s",
 546                        oid_to_hex(&e->idx.oid));
 547                return WRITE_ONE_RECURSIVE;
 548        } else if (e->idx.offset || e->preferred_base) {
 549                /* offset is non zero if object is written already. */
 550                return WRITE_ONE_SKIP;
 551        }
 552
 553        /* if we are deltified, write out base object first. */
 554        if (DELTA(e)) {
 555                e->idx.offset = 1; /* now recurse */
 556                switch (write_one(f, DELTA(e), offset)) {
 557                case WRITE_ONE_RECURSIVE:
 558                        /* we cannot depend on this one */
 559                        SET_DELTA(e, NULL);
 560                        break;
 561                default:
 562                        break;
 563                case WRITE_ONE_BREAK:
 564                        e->idx.offset = recursing;
 565                        return WRITE_ONE_BREAK;
 566                }
 567        }
 568
 569        e->idx.offset = *offset;
 570        size = write_object(f, e, *offset);
 571        if (!size) {
 572                e->idx.offset = recursing;
 573                return WRITE_ONE_BREAK;
 574        }
 575        written_list[nr_written++] = &e->idx;
 576
 577        /* make sure off_t is sufficiently large not to wrap */
 578        if (signed_add_overflows(*offset, size))
 579                die("pack too large for current definition of off_t");
 580        *offset += size;
 581        return WRITE_ONE_WRITTEN;
 582}
 583
 584static int mark_tagged(const char *path, const struct object_id *oid, int flag,
 585                       void *cb_data)
 586{
 587        struct object_id peeled;
 588        struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL);
 589
 590        if (entry)
 591                entry->tagged = 1;
 592        if (!peel_ref(path, &peeled)) {
 593                entry = packlist_find(&to_pack, peeled.hash, NULL);
 594                if (entry)
 595                        entry->tagged = 1;
 596        }
 597        return 0;
 598}
 599
 600static inline void add_to_write_order(struct object_entry **wo,
 601                               unsigned int *endp,
 602                               struct object_entry *e)
 603{
 604        if (e->filled)
 605                return;
 606        wo[(*endp)++] = e;
 607        e->filled = 1;
 608}
 609
 610static void add_descendants_to_write_order(struct object_entry **wo,
 611                                           unsigned int *endp,
 612                                           struct object_entry *e)
 613{
 614        int add_to_order = 1;
 615        while (e) {
 616                if (add_to_order) {
 617                        struct object_entry *s;
 618                        /* add this node... */
 619                        add_to_write_order(wo, endp, e);
 620                        /* all its siblings... */
 621                        for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
 622                                add_to_write_order(wo, endp, s);
 623                        }
 624                }
 625                /* drop down a level to add left subtree nodes if possible */
 626                if (DELTA_CHILD(e)) {
 627                        add_to_order = 1;
 628                        e = DELTA_CHILD(e);
 629                } else {
 630                        add_to_order = 0;
 631                        /* our sibling might have some children, it is next */
 632                        if (DELTA_SIBLING(e)) {
 633                                e = DELTA_SIBLING(e);
 634                                continue;
 635                        }
 636                        /* go back to our parent node */
 637                        e = DELTA(e);
 638                        while (e && !DELTA_SIBLING(e)) {
 639                                /* we're on the right side of a subtree, keep
 640                                 * going up until we can go right again */
 641                                e = DELTA(e);
 642                        }
 643                        if (!e) {
 644                                /* done- we hit our original root node */
 645                                return;
 646                        }
 647                        /* pass it off to sibling at this level */
 648                        e = DELTA_SIBLING(e);
 649                }
 650        };
 651}
 652
 653static void add_family_to_write_order(struct object_entry **wo,
 654                                      unsigned int *endp,
 655                                      struct object_entry *e)
 656{
 657        struct object_entry *root;
 658
 659        for (root = e; DELTA(root); root = DELTA(root))
 660                ; /* nothing */
 661        add_descendants_to_write_order(wo, endp, root);
 662}
 663
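    /*
     * Compute the order in which objects are written: untagged objects in
     * recency order, then tagged tips, then remaining commits and tags, then
     * trees, and finally whole delta families packed tightly together.
     */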
 664static struct object_entry **compute_write_order(void)
 665{
 666        unsigned int i, wo_end, last_untagged;
 667
 668        struct object_entry **wo;
 669        struct object_entry *objects = to_pack.objects;
 670
 671        for (i = 0; i < to_pack.nr_objects; i++) {
 672                objects[i].tagged = 0;
 673                objects[i].filled = 0;
 674                SET_DELTA_CHILD(&objects[i], NULL);
 675                SET_DELTA_SIBLING(&objects[i], NULL);
 676        }
 677
 678        /*
 679         * Fully connect delta_child/delta_sibling network.
 680         * Make sure delta_sibling is sorted in the original
 681         * recency order.
 682         */
 683        for (i = to_pack.nr_objects; i > 0;) {
 684                struct object_entry *e = &objects[--i];
 685                if (!DELTA(e))
 686                        continue;
 687                /* Mark me as the first child */
 688                e->delta_sibling_idx = DELTA(e)->delta_child_idx;
 689                SET_DELTA_CHILD(DELTA(e), e);
 690        }
 691
 692        /*
 693         * Mark objects that are at the tip of tags.
 694         */
 695        for_each_tag_ref(mark_tagged, NULL);
 696
 697        /*
 698         * Give the objects in the original recency order until
 699         * we see a tagged tip.
 700         */
 701        ALLOC_ARRAY(wo, to_pack.nr_objects);
 702        for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
 703                if (objects[i].tagged)
 704                        break;
 705                add_to_write_order(wo, &wo_end, &objects[i]);
 706        }
 707        last_untagged = i;
 708
 709        /*
 710         * Then fill all the tagged tips.
 711         */
 712        for (; i < to_pack.nr_objects; i++) {
 713                if (objects[i].tagged)
 714                        add_to_write_order(wo, &wo_end, &objects[i]);
 715        }
 716
 717        /*
 718         * And then all remaining commits and tags.
 719         */
 720        for (i = last_untagged; i < to_pack.nr_objects; i++) {
 721                if (oe_type(&objects[i]) != OBJ_COMMIT &&
 722                    oe_type(&objects[i]) != OBJ_TAG)
 723                        continue;
 724                add_to_write_order(wo, &wo_end, &objects[i]);
 725        }
 726
 727        /*
 728         * And then all the trees.
 729         */
 730        for (i = last_untagged; i < to_pack.nr_objects; i++) {
 731                if (oe_type(&objects[i]) != OBJ_TREE)
 732                        continue;
 733                add_to_write_order(wo, &wo_end, &objects[i]);
 734        }
 735
 736        /*
 737         * Finally all the rest in really tight order
 738         */
 739        for (i = last_untagged; i < to_pack.nr_objects; i++) {
 740                if (!objects[i].filled)
 741                        add_family_to_write_order(wo, &wo_end, &objects[i]);
 742        }
 743
 744        if (wo_end != to_pack.nr_objects)
 745                die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
 746
 747        return wo;
 748}
 749
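    /*
     * With bitmap-assisted pack reuse, copy the reusable leading part of an
     * existing packfile verbatim (skipping its header, which the caller has
     * already written), updating the progress meter by pretending every
     * object contributes an equal share of the copied bytes.
     */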
 750static off_t write_reused_pack(struct hashfile *f)
 751{
 752        unsigned char buffer[8192];
 753        off_t to_write, total;
 754        int fd;
 755
 756        if (!is_pack_valid(reuse_packfile))
 757                die("packfile is invalid: %s", reuse_packfile->pack_name);
 758
 759        fd = git_open(reuse_packfile->pack_name);
 760        if (fd < 0)
 761                die_errno("unable to open packfile for reuse: %s",
 762                          reuse_packfile->pack_name);
 763
 764        if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
 765                die_errno("unable to seek in reused packfile");
 766
 767        if (reuse_packfile_offset < 0)
 768                reuse_packfile_offset = reuse_packfile->pack_size - 20;
 769
 770        total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
 771
 772        while (to_write) {
 773                int read_pack = xread(fd, buffer, sizeof(buffer));
 774
 775                if (read_pack <= 0)
 776                        die_errno("unable to read from reused packfile");
 777
 778                if (read_pack > to_write)
 779                        read_pack = to_write;
 780
 781                hashwrite(f, buffer, read_pack);
 782                to_write -= read_pack;
 783
 784                /*
 785                 * We don't know the actual number of objects written,
 786                 * only how many bytes written, how many bytes total, and
 787                 * how many objects total. So we can fake it by pretending all
 788                 * objects we are writing are the same size. This gives us a
 789                 * smooth progress meter, and at the end it matches the true
 790                 * answer.
 791                 */
 792                written = reuse_packfile_objects *
 793                                (((double)(total - to_write)) / total);
 794                display_progress(progress_state, written);
 795        }
 796
 797        close(fd);
 798        written = reuse_packfile_objects;
 799        display_progress(progress_state, written);
 800        return reuse_packfile_offset - sizeof(struct pack_header);
 801}
 802
 803static const char no_split_warning[] = N_(
 804"disabling bitmap writing, packs are split due to pack.packSizeLimit"
 805);
 806
 807static void write_pack_file(void)
 808{
 809        uint32_t i = 0, j;
 810        struct hashfile *f;
 811        off_t offset;
 812        uint32_t nr_remaining = nr_result;
 813        time_t last_mtime = 0;
 814        struct object_entry **write_order;
 815
 816        if (progress > pack_to_stdout)
 817                progress_state = start_progress(_("Writing objects"), nr_result);
 818        ALLOC_ARRAY(written_list, to_pack.nr_objects);
 819        write_order = compute_write_order();
 820
 821        do {
 822                struct object_id oid;
 823                char *pack_tmp_name = NULL;
 824
 825                if (pack_to_stdout)
 826                        f = hashfd_throughput(1, "<stdout>", progress_state);
 827                else
 828                        f = create_tmp_packfile(&pack_tmp_name);
 829
 830                offset = write_pack_header(f, nr_remaining);
 831
 832                if (reuse_packfile) {
 833                        off_t packfile_size;
 834                        assert(pack_to_stdout);
 835
 836                        packfile_size = write_reused_pack(f);
 837                        offset += packfile_size;
 838                }
 839
 840                nr_written = 0;
 841                for (; i < to_pack.nr_objects; i++) {
 842                        struct object_entry *e = write_order[i];
 843                        if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
 844                                break;
 845                        display_progress(progress_state, written);
 846                }
 847
 848                /*
 849                 * Did we write the wrong # entries in the header?
 850                 * If so, rewrite it like in fast-import
 851                 */
 852                if (pack_to_stdout) {
 853                        hashclose(f, oid.hash, CSUM_CLOSE);
 854                } else if (nr_written == nr_remaining) {
 855                        hashclose(f, oid.hash, CSUM_FSYNC);
 856                } else {
 857                        int fd = hashclose(f, oid.hash, 0);
 858                        fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
 859                                                 nr_written, oid.hash, offset);
 860                        close(fd);
 861                        if (write_bitmap_index) {
 862                                warning(_(no_split_warning));
 863                                write_bitmap_index = 0;
 864                        }
 865                }
 866
 867                if (!pack_to_stdout) {
 868                        struct stat st;
 869                        struct strbuf tmpname = STRBUF_INIT;
 870
 871                        /*
  872                         * Packs are accessed at runtime in mtime order,
  873                         * since newer packs are more likely to contain
 874                         * younger objects.  So if we are creating multiple
 875                         * packs then we should modify the mtime of later ones
 876                         * to preserve this property.
 877                         */
 878                        if (stat(pack_tmp_name, &st) < 0) {
 879                                warning_errno("failed to stat %s", pack_tmp_name);
 880                        } else if (!last_mtime) {
 881                                last_mtime = st.st_mtime;
 882                        } else {
 883                                struct utimbuf utb;
 884                                utb.actime = st.st_atime;
 885                                utb.modtime = --last_mtime;
 886                                if (utime(pack_tmp_name, &utb) < 0)
 887                                        warning_errno("failed utime() on %s", pack_tmp_name);
 888                        }
 889
 890                        strbuf_addf(&tmpname, "%s-", base_name);
 891
 892                        if (write_bitmap_index) {
 893                                bitmap_writer_set_checksum(oid.hash);
 894                                bitmap_writer_build_type_index(
 895                                        &to_pack, written_list, nr_written);
 896                        }
 897
 898                        finish_tmp_packfile(&tmpname, pack_tmp_name,
 899                                            written_list, nr_written,
 900                                            &pack_idx_opts, oid.hash);
 901
 902                        if (write_bitmap_index) {
 903                                strbuf_addf(&tmpname, "%s.bitmap", oid_to_hex(&oid));
 904
 905                                stop_progress(&progress_state);
 906
 907                                bitmap_writer_show_progress(progress);
 908                                bitmap_writer_reuse_bitmaps(&to_pack);
 909                                bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
 910                                bitmap_writer_build(&to_pack);
 911                                bitmap_writer_finish(written_list, nr_written,
 912                                                     tmpname.buf, write_bitmap_options);
 913                                write_bitmap_index = 0;
 914                        }
 915
 916                        strbuf_release(&tmpname);
 917                        free(pack_tmp_name);
 918                        puts(oid_to_hex(&oid));
 919                }
 920
 921                /* mark written objects as written to previous pack */
 922                for (j = 0; j < nr_written; j++) {
 923                        written_list[j]->offset = (off_t)-1;
 924                }
 925                nr_remaining -= nr_written;
 926        } while (nr_remaining && i < to_pack.nr_objects);
 927
 928        free(written_list);
 929        free(write_order);
 930        stop_progress(&progress_state);
 931        if (written != nr_result)
 932                die("wrote %"PRIu32" objects while expecting %"PRIu32,
 933                        written, nr_result);
 934}
 935
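    /*
     * Check the "delta" gitattribute for a path; returns 1 when the
     * attribute is set to false, i.e. delta compression should not be
     * attempted for this object.
     */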
 936static int no_try_delta(const char *path)
 937{
 938        static struct attr_check *check;
 939
 940        if (!check)
 941                check = attr_check_initl("delta", NULL);
 942        if (git_check_attr(path, check))
 943                return 0;
 944        if (ATTR_FALSE(check->items[0].value))
 945                return 1;
 946        return 0;
 947}
 948
 949/*
 950 * When adding an object, check whether we have already added it
 951 * to our packing list. If so, we can skip. However, if we are
  952 * being asked to exclude it, but the previous mention was to include
 953 * it, make sure to adjust its flags and tweak our numbers accordingly.
 954 *
 955 * As an optimization, we pass out the index position where we would have
 956 * found the item, since that saves us from having to look it up again a
 957 * few lines later when we want to add the new entry.
 958 */
 959static int have_duplicate_entry(const struct object_id *oid,
 960                                int exclude,
 961                                uint32_t *index_pos)
 962{
 963        struct object_entry *entry;
 964
 965        entry = packlist_find(&to_pack, oid->hash, index_pos);
 966        if (!entry)
 967                return 0;
 968
 969        if (exclude) {
 970                if (!entry->preferred_base)
 971                        nr_result--;
 972                entry->preferred_base = 1;
 973        }
 974
 975        return 1;
 976}
 977
 978static int want_found_object(int exclude, struct packed_git *p)
 979{
 980        if (exclude)
 981                return 1;
 982        if (incremental)
 983                return 0;
 984
 985        /*
 986         * When asked to do --local (do not include an object that appears in a
 987         * pack we borrow from elsewhere) or --honor-pack-keep (do not include
 988         * an object that appears in a pack marked with .keep), finding a pack
 989         * that matches the criteria is sufficient for us to decide to omit it.
 990         * However, even if this pack does not satisfy the criteria, we need to
  991 * make sure no copy of this object appears in _any_ pack that makes us
  992 * omit the object, so we need to check all the packs.
  993 *
  994 * We can, however, first check whether these options can possibly matter;
  995 * if they do not matter, we know we want the object in the generated pack.
 996         * Otherwise, we signal "-1" at the end to tell the caller that we do
 997         * not know either way, and it needs to check more packs.
 998         */
 999        if (!ignore_packed_keep &&
1000            (!local || !have_non_local_packs))
1001                return 1;
1002
1003        if (local && !p->pack_local)
1004                return 0;
1005        if (ignore_packed_keep && p->pack_local && p->pack_keep)
1006                return 0;
1007
1008        /* we don't know yet; keep looking for more packs */
1009        return -1;
1010}
1011
1012/*
1013 * Check whether we want the object in the pack (e.g., we do not want
1014 * objects found in non-local stores if the "--local" option was used).
1015 *
1016 * If the caller already knows an existing pack it wants to take the object
1017 * from, that is passed in *found_pack and *found_offset; otherwise this
1018 * function finds if there is any pack that has the object and returns the pack
1019 * and its offset in these variables.
1020 */
1021static int want_object_in_pack(const struct object_id *oid,
1022                               int exclude,
1023                               struct packed_git **found_pack,
1024                               off_t *found_offset)
1025{
1026        int want;
1027        struct list_head *pos;
1028
1029        if (!exclude && local && has_loose_object_nonlocal(oid->hash))
1030                return 0;
1031
1032        /*
 1033         * If we already know which pack the object lives in, start checks from that
1034         * pack - in the usual case when neither --local was given nor .keep files
1035         * are present we will determine the answer right now.
1036         */
1037        if (*found_pack) {
1038                want = want_found_object(exclude, *found_pack);
1039                if (want != -1)
1040                        return want;
1041        }
1042        list_for_each(pos, get_packed_git_mru(the_repository)) {
1043                struct packed_git *p = list_entry(pos, struct packed_git, mru);
1044                off_t offset;
1045
1046                if (p == *found_pack)
1047                        offset = *found_offset;
1048                else
1049                        offset = find_pack_entry_one(oid->hash, p);
1050
1051                if (offset) {
1052                        if (!*found_pack) {
1053                                if (!is_pack_valid(p))
1054                                        continue;
1055                                *found_offset = offset;
1056                                *found_pack = p;
1057                        }
1058                        want = want_found_object(exclude, p);
1059                        if (!exclude && want > 0)
1060                                list_move(&p->mru,
1061                                          get_packed_git_mru(the_repository));
1062                        if (want != -1)
1063                                return want;
1064                }
1065        }
1066
1067        return 1;
1068}
1069
1070static void create_object_entry(const struct object_id *oid,
1071                                enum object_type type,
1072                                uint32_t hash,
1073                                int exclude,
1074                                int no_try_delta,
1075                                uint32_t index_pos,
1076                                struct packed_git *found_pack,
1077                                off_t found_offset)
1078{
1079        struct object_entry *entry;
1080
1081        entry = packlist_alloc(&to_pack, oid->hash, index_pos);
1082        entry->hash = hash;
1083        oe_set_type(entry, type);
1084        if (exclude)
1085                entry->preferred_base = 1;
1086        else
1087                nr_result++;
1088        if (found_pack) {
1089                oe_set_in_pack(&to_pack, entry, found_pack);
1090                entry->in_pack_offset = found_offset;
1091        }
1092
1093        entry->no_try_delta = no_try_delta;
1094}
1095
1096static const char no_closure_warning[] = N_(
1097"disabling bitmap writing, as some objects are not being packed"
1098);
1099
1100static int add_object_entry(const struct object_id *oid, enum object_type type,
1101                            const char *name, int exclude)
1102{
1103        struct packed_git *found_pack = NULL;
1104        off_t found_offset = 0;
1105        uint32_t index_pos;
1106
1107        if (have_duplicate_entry(oid, exclude, &index_pos))
1108                return 0;
1109
1110        if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1111                /* The pack is missing an object, so it will not have closure */
1112                if (write_bitmap_index) {
1113                        warning(_(no_closure_warning));
1114                        write_bitmap_index = 0;
1115                }
1116                return 0;
1117        }
1118
1119        create_object_entry(oid, type, pack_name_hash(name),
1120                            exclude, name && no_try_delta(name),
1121                            index_pos, found_pack, found_offset);
1122
1123        display_progress(progress_state, nr_result);
1124        return 1;
1125}
1126
1127static int add_object_entry_from_bitmap(const struct object_id *oid,
1128                                        enum object_type type,
1129                                        int flags, uint32_t name_hash,
1130                                        struct packed_git *pack, off_t offset)
1131{
1132        uint32_t index_pos;
1133
1134        if (have_duplicate_entry(oid, 0, &index_pos))
1135                return 0;
1136
1137        if (!want_object_in_pack(oid, 0, &pack, &offset))
1138                return 0;
1139
1140        create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset);
1141
1142        display_progress(progress_state, nr_result);
1143        return 1;
1144}
1145
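    /*
     * Cache of tree objects read while adding preferred-base (thin pack)
     * objects.  pbase_tree_get() and pbase_tree_put() below treat
     * pbase_tree_cache[] as a small open-addressing hash table indexed by
     * the first byte of the object id.
     */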
1146struct pbase_tree_cache {
1147        struct object_id oid;
1148        int ref;
1149        int temporary;
1150        void *tree_data;
1151        unsigned long tree_size;
1152};
1153
1154static struct pbase_tree_cache *(pbase_tree_cache[256]);
1155static int pbase_tree_cache_ix(const struct object_id *oid)
1156{
1157        return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1158}
1159static int pbase_tree_cache_ix_incr(int ix)
1160{
1161        return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1162}
1163
1164static struct pbase_tree {
1165        struct pbase_tree *next;
1166        /* This is a phony "cache" entry; we are not
1167         * going to evict it or find it through _get()
1168         * mechanism -- this is for the toplevel node that
1169         * would almost always change with any commit.
1170         */
1171        struct pbase_tree_cache pcache;
1172} *pbase_tree;
1173
1174static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1175{
1176        struct pbase_tree_cache *ent, *nent;
1177        void *data;
1178        unsigned long size;
1179        enum object_type type;
1180        int neigh;
1181        int my_ix = pbase_tree_cache_ix(oid);
1182        int available_ix = -1;
1183
1184        /* pbase-tree-cache acts as a limited hashtable.
1185         * your object will be found at your index or within a few
1186         * slots after that slot if it is cached.
1187         */
1188        for (neigh = 0; neigh < 8; neigh++) {
1189                ent = pbase_tree_cache[my_ix];
1190                if (ent && !oidcmp(&ent->oid, oid)) {
1191                        ent->ref++;
1192                        return ent;
1193                }
1194                else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1195                         ((0 <= available_ix) &&
1196                          (!ent && pbase_tree_cache[available_ix])))
1197                        available_ix = my_ix;
1198                if (!ent)
1199                        break;
1200                my_ix = pbase_tree_cache_ix_incr(my_ix);
1201        }
1202
1203        /* Did not find one.  Either we got a bogus request or
1204         * we need to read and perhaps cache.
1205         */
1206        data = read_object_file(oid, &type, &size);
1207        if (!data)
1208                return NULL;
1209        if (type != OBJ_TREE) {
1210                free(data);
1211                return NULL;
1212        }
1213
1214        /* We need to either cache or return a throwaway copy */
1215
1216        if (available_ix < 0)
1217                ent = NULL;
1218        else {
1219                ent = pbase_tree_cache[available_ix];
1220                my_ix = available_ix;
1221        }
1222
1223        if (!ent) {
1224                nent = xmalloc(sizeof(*nent));
1225                nent->temporary = (available_ix < 0);
1226        }
1227        else {
1228                /* evict and reuse */
1229                free(ent->tree_data);
1230                nent = ent;
1231        }
1232        oidcpy(&nent->oid, oid);
1233        nent->tree_data = data;
1234        nent->tree_size = size;
1235        nent->ref = 1;
1236        if (!nent->temporary)
1237                pbase_tree_cache[my_ix] = nent;
1238        return nent;
1239}
1240
1241static void pbase_tree_put(struct pbase_tree_cache *cache)
1242{
1243        if (!cache->temporary) {
1244                cache->ref--;
1245                return;
1246        }
1247        free(cache->tree_data);
1248        free(cache);
1249}
1250
1251static int name_cmp_len(const char *name)
1252{
1253        int i;
1254        for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
1255                ;
1256        return i;
1257}
1258
1259static void add_pbase_object(struct tree_desc *tree,
1260                             const char *name,
1261                             int cmplen,
1262                             const char *fullname)
1263{
1264        struct name_entry entry;
1265        int cmp;
1266
1267        while (tree_entry(tree,&entry)) {
1268                if (S_ISGITLINK(entry.mode))
1269                        continue;
1270                cmp = tree_entry_len(&entry) != cmplen ? 1 :
1271                      memcmp(name, entry.path, cmplen);
1272                if (cmp > 0)
1273                        continue;
1274                if (cmp < 0)
1275                        return;
1276                if (name[cmplen] != '/') {
1277                        add_object_entry(entry.oid,
1278                                         object_type(entry.mode),
1279                                         fullname, 1);
1280                        return;
1281                }
1282                if (S_ISDIR(entry.mode)) {
1283                        struct tree_desc sub;
1284                        struct pbase_tree_cache *tree;
1285                        const char *down = name+cmplen+1;
1286                        int downlen = name_cmp_len(down);
1287
1288                        tree = pbase_tree_get(entry.oid);
1289                        if (!tree)
1290                                return;
1291                        init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1292
1293                        add_pbase_object(&sub, down, downlen, fullname);
1294                        pbase_tree_put(tree);
1295                }
1296        }
1297}
1298
1299static unsigned *done_pbase_paths;
1300static int done_pbase_paths_num;
1301static int done_pbase_paths_alloc;
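    /*
     * Binary search for "hash" in done_pbase_paths[], which is kept sorted
     * in descending order.  Returns the index when found, otherwise
     * -(insertion point)-1 so check_pbase_path() knows where to insert.
     */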
1302static int done_pbase_path_pos(unsigned hash)
1303{
1304        int lo = 0;
1305        int hi = done_pbase_paths_num;
1306        while (lo < hi) {
1307                int mi = lo + (hi - lo) / 2;
1308                if (done_pbase_paths[mi] == hash)
1309                        return mi;
1310                if (done_pbase_paths[mi] < hash)
1311                        hi = mi;
1312                else
1313                        lo = mi + 1;
1314        }
1315        return -lo-1;
1316}
1317
1318static int check_pbase_path(unsigned hash)
1319{
1320        int pos = done_pbase_path_pos(hash);
1321        if (0 <= pos)
1322                return 1;
1323        pos = -pos - 1;
1324        ALLOC_GROW(done_pbase_paths,
1325                   done_pbase_paths_num + 1,
1326                   done_pbase_paths_alloc);
1327        done_pbase_paths_num++;
1328        if (pos < done_pbase_paths_num)
1329                MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1330                           done_pbase_paths_num - pos - 1);
1331        done_pbase_paths[pos] = hash;
1332        return 0;
1333}
1334
1335static void add_preferred_base_object(const char *name)
1336{
1337        struct pbase_tree *it;
1338        int cmplen;
1339        unsigned hash = pack_name_hash(name);
1340
1341        if (!num_preferred_base || check_pbase_path(hash))
1342                return;
1343
1344        cmplen = name_cmp_len(name);
1345        for (it = pbase_tree; it; it = it->next) {
1346                if (cmplen == 0) {
1347                        add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1348                }
1349                else {
1350                        struct tree_desc tree;
1351                        init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1352                        add_pbase_object(&tree, name, cmplen, name);
1353                }
1354        }
1355}
1356
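    /*
     * Record the tree of an edge commit as a "preferred base": its objects
     * may serve as delta bases for a thin pack even though they are not
     * written into the pack themselves.  At most "window" trees are kept.
     */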
1357static void add_preferred_base(struct object_id *oid)
1358{
1359        struct pbase_tree *it;
1360        void *data;
1361        unsigned long size;
1362        struct object_id tree_oid;
1363
1364        if (window <= num_preferred_base++)
1365                return;
1366
1367        data = read_object_with_reference(oid, tree_type, &size, &tree_oid);
1368        if (!data)
1369                return;
1370
1371        for (it = pbase_tree; it; it = it->next) {
1372                if (!oidcmp(&it->pcache.oid, &tree_oid)) {
1373                        free(data);
1374                        return;
1375                }
1376        }
1377
1378        it = xcalloc(1, sizeof(*it));
1379        it->next = pbase_tree;
1380        pbase_tree = it;
1381
1382        oidcpy(&it->pcache.oid, &tree_oid);
1383        it->pcache.tree_data = data;
1384        it->pcache.tree_size = size;
1385}
1386
1387static void cleanup_preferred_base(void)
1388{
1389        struct pbase_tree *it;
1390        unsigned i;
1391
1392        it = pbase_tree;
1393        pbase_tree = NULL;
1394        while (it) {
1395                struct pbase_tree *tmp = it;
1396                it = tmp->next;
1397                free(tmp->pcache.tree_data);
1398                free(tmp);
1399        }
1400
1401        for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1402                if (!pbase_tree_cache[i])
1403                        continue;
1404                free(pbase_tree_cache[i]->tree_data);
1405                FREE_AND_NULL(pbase_tree_cache[i]);
1406        }
1407
1408        FREE_AND_NULL(done_pbase_paths);
1409        done_pbase_paths_num = done_pbase_paths_alloc = 0;
1410}
1411
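    /*
     * Determine the real type and size of an entry and, for objects that
     * already live in a pack, whether their existing (possibly deltified)
     * representation can be reused as-is.
     */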
1412static void check_object(struct object_entry *entry)
1413{
1414        unsigned long canonical_size;
1415
1416        if (IN_PACK(entry)) {
1417                struct packed_git *p = IN_PACK(entry);
1418                struct pack_window *w_curs = NULL;
1419                const unsigned char *base_ref = NULL;
1420                struct object_entry *base_entry;
1421                unsigned long used, used_0;
1422                unsigned long avail;
1423                off_t ofs;
1424                unsigned char *buf, c;
1425                enum object_type type;
1426                unsigned long in_pack_size;
1427
1428                buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1429
1430                /*
1431                 * We want in_pack_type even if we do not reuse delta
1432                 * since non-delta representations could still be reused.
1433                 */
1434                used = unpack_object_header_buffer(buf, avail,
1435                                                   &type,
1436                                                   &in_pack_size);
1437                if (used == 0)
1438                        goto give_up;
1439
1440                if (type < 0)
1441                        BUG("invalid type %d", type);
1442                entry->in_pack_type = type;
1443
1444                /*
1445                 * Determine if this is a delta and if so whether we can
1446                 * reuse it or not.  Otherwise let's find out as cheaply as
1447                 * possible what the actual type and size for this object is.
1448                 */
1449                switch (entry->in_pack_type) {
1450                default:
1451                        /* Not a delta hence we've already got all we need. */
1452                        oe_set_type(entry, entry->in_pack_type);
1453                        SET_SIZE(entry, in_pack_size);
1454                        entry->in_pack_header_size = used;
1455                        if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
1456                                goto give_up;
1457                        unuse_pack(&w_curs);
1458                        return;
1459                case OBJ_REF_DELTA:
1460                        if (reuse_delta && !entry->preferred_base)
1461                                base_ref = use_pack(p, &w_curs,
1462                                                entry->in_pack_offset + used, NULL);
1463                        entry->in_pack_header_size = used + 20;
1464                        break;
1465                case OBJ_OFS_DELTA:
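                        /*
                         * The delta base is addressed by a variable-length
                         * negative offset: 7 bits per byte, most significant
                         * part first, and every continuation byte implicitly
                         * adds one so that longer encodings never overlap
                         * shorter ones.  Illustration: the bytes 0x91 0x2e
                         * decode as ((0x11 + 1) << 7) + 0x2e = 2350, i.e.
                         * the base starts 2350 bytes before this object.
                         */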
1466                        buf = use_pack(p, &w_curs,
1467                                       entry->in_pack_offset + used, NULL);
1468                        used_0 = 0;
1469                        c = buf[used_0++];
1470                        ofs = c & 127;
1471                        while (c & 128) {
1472                                ofs += 1;
1473                                if (!ofs || MSB(ofs, 7)) {
1474                                        error("delta base offset overflow in pack for %s",
1475                                              oid_to_hex(&entry->idx.oid));
1476                                        goto give_up;
1477                                }
1478                                c = buf[used_0++];
1479                                ofs = (ofs << 7) + (c & 127);
1480                        }
1481                        ofs = entry->in_pack_offset - ofs;
1482                        if (ofs <= 0 || ofs >= entry->in_pack_offset) {
1483                                error("delta base offset out of bound for %s",
1484                                      oid_to_hex(&entry->idx.oid));
1485                                goto give_up;
1486                        }
1487                        if (reuse_delta && !entry->preferred_base) {
1488                                struct revindex_entry *revidx;
1489                                revidx = find_pack_revindex(p, ofs);
1490                                if (!revidx)
1491                                        goto give_up;
1492                                base_ref = nth_packed_object_sha1(p, revidx->nr);
1493                        }
1494                        entry->in_pack_header_size = used + used_0;
1495                        break;
1496                }
1497
1498                if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
1499                        /*
1500                         * If base_ref was set above that means we wish to
1501                         * reuse delta data, and we even found that base
1502                         * in the list of objects we want to pack. Goodie!
1503                         *
1504                         * Depth value does not matter - find_deltas() will
1505                         * never consider a reused delta as the base object to
1506                         * deltify other objects against, in order to avoid
1507                         * circular deltas.
1508                         */
1509                        oe_set_type(entry, entry->in_pack_type);
1510                        SET_SIZE(entry, in_pack_size); /* delta size */
1511                        SET_DELTA(entry, base_entry);
1512                        entry->delta_size = in_pack_size;
1513                        entry->delta_sibling_idx = base_entry->delta_child_idx;
1514                        SET_DELTA_CHILD(base_entry, entry);
1515                        unuse_pack(&w_curs);
1516                        return;
1517                }
1518
1519                if (oe_type(entry)) {
1520                        off_t delta_pos;
1521
1522                        /*
1523                         * This must be a delta and we already know what the
1524                         * final object type is.  Let's extract the actual
1525                         * object size from the delta header.
1526                         */
1527                        delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
1528                        canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
1529                        if (canonical_size == 0)
1530                                goto give_up;
1531                        SET_SIZE(entry, canonical_size);
1532                        unuse_pack(&w_curs);
1533                        return;
1534                }
1535
1536                /*
1537                 * No choice but to fall back to the recursive delta walk
1538                 * with oid_object_info() to find out the object type
1539                 * at this point...
1540                 */
1541                give_up:
1542                unuse_pack(&w_curs);
1543        }
1544
1545        oe_set_type(entry, oid_object_info(&entry->idx.oid, &canonical_size));
1546        if (entry->type_valid) {
1547                SET_SIZE(entry, canonical_size);
1548        } else {
1549                /*
1550                 * Bad object type is checked in prepare_pack().  This is
1551                 * to permit a missing preferred base object to be ignored
1552                 * as a preferred base.  Doing so can result in a larger
1553                 * pack file, but the transfer will still take place.
1554                 */
1555        }
1556}
1557
1558static int pack_offset_sort(const void *_a, const void *_b)
1559{
1560        const struct object_entry *a = *(struct object_entry **)_a;
1561        const struct object_entry *b = *(struct object_entry **)_b;
1562        const struct packed_git *a_in_pack = IN_PACK(a);
1563        const struct packed_git *b_in_pack = IN_PACK(b);
1564
1565        /* avoid filesystem thrashing with loose objects */
1566        if (!a_in_pack && !b_in_pack)
1567                return oidcmp(&a->idx.oid, &b->idx.oid);
1568
1569        if (a_in_pack < b_in_pack)
1570                return -1;
1571        if (a_in_pack > b_in_pack)
1572                return 1;
1573        return a->in_pack_offset < b->in_pack_offset ? -1 :
1574                        (a->in_pack_offset > b->in_pack_offset);
1575}
1576
1577/*
1578 * Drop an on-disk delta we were planning to reuse. Naively, this would
1579 * just involve blanking out the "delta" field, but we have to deal
1580 * with some extra book-keeping:
1581 *
1582 *   1. Removing ourselves from the delta_sibling linked list.
1583 *
1584 *   2. Updating our size/type to the non-delta representation. These were
1585 *      either not recorded initially (size) or overwritten with the delta type
1586 *      (type) when check_object() decided to reuse the delta.
1587 *
1588 *   3. Resetting our delta depth, as we are now a base object.
1589 */
1590static void drop_reused_delta(struct object_entry *entry)
1591{
1592        unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
1593        struct object_info oi = OBJECT_INFO_INIT;
1594        enum object_type type;
1595        unsigned long size;
1596
1597        while (*idx) {
1598                struct object_entry *oe = &to_pack.objects[*idx - 1];
1599
1600                if (oe == entry)
1601                        *idx = oe->delta_sibling_idx;
1602                else
1603                        idx = &oe->delta_sibling_idx;
1604        }
1605        SET_DELTA(entry, NULL);
1606        entry->depth = 0;
1607
1608        oi.sizep = &size;
1609        oi.typep = &type;
1610        if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
1611                /*
1612                 * We failed to get the info from this pack for some reason;
1613                 * fall back to oid_object_info, which may find another copy.
1614                 * And if that fails, the error will be recorded in oe_type(entry)
1615                 * and dealt with in prepare_pack().
1616                 */
1617                oe_set_type(entry, oid_object_info(&entry->idx.oid, &size));
1618        } else {
1619                oe_set_type(entry, type);
1620        }
1621        SET_SIZE(entry, size);
1622}
1623
1624/*
1625 * Follow the chain of deltas from this entry onward, throwing away any links
1626 * that cause us to hit a cycle (as determined by the DFS state flags in
1627 * the entries).
1628 *
1629 * We also detect too-long reused chains that would violate our --depth
1630 * limit.
1631 */
1632static void break_delta_chains(struct object_entry *entry)
1633{
1634        /*
1635         * The actual depth of each object we will write is stored as an int,
1636         * as it cannot exceed our int "depth" limit. But before we break
1637         * chains based on that limit, we may potentially go as deep as the
1638         * number of objects, which is elsewhere bounded to a uint32_t.
1639         */
1640        uint32_t total_depth;
1641        struct object_entry *cur, *next;
1642
1643        for (cur = entry, total_depth = 0;
1644             cur;
1645             cur = DELTA(cur), total_depth++) {
1646                if (cur->dfs_state == DFS_DONE) {
1647                        /*
1648                         * We've already seen this object and know it isn't
1649                         * part of a cycle. We do need to append its depth
1650                         * to our count.
1651                         */
1652                        total_depth += cur->depth;
1653                        break;
1654                }
1655
1656                /*
1657                 * We break cycles before looping, so an ACTIVE state (or any
1658                 * other cruft which made its way into the state variable)
1659                 * is a bug.
1660                 */
1661                if (cur->dfs_state != DFS_NONE)
1662                        die("BUG: confusing delta dfs state in first pass: %d",
1663                            cur->dfs_state);
1664
1665                /*
1666                 * Now we know this is the first time we've seen the object. If
1667                 * it's not a delta, we're done traversing, but we'll mark it
1668                 * done to save time on future traversals.
1669                 */
1670                if (!DELTA(cur)) {
1671                        cur->dfs_state = DFS_DONE;
1672                        break;
1673                }
1674
1675                /*
1676                 * Mark ourselves as active and see if the next step causes
1677                 * us to cycle to another active object. It's important to do
1678                 * this _before_ we loop, because it impacts where we make the
1679                 * cut, and thus how our total_depth counter works.
1680                 * E.g., We may see a partial loop like:
1681                 *
1682                 *   A -> B -> C -> D -> B
1683                 *
1684                 * Cutting B->C breaks the cycle. But now the depth of A is
1685                 * only 1, and our total_depth counter is at 3. The size of the
1686                 * error is always one less than the size of the cycle we
1687                 * broke. Commits C and D were "lost" from A's chain.
1688                 *
1689                 * If we instead cut D->B, then the depth of A is correct at 3.
1690                 * We keep all commits in the chain that we examined.
1691                 */
1692                cur->dfs_state = DFS_ACTIVE;
1693                if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
1694                        drop_reused_delta(cur);
1695                        cur->dfs_state = DFS_DONE;
1696                        break;
1697                }
1698        }
1699
1700        /*
1701         * And now that we've gone all the way to the bottom of the chain, we
1702         * need to clear the active flags and set the depth fields as
1703         * appropriate. Unlike the loop above, which can quit when it drops a
1704         * delta, we need to keep going to look for more depth cuts. So we need
1705         * an extra "next" pointer to keep going after we reset cur->delta.
1706         */
1707        for (cur = entry; cur; cur = next) {
1708                next = DELTA(cur);
1709
1710                /*
1711                 * We should have a chain of zero or more ACTIVE states down to
1712                 * a final DONE. We can quit after the DONE, because either it
1713                 * has no bases, or we've already handled them in a previous
1714                 * call.
1715                 */
1716                if (cur->dfs_state == DFS_DONE)
1717                        break;
1718                else if (cur->dfs_state != DFS_ACTIVE)
1719                        die("BUG: confusing delta dfs state in second pass: %d",
1720                            cur->dfs_state);
1721
1722                /*
1723                 * If the total_depth is more than depth, then we need to snip
1724                 * the chain into two or more smaller chains that don't exceed
1725                 * the maximum depth. Most of the resulting chains will contain
1726                 * (depth + 1) entries (i.e., depth deltas plus one base), and
1727                 * the last chain (i.e., the one containing entry) will contain
1728                 * whatever entries are left over, namely
1729                 * (total_depth % (depth + 1)) of them.
1730                 *
1731                 * Since we are iterating towards decreasing depth, we need to
1732                 * decrement total_depth as we go, and we need to write to the
1733                 * entry what its final depth will be after all of the
1734                 * snipping. Since we're snipping into chains of length (depth
1735                 * + 1) entries, the final depth of an entry will be its
1736                 * original depth modulo (depth + 1). Any time we encounter an
1737                 * entry whose final depth is supposed to be zero, we snip it
1738                 * from its delta base, thereby making it so.
1739                 */
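                /*
                 * Illustration: with depth = 4 and total_depth = 7, the
                 * entries from `entry` downward get depths 7%5=2, 6%5=1,
                 * 5%5=0 (snipped, becoming a new base), then 4%5=4, 3%5=3,
                 * 2%5=2, 1%5=1 above the original base, leaving one chain
                 * of five objects and one of three.
                 */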
1740                cur->depth = (total_depth--) % (depth + 1);
1741                if (!cur->depth)
1742                        drop_reused_delta(cur);
1743
1744                cur->dfs_state = DFS_DONE;
1745        }
1746}
1747
1748static void get_object_details(void)
1749{
1750        uint32_t i;
1751        struct object_entry **sorted_by_offset;
1752
1753        sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
1754        for (i = 0; i < to_pack.nr_objects; i++)
1755                sorted_by_offset[i] = to_pack.objects + i;
1756        QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
1757
1758        for (i = 0; i < to_pack.nr_objects; i++) {
1759                struct object_entry *entry = sorted_by_offset[i];
1760                check_object(entry);
1761                if (entry->type_valid &&
1762                    oe_size_greater_than(&to_pack, entry, big_file_threshold))
1763                        entry->no_try_delta = 1;
1764        }
1765
1766        /*
1767         * This must happen in a second pass, since we rely on the delta
1768         * information for the whole list being completed.
1769         */
1770        for (i = 0; i < to_pack.nr_objects; i++)
1771                break_delta_chains(&to_pack.objects[i]);
1772
1773        free(sorted_by_offset);
1774}
1775
1776/*
1777 * We search for deltas in a list sorted by type, by filename hash, and then
1778 * by size, so that we see progressively smaller and smaller files.
1779 * That's because we prefer deltas to be from the bigger file
1780 * to the smaller -- deletes are potentially cheaper, but perhaps
1781 * more importantly, the bigger file is likely the more recent
1782 * one.  The deepest deltas are therefore the oldest objects which are
1783 * less likely to be accessed often.
1784 */
1785static int type_size_sort(const void *_a, const void *_b)
1786{
1787        const struct object_entry *a = *(struct object_entry **)_a;
1788        const struct object_entry *b = *(struct object_entry **)_b;
1789        enum object_type a_type = oe_type(a);
1790        enum object_type b_type = oe_type(b);
1791        unsigned long a_size = SIZE(a);
1792        unsigned long b_size = SIZE(b);
1793
1794        if (a_type > b_type)
1795                return -1;
1796        if (a_type < b_type)
1797                return 1;
1798        if (a->hash > b->hash)
1799                return -1;
1800        if (a->hash < b->hash)
1801                return 1;
1802        if (a->preferred_base > b->preferred_base)
1803                return -1;
1804        if (a->preferred_base < b->preferred_base)
1805                return 1;
1806        if (a_size > b_size)
1807                return -1;
1808        if (a_size < b_size)
1809                return 1;
1810        return a < b ? -1 : (a > b);  /* newest first */
1811}
1812
1813struct unpacked {
1814        struct object_entry *entry;
1815        void *data;
1816        struct delta_index *index;
1817        unsigned depth;
1818};
1819
1820static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
1821                           unsigned long delta_size)
1822{
1823        if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
1824                return 0;
1825
1826        if (delta_size < cache_max_small_delta_size)
1827                return 1;
1828
1829        /* cache delta, if objects are large enough compared to delta size */
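        /*
         * Roughly: cache if src_size/1MiB + trg_size/2MiB exceeds
         * delta_size/1KiB.  E.g. a 4 MiB source and a 4 MiB target give
         * 4 + 2 = 6, so deltas smaller than about 6 KiB are still cached.
         */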
1830        if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
1831                return 1;
1832
1833        return 0;
1834}
1835
1836#ifndef NO_PTHREADS
1837
1838static pthread_mutex_t read_mutex;
1839#define read_lock()             pthread_mutex_lock(&read_mutex)
1840#define read_unlock()           pthread_mutex_unlock(&read_mutex)
1841
1842static pthread_mutex_t cache_mutex;
1843#define cache_lock()            pthread_mutex_lock(&cache_mutex)
1844#define cache_unlock()          pthread_mutex_unlock(&cache_mutex)
1845
1846static pthread_mutex_t progress_mutex;
1847#define progress_lock()         pthread_mutex_lock(&progress_mutex)
1848#define progress_unlock()       pthread_mutex_unlock(&progress_mutex)
1849
1850#else
1851
1852#define read_lock()             (void)0
1853#define read_unlock()           (void)0
1854#define cache_lock()            (void)0
1855#define cache_unlock()          (void)0
1856#define progress_lock()         (void)0
1857#define progress_unlock()       (void)0
1858
1859#endif
1860
1861/*
1862 * Return the size of the object without doing any delta
1863 * reconstruction (so non-deltas are true object sizes, but deltas
1864 * return the size of the delta data).
1865 */
1866unsigned long oe_get_size_slow(struct packing_data *pack,
1867                               const struct object_entry *e)
1868{
1869        struct packed_git *p;
1870        struct pack_window *w_curs;
1871        unsigned char *buf;
1872        enum object_type type;
1873        unsigned long used, avail, size;
1874
1875        if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
1876                read_lock();
1877                if (oid_object_info(&e->idx.oid, &size) < 0)
1878                        die(_("unable to get size of %s"),
1879                            oid_to_hex(&e->idx.oid));
1880                read_unlock();
1881                return size;
1882        }
1883
1884        p = oe_in_pack(pack, e);
1885        if (!p)
1886                BUG("when e->type is a delta, it must belong to a pack");
1887
1888        read_lock();
1889        w_curs = NULL;
1890        buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
1891        used = unpack_object_header_buffer(buf, avail, &type, &size);
1892        if (used == 0)
1893                die(_("unable to parse object header of %s"),
1894                    oid_to_hex(&e->idx.oid));
1895
1896        unuse_pack(&w_curs);
1897        read_unlock();
1898        return size;
1899}
1900
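/*
 * Try to deltify trg against src.  Returns 1 if a new (better) delta was
 * recorded for trg, 0 if src is not a suitable base, and -1 when the
 * object types differ, which makes the caller stop scanning the window.
 */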
1901static int try_delta(struct unpacked *trg, struct unpacked *src,
1902                     unsigned max_depth, unsigned long *mem_usage)
1903{
1904        struct object_entry *trg_entry = trg->entry;
1905        struct object_entry *src_entry = src->entry;
1906        unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1907        unsigned ref_depth;
1908        enum object_type type;
1909        void *delta_buf;
1910
1911        /* Don't bother doing diffs between different types */
1912        if (oe_type(trg_entry) != oe_type(src_entry))
1913                return -1;
1914
1915        /*
1916         * We do not bother to try a delta that we discarded on an
1917         * earlier try, but only when reusing delta data.  Note that
1918         * a src_entry that is marked as preferred_base should always
1919         * be considered, as even if we produce a suboptimal delta against
1920         * it, we will still save the transfer cost, as we already know
1921         * the other side has it and we won't send src_entry at all.
1922         */
1923        if (reuse_delta && IN_PACK(trg_entry) &&
1924            IN_PACK(trg_entry) == IN_PACK(src_entry) &&
1925            !src_entry->preferred_base &&
1926            trg_entry->in_pack_type != OBJ_REF_DELTA &&
1927            trg_entry->in_pack_type != OBJ_OFS_DELTA)
1928                return 0;
1929
1930        /* Let's not bust the allowed depth. */
1931        if (src->depth >= max_depth)
1932                return 0;
1933
1934        /* Now some size filtering heuristics. */
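        /*
         * The new delta must be smaller than roughly half the target (or
         * than the delta we already have), scaled down by how much deeper
         * the source already sits.  Illustration: with trg_size = 1000, no
         * existing delta, max_depth = 10 and src->depth = 3, the budget is
         * (1000/2 - 20) * 7 / 10 = 336 bytes.
         */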
1935        trg_size = SIZE(trg_entry);
1936        if (!DELTA(trg_entry)) {
1937                max_size = trg_size/2 - 20;
1938                ref_depth = 1;
1939        } else {
1940                max_size = trg_entry->delta_size;
1941                ref_depth = trg->depth;
1942        }
1943        max_size = (uint64_t)max_size * (max_depth - src->depth) /
1944                                                (max_depth - ref_depth + 1);
1945        if (max_size == 0)
1946                return 0;
1947        src_size = SIZE(src_entry);
1948        sizediff = src_size < trg_size ? trg_size - src_size : 0;
1949        if (sizediff >= max_size)
1950                return 0;
1951        if (trg_size < src_size / 32)
1952                return 0;
1953
1954        /* Load data if not already done */
1955        if (!trg->data) {
1956                read_lock();
1957                trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
1958                read_unlock();
1959                if (!trg->data)
1960                        die("object %s cannot be read",
1961                            oid_to_hex(&trg_entry->idx.oid));
1962                if (sz != trg_size)
1963                        die("object %s inconsistent object length (%lu vs %lu)",
1964                            oid_to_hex(&trg_entry->idx.oid), sz,
1965                            trg_size);
1966                *mem_usage += sz;
1967        }
1968        if (!src->data) {
1969                read_lock();
1970                src->data = read_object_file(&src_entry->idx.oid, &type, &sz);
1971                read_unlock();
1972                if (!src->data) {
1973                        if (src_entry->preferred_base) {
1974                                static int warned = 0;
1975                                if (!warned++)
1976                                        warning("object %s cannot be read",
1977                                                oid_to_hex(&src_entry->idx.oid));
1978                                /*
1979                                 * Those objects are not included in the
1980                                 * resulting pack.  Be resilient and ignore
1981                                 * them if they can't be read, in case the
1982                                 * pack could be created nevertheless.
1983                                 */
1984                                return 0;
1985                        }
1986                        die("object %s cannot be read",
1987                            oid_to_hex(&src_entry->idx.oid));
1988                }
1989                if (sz != src_size)
1990                        die("object %s inconsistent object length (%lu vs %lu)",
1991                            oid_to_hex(&src_entry->idx.oid), sz,
1992                            src_size);
1993                *mem_usage += sz;
1994        }
1995        if (!src->index) {
1996                src->index = create_delta_index(src->data, src_size);
1997                if (!src->index) {
1998                        static int warned = 0;
1999                        if (!warned++)
2000                                warning("suboptimal pack - out of memory");
2001                        return 0;
2002                }
2003                *mem_usage += sizeof_delta_index(src->index);
2004        }
2005
2006        delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2007        if (!delta_buf)
2008                return 0;
2009
2010        if (DELTA(trg_entry)) {
2011                /* Prefer only shallower same-sized deltas. */
2012                if (delta_size == trg_entry->delta_size &&
2013                    src->depth + 1 >= trg->depth) {
2014                        free(delta_buf);
2015                        return 0;
2016                }
2017        }
2018
2019        /*
2020         * Handle memory allocation outside of the cache
2021         * accounting lock.  Compiler will optimize the strangeness
2022         * away when NO_PTHREADS is defined.
2023         */
2024        free(trg_entry->delta_data);
2025        cache_lock();
2026        if (trg_entry->delta_data) {
2027                delta_cache_size -= trg_entry->delta_size;
2028                trg_entry->delta_data = NULL;
2029        }
2030        if (delta_cacheable(src_size, trg_size, delta_size)) {
2031                delta_cache_size += delta_size;
2032                cache_unlock();
2033                trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2034        } else {
2035                cache_unlock();
2036                free(delta_buf);
2037        }
2038
2039        SET_DELTA(trg_entry, src_entry);
2040        trg_entry->delta_size = delta_size;
2041        trg->depth = src->depth + 1;
2042
2043        return 1;
2044}
2045
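/*
 * Return the depth of the deepest chain of delta children below "me",
 * counting from the starting value n.  find_deltas() uses this to cap
 * how deeply an object at the pack edge may itself be deltified without
 * pushing its existing children past the --depth limit.
 */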
2046static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
2047{
2048        struct object_entry *child = DELTA_CHILD(me);
2049        unsigned int m = n;
2050        while (child) {
2051                unsigned int c = check_delta_limit(child, n + 1);
2052                if (m < c)
2053                        m = c;
2054                child = DELTA_SIBLING(child);
2055        }
2056        return m;
2057}
2058
2059static unsigned long free_unpacked(struct unpacked *n)
2060{
2061        unsigned long freed_mem = sizeof_delta_index(n->index);
2062        free_delta_index(n->index);
2063        n->index = NULL;
2064        if (n->data) {
2065                freed_mem += SIZE(n->entry);
2066                FREE_AND_NULL(n->data);
2067        }
2068        n->entry = NULL;
2069        n->depth = 0;
2070        return freed_mem;
2071}
2072
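/*
 * The sliding-window delta search: walk the sorted object list, keeping
 * the last `window` objects around, and try each of them as a delta base
 * for the current object, remembering the best result found.
 */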
2073static void find_deltas(struct object_entry **list, unsigned *list_size,
2074                        int window, int depth, unsigned *processed)
2075{
2076        uint32_t i, idx = 0, count = 0;
2077        struct unpacked *array;
2078        unsigned long mem_usage = 0;
2079
2080        array = xcalloc(window, sizeof(struct unpacked));
2081
2082        for (;;) {
2083                struct object_entry *entry;
2084                struct unpacked *n = array + idx;
2085                int j, max_depth, best_base = -1;
2086
2087                progress_lock();
2088                if (!*list_size) {
2089                        progress_unlock();
2090                        break;
2091                }
2092                entry = *list++;
2093                (*list_size)--;
2094                if (!entry->preferred_base) {
2095                        (*processed)++;
2096                        display_progress(progress_state, *processed);
2097                }
2098                progress_unlock();
2099
2100                mem_usage -= free_unpacked(n);
2101                n->entry = entry;
2102
2103                while (window_memory_limit &&
2104                       mem_usage > window_memory_limit &&
2105                       count > 1) {
2106                        uint32_t tail = (idx + window - count) % window;
2107                        mem_usage -= free_unpacked(array + tail);
2108                        count--;
2109                }
2110
2111                /* We do not compute delta to *create* objects we are not
2112                 * going to pack.
2113                 */
2114                if (entry->preferred_base)
2115                        goto next;
2116
2117                /*
2118                 * If the current object is at the pack edge, take the depth of
2119                 * the objects that depend on it into account; otherwise they
2120                 * would become too deep.
2121                 */
2122                max_depth = depth;
2123                if (DELTA_CHILD(entry)) {
2124                        max_depth -= check_delta_limit(entry, 0);
2125                        if (max_depth <= 0)
2126                                goto next;
2127                }
2128
2129                j = window;
2130                while (--j > 0) {
2131                        int ret;
2132                        uint32_t other_idx = idx + j;
2133                        struct unpacked *m;
2134                        if (other_idx >= window)
2135                                other_idx -= window;
2136                        m = array + other_idx;
2137                        if (!m->entry)
2138                                break;
2139                        ret = try_delta(n, m, max_depth, &mem_usage);
2140                        if (ret < 0)
2141                                break;
2142                        else if (ret > 0)
2143                                best_base = other_idx;
2144                }
2145
2146                /*
2147                 * If we decided to cache the delta data, then it is best
2148                 * to compress it right away.  First because we have to do
2149                 * it anyway, and doing it here while we're threaded will
2150                 * save a lot of time in the non-threaded write phase,
2151                 * as well as allow for caching more deltas within
2152                 * the same cache size limit.
2153                 * ...
2154                 * But only if not writing to stdout, since in that case
2155                 * the network is most likely throttling writes anyway,
2156                 * and therefore it is best to go to the write phase ASAP
2157                 * instead, as we can afford spending more time compressing
2158                 * between writes at that moment.
2159                 */
2160                if (entry->delta_data && !pack_to_stdout) {
2161                        unsigned long size;
2162
2163                        size = do_compress(&entry->delta_data, entry->delta_size);
2164                        if (size < (1U << OE_Z_DELTA_BITS)) {
2165                                entry->z_delta_size = size;
2166                                cache_lock();
2167                                delta_cache_size -= entry->delta_size;
2168                                delta_cache_size += entry->z_delta_size;
2169                                cache_unlock();
2170                        } else {
2171                                FREE_AND_NULL(entry->delta_data);
2172                                entry->z_delta_size = 0;
2173                        }
2174                }
2175
2176                /* if we made n a delta, and if n is already at max
2177                 * depth, leaving it in the window is pointless.  we
2178                 * should evict it first.
2179                 */
2180                if (DELTA(entry) && max_depth <= n->depth)
2181                        continue;
2182
2183                /*
2184                 * Move the best delta base up in the window, after the
2185                 * currently deltified object, to keep it longer.  It will
2186                 * be the first base object to be attempted next.
2187                 */
2188                if (DELTA(entry)) {
2189                        struct unpacked swap = array[best_base];
2190                        int dist = (window + idx - best_base) % window;
2191                        int dst = best_base;
2192                        while (dist--) {
2193                                int src = (dst + 1) % window;
2194                                array[dst] = array[src];
2195                                dst = src;
2196                        }
2197                        array[dst] = swap;
2198                }
2199
2200                next:
2201                idx++;
2202                if (count + 1 < window)
2203                        count++;
2204                if (idx >= window)
2205                        idx = 0;
2206        }
2207
2208        for (i = 0; i < window; ++i) {
2209                free_delta_index(array[i].index);
2210                free(array[i].data);
2211        }
2212        free(array);
2213}
2214
2215#ifndef NO_PTHREADS
2216
2217static void try_to_free_from_threads(size_t size)
2218{
2219        read_lock();
2220        release_pack_memory(size);
2221        read_unlock();
2222}
2223
2224static try_to_free_t old_try_to_free_routine;
2225
2226/*
2227 * The main thread waits on the condition that (at least) one of the workers
2228 * has stopped working (which is indicated in the .working member of
2229 * struct thread_params).
2230 * When a work thread has completed its work, it sets .working to 0 and
2231 * signals the main thread and waits on the condition that .data_ready
2232 * becomes 1.
2233 */
2234
2235struct thread_params {
2236        pthread_t thread;
2237        struct object_entry **list;
2238        unsigned list_size;
2239        unsigned remaining;
2240        int window;
2241        int depth;
2242        int working;
2243        int data_ready;
2244        pthread_mutex_t mutex;
2245        pthread_cond_t cond;
2246        unsigned *processed;
2247};
2248
2249static pthread_cond_t progress_cond;
2250
2251/*
2252 * Mutexes and condition variables can't be statically initialized on Windows.
2253 */
2254static void init_threaded_search(void)
2255{
2256        init_recursive_mutex(&read_mutex);
2257        pthread_mutex_init(&cache_mutex, NULL);
2258        pthread_mutex_init(&progress_mutex, NULL);
2259        pthread_cond_init(&progress_cond, NULL);
2260        old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
2261}
2262
2263static void cleanup_threaded_search(void)
2264{
2265        set_try_to_free_routine(old_try_to_free_routine);
2266        pthread_cond_destroy(&progress_cond);
2267        pthread_mutex_destroy(&read_mutex);
2268        pthread_mutex_destroy(&cache_mutex);
2269        pthread_mutex_destroy(&progress_mutex);
2270}
2271
2272static void *threaded_find_deltas(void *arg)
2273{
2274        struct thread_params *me = arg;
2275
2276        progress_lock();
2277        while (me->remaining) {
2278                progress_unlock();
2279
2280                find_deltas(me->list, &me->remaining,
2281                            me->window, me->depth, me->processed);
2282
2283                progress_lock();
2284                me->working = 0;
2285                pthread_cond_signal(&progress_cond);
2286                progress_unlock();
2287
2288                /*
2289                 * We must not set ->data_ready before we wait on the
2290                 * condition because the main thread may have set it to 1
2291                 * before we get here. In order to be sure that new
2292                 * work is available if we see 1 in ->data_ready, it
2293                 * was initialized to 0 before this thread was spawned
2294                 * and we reset it to 0 right away.
2295                 */
2296                pthread_mutex_lock(&me->mutex);
2297                while (!me->data_ready)
2298                        pthread_cond_wait(&me->cond, &me->mutex);
2299                me->data_ready = 0;
2300                pthread_mutex_unlock(&me->mutex);
2301
2302                progress_lock();
2303        }
2304        progress_unlock();
2305        /* leave ->working 1 so that this doesn't get more work assigned */
2306        return NULL;
2307}
2308
2309static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2310                           int window, int depth, unsigned *processed)
2311{
2312        struct thread_params *p;
2313        int i, ret, active_threads = 0;
2314
2315        init_threaded_search();
2316
2317        if (delta_search_threads <= 1) {
2318                find_deltas(list, &list_size, window, depth, processed);
2319                cleanup_threaded_search();
2320                return;
2321        }
2322        if (progress > pack_to_stdout)
2323                fprintf(stderr, "Delta compression using up to %d threads.\n",
2324                                delta_search_threads);
2325        p = xcalloc(delta_search_threads, sizeof(*p));
2326
2327        /* Partition the work amongst work threads. */
2328        for (i = 0; i < delta_search_threads; i++) {
2329                unsigned sub_size = list_size / (delta_search_threads - i);
2330
2331                /* don't use too small segments or no deltas will be found */
2332                if (sub_size < 2*window && i+1 < delta_search_threads)
2333                        sub_size = 0;
2334
2335                p[i].window = window;
2336                p[i].depth = depth;
2337                p[i].processed = processed;
2338                p[i].working = 1;
2339                p[i].data_ready = 0;
2340
2341                /* try to split chunks on "path" boundaries */
2342                while (sub_size && sub_size < list_size &&
2343                       list[sub_size]->hash &&
2344                       list[sub_size]->hash == list[sub_size-1]->hash)
2345                        sub_size++;
2346
2347                p[i].list = list;
2348                p[i].list_size = sub_size;
2349                p[i].remaining = sub_size;
2350
2351                list += sub_size;
2352                list_size -= sub_size;
2353        }
2354
2355        /* Start work threads. */
2356        for (i = 0; i < delta_search_threads; i++) {
2357                if (!p[i].list_size)
2358                        continue;
2359                pthread_mutex_init(&p[i].mutex, NULL);
2360                pthread_cond_init(&p[i].cond, NULL);
2361                ret = pthread_create(&p[i].thread, NULL,
2362                                     threaded_find_deltas, &p[i]);
2363                if (ret)
2364                        die("unable to create thread: %s", strerror(ret));
2365                active_threads++;
2366        }
2367
2368        /*
2369         * Now let's wait for work completion.  Each time a thread is done
2370         * with its work, we steal half of the remaining work from the
2371         * thread with the largest number of unprocessed objects and give
2372                 * it to that newly idle thread.  This ensures good load balancing
2373         * until the remaining object list segments are simply too short
2374         * to be worth splitting anymore.
2375         */
2376        while (active_threads) {
2377                struct thread_params *target = NULL;
2378                struct thread_params *victim = NULL;
2379                unsigned sub_size = 0;
2380
2381                progress_lock();
2382                for (;;) {
2383                        for (i = 0; !target && i < delta_search_threads; i++)
2384                                if (!p[i].working)
2385                                        target = &p[i];
2386                        if (target)
2387                                break;
2388                        pthread_cond_wait(&progress_cond, &progress_mutex);
2389                }
2390
2391                for (i = 0; i < delta_search_threads; i++)
2392                        if (p[i].remaining > 2*window &&
2393                            (!victim || victim->remaining < p[i].remaining))
2394                                victim = &p[i];
2395                if (victim) {
2396                        sub_size = victim->remaining / 2;
2397                        list = victim->list + victim->list_size - sub_size;
2398                        while (sub_size && list[0]->hash &&
2399                               list[0]->hash == list[-1]->hash) {
2400                                list++;
2401                                sub_size--;
2402                        }
2403                        if (!sub_size) {
2404                                /*
2405                                 * It is possible for some "paths" to have
2406                                 * so many objects that no hash boundary
2407                                 * might be found.  Let's just steal the
2408                                 * exact half in that case.
2409                                 */
2410                                sub_size = victim->remaining / 2;
2411                                list -= sub_size;
2412                        }
2413                        target->list = list;
2414                        victim->list_size -= sub_size;
2415                        victim->remaining -= sub_size;
2416                }
2417                target->list_size = sub_size;
2418                target->remaining = sub_size;
2419                target->working = 1;
2420                progress_unlock();
2421
2422                pthread_mutex_lock(&target->mutex);
2423                target->data_ready = 1;
2424                pthread_cond_signal(&target->cond);
2425                pthread_mutex_unlock(&target->mutex);
2426
2427                if (!sub_size) {
2428                        pthread_join(target->thread, NULL);
2429                        pthread_cond_destroy(&target->cond);
2430                        pthread_mutex_destroy(&target->mutex);
2431                        active_threads--;
2432                }
2433        }
2434        cleanup_threaded_search();
2435        free(p);
2436}
2437
2438#else
2439#define ll_find_deltas(l, s, w, d, p)   find_deltas(l, &s, w, d, p)
2440#endif
2441
2442static void add_tag_chain(const struct object_id *oid)
2443{
2444        struct tag *tag;
2445
2446        /*
2447         * We catch duplicates already in add_object_entry(), but we'd
2448         * prefer to do this extra check to avoid having to parse the
2449         * tag at all if we already know that it's being packed (e.g., if
2450         * it was included via bitmaps, we would not have parsed it
2451         * previously).
2452         */
2453        if (packlist_find(&to_pack, oid->hash, NULL))
2454                return;
2455
2456        tag = lookup_tag(oid);
2457        while (1) {
2458                if (!tag || parse_tag(tag) || !tag->tagged)
2459                        die("unable to pack objects reachable from tag %s",
2460                            oid_to_hex(oid));
2461
2462                add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
2463
2464                if (tag->tagged->type != OBJ_TAG)
2465                        return;
2466
2467                tag = (struct tag *)tag->tagged;
2468        }
2469}
2470
2471static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
2472{
2473        struct object_id peeled;
2474
2475        if (starts_with(path, "refs/tags/") && /* is a tag? */
2476            !peel_ref(path, &peeled)    && /* peelable? */
2477            packlist_find(&to_pack, peeled.hash, NULL))      /* object packed? */
2478                add_tag_chain(oid);
2479        return 0;
2480}
2481
2482static void prepare_pack(int window, int depth)
2483{
2484        struct object_entry **delta_list;
2485        uint32_t i, nr_deltas;
2486        unsigned n;
2487
2488        get_object_details();
2489
2490        /*
2491         * If we're locally repacking then we need to be doubly careful
2492         * from now on in order to make sure no stealth corruption gets
2493         * propagated to the new pack.  Clients receiving streamed packs
2494         * should validate everything they get anyway so no need to incur
2495         * the additional cost here in that case.
2496         */
2497        if (!pack_to_stdout)
2498                do_check_packed_object_crc = 1;
2499
2500        if (!to_pack.nr_objects || !window || !depth)
2501                return;
2502
2503        ALLOC_ARRAY(delta_list, to_pack.nr_objects);
2504        nr_deltas = n = 0;
2505
2506        for (i = 0; i < to_pack.nr_objects; i++) {
2507                struct object_entry *entry = to_pack.objects + i;
2508
2509                if (DELTA(entry))
2510                        /* This happens if we decided to reuse existing
2511                         * delta from a pack.  "reuse_delta &&" is implied.
2512                         */
2513                        continue;
2514
2515                if (!entry->type_valid ||
2516                    oe_size_less_than(&to_pack, entry, 50))
2517                        continue;
2518
2519                if (entry->no_try_delta)
2520                        continue;
2521
2522                if (!entry->preferred_base) {
2523                        nr_deltas++;
2524                        if (oe_type(entry) < 0)
2525                                die("unable to get type of object %s",
2526                                    oid_to_hex(&entry->idx.oid));
2527                } else {
2528                        if (oe_type(entry) < 0) {
2529                                /*
2530                                 * This object is not found, but we
2531                                 * don't have to include it anyway.
2532                                 */
2533                                continue;
2534                        }
2535                }
2536
2537                delta_list[n++] = entry;
2538        }
2539
2540        if (nr_deltas && n > 1) {
2541                unsigned nr_done = 0;
2542                if (progress)
2543                        progress_state = start_progress(_("Compressing objects"),
2544                                                        nr_deltas);
2545                QSORT(delta_list, n, type_size_sort);
2546                ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
2547                stop_progress(&progress_state);
2548                if (nr_done != nr_deltas)
2549                        die("inconsistency with delta count");
2550        }
2551        free(delta_list);
2552}
2553
2554static int git_pack_config(const char *k, const char *v, void *cb)
2555{
2556        if (!strcmp(k, "pack.window")) {
2557                window = git_config_int(k, v);
2558                return 0;
2559        }
2560        if (!strcmp(k, "pack.windowmemory")) {
2561                window_memory_limit = git_config_ulong(k, v);
2562                return 0;
2563        }
2564        if (!strcmp(k, "pack.depth")) {
2565                depth = git_config_int(k, v);
2566                return 0;
2567        }
2568        if (!strcmp(k, "pack.deltacachesize")) {
2569                max_delta_cache_size = git_config_int(k, v);
2570                return 0;
2571        }
2572        if (!strcmp(k, "pack.deltacachelimit")) {
2573                cache_max_small_delta_size = git_config_int(k, v);
2574                return 0;
2575        }
2576        if (!strcmp(k, "pack.writebitmaphashcache")) {
2577                if (git_config_bool(k, v))
2578                        write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
2579                else
2580                        write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
2581        }
2582        if (!strcmp(k, "pack.usebitmaps")) {
2583                use_bitmap_index_default = git_config_bool(k, v);
2584                return 0;
2585        }
2586        if (!strcmp(k, "pack.threads")) {
2587                delta_search_threads = git_config_int(k, v);
2588                if (delta_search_threads < 0)
2589                        die("invalid number of threads specified (%d)",
2590                            delta_search_threads);
2591#ifdef NO_PTHREADS
2592                if (delta_search_threads != 1) {
2593                        warning("no threads support, ignoring %s", k);
2594                        delta_search_threads = 0;
2595                }
2596#endif
2597                return 0;
2598        }
2599        if (!strcmp(k, "pack.indexversion")) {
2600                pack_idx_opts.version = git_config_int(k, v);
2601                if (pack_idx_opts.version > 2)
2602                        die("bad pack.indexversion=%"PRIu32,
2603                            pack_idx_opts.version);
2604                return 0;
2605        }
2606        return git_default_config(k, v, cb);
2607}
2608
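/*
 * Read object names from stdin in the format produced by
 * "git rev-list --objects": an object ID optionally followed by a name,
 * with a leading '-' marking an edge object that becomes a preferred
 * base instead of being packed.
 */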
2609static void read_object_list_from_stdin(void)
2610{
2611        char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
2612        struct object_id oid;
2613        const char *p;
2614
2615        for (;;) {
2616                if (!fgets(line, sizeof(line), stdin)) {
2617                        if (feof(stdin))
2618                                break;
2619                        if (!ferror(stdin))
2620                                die("fgets returned NULL, not EOF, not error!");
2621                        if (errno != EINTR)
2622                                die_errno("fgets");
2623                        clearerr(stdin);
2624                        continue;
2625                }
2626                if (line[0] == '-') {
2627                        if (get_oid_hex(line+1, &oid))
2628                                die("expected edge object ID, got garbage:\n %s",
2629                                    line);
2630                        add_preferred_base(&oid);
2631                        continue;
2632                }
2633                if (parse_oid_hex(line, &oid, &p))
2634                        die("expected object ID, got garbage:\n %s", line);
2635
2636                add_preferred_base_object(p + 1);
2637                add_object_entry(&oid, OBJ_NONE, p + 1, 0);
2638        }
2639}
2640
2641/* Remember to update object flag allocation in object.h */
2642#define OBJECT_ADDED (1u<<20)
2643
2644static void show_commit(struct commit *commit, void *data)
2645{
2646        add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
2647        commit->object.flags |= OBJECT_ADDED;
2648
2649        if (write_bitmap_index)
2650                index_commit_for_bitmap(commit);
2651}
2652
2653static void show_object(struct object *obj, const char *name, void *data)
2654{
2655        add_preferred_base_object(name);
2656        add_object_entry(&obj->oid, obj->type, name, 0);
2657        obj->flags |= OBJECT_ADDED;
2658}
2659
2660static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
2661{
2662        assert(arg_missing_action == MA_ALLOW_ANY);
2663
2664        /*
2665         * Quietly ignore ALL missing objects.  This avoids problems with
2666         * staging them now and getting an odd error later.
2667         */
2668        if (!has_object_file(&obj->oid))
2669                return;
2670
2671        show_object(obj, name, data);
2672}
2673
2674static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
2675{
2676        assert(arg_missing_action == MA_ALLOW_PROMISOR);
2677
2678        /*
2679         * Quietly ignore EXPECTED missing objects.  This avoids problems with
2680         * staging them now and getting an odd error later.
2681         */
2682        if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid))
2683                return;
2684
2685        show_object(obj, name, data);
2686}
2687
2688static int option_parse_missing_action(const struct option *opt,
2689                                       const char *arg, int unset)
2690{
2691        assert(arg);
2692        assert(!unset);
2693
2694        if (!strcmp(arg, "error")) {
2695                arg_missing_action = MA_ERROR;
2696                fn_show_object = show_object;
2697                return 0;
2698        }
2699
2700        if (!strcmp(arg, "allow-any")) {
2701                arg_missing_action = MA_ALLOW_ANY;
2702                fetch_if_missing = 0;
2703                fn_show_object = show_object__ma_allow_any;
2704                return 0;
2705        }
2706
2707        if (!strcmp(arg, "allow-promisor")) {
2708                arg_missing_action = MA_ALLOW_PROMISOR;
2709                fetch_if_missing = 0;
2710                fn_show_object = show_object__ma_allow_promisor;
2711                return 0;
2712        }
2713
2714        die(_("invalid value for --missing"));
2715        return 0;
2716}
2717
2718static void show_edge(struct commit *commit)
2719{
2720        add_preferred_base(&commit->object.oid);
2721}
2722
2723struct in_pack_object {
2724        off_t offset;
2725        struct object *object;
2726};
2727
2728struct in_pack {
2729        unsigned int alloc;
2730        unsigned int nr;
2731        struct in_pack_object *array;
2732};
2733
2734static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
2735{
2736        in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
2737        in_pack->array[in_pack->nr].object = object;
2738        in_pack->nr++;
2739}
2740
2741/*
2742 * Compare the objects by their in-pack offsets, to emulate the
2743 * "git rev-list --objects" output that produced the pack originally.
2744 */
2745static int ofscmp(const void *a_, const void *b_)
2746{
2747        struct in_pack_object *a = (struct in_pack_object *)a_;
2748        struct in_pack_object *b = (struct in_pack_object *)b_;
2749
2750        if (a->offset < b->offset)
2751                return -1;
2752        else if (a->offset > b->offset)
2753                return 1;
2754        else
2755                return oidcmp(&a->object->oid, &b->object->oid);
2756}
2757
2758static void add_objects_in_unpacked_packs(struct rev_info *revs)
2759{
2760        struct packed_git *p;
2761        struct in_pack in_pack;
2762        uint32_t i;
2763
2764        memset(&in_pack, 0, sizeof(in_pack));
2765
2766        for (p = get_packed_git(the_repository); p; p = p->next) {
2767                struct object_id oid;
2768                struct object *o;
2769
2770                if (!p->pack_local || p->pack_keep)
2771                        continue;
2772                if (open_pack_index(p))
2773                        die("cannot open pack index");
2774
2775                ALLOC_GROW(in_pack.array,
2776                           in_pack.nr + p->num_objects,
2777                           in_pack.alloc);
2778
2779                for (i = 0; i < p->num_objects; i++) {
2780                        nth_packed_object_oid(&oid, p, i);
2781                        o = lookup_unknown_object(oid.hash);
2782                        if (!(o->flags & OBJECT_ADDED))
2783                                mark_in_pack_object(o, p, &in_pack);
2784                        o->flags |= OBJECT_ADDED;
2785                }
2786        }
2787
2788        if (in_pack.nr) {
2789                QSORT(in_pack.array, in_pack.nr, ofscmp);
2790                for (i = 0; i < in_pack.nr; i++) {
2791                        struct object *o = in_pack.array[i].object;
2792                        add_object_entry(&o->oid, o->type, "", 0);
2793                }
2794        }
2795        free(in_pack.array);
2796}
2797
2798static int add_loose_object(const struct object_id *oid, const char *path,
2799                            void *data)
2800{
2801        enum object_type type = oid_object_info(oid, NULL);
2802
2803        if (type < 0) {
2804                warning("loose object at %s could not be examined", path);
2805                return 0;
2806        }
2807
2808        add_object_entry(oid, type, "", 0);
2809        return 0;
2810}
2811
2812/*
2813 * We actually don't even have to worry about reachability here.
2814 * add_object_entry will weed out duplicates, so we just add every
2815 * loose object we find.
2816 */
2817static void add_unreachable_loose_objects(void)
2818{
2819        for_each_loose_file_in_objdir(get_object_directory(),
2820                                      add_loose_object,
2821                                      NULL, NULL, NULL);
2822}
2823
2824static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
2825{
2826        static struct packed_git *last_found = (void *)1;
2827        struct packed_git *p;
2828
2829        p = (last_found != (void *)1) ? last_found :
2830                                        get_packed_git(the_repository);
2831
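        /*
         * Scan all packs, but start with the one that answered the last
         * query ("last_found"); when we wrap around to the head of the
         * list, skip that pack so it is not examined twice.
         */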
2832        while (p) {
2833                if ((!p->pack_local || p->pack_keep) &&
2834                        find_pack_entry_one(oid->hash, p)) {
2835                        last_found = p;
2836                        return 1;
2837                }
2838                if (p == last_found)
2839                        p = get_packed_git(the_repository);
2840                else
2841                        p = p->next;
2842                if (p == last_found)
2843                        p = p->next;
2844        }
2845        return 0;
2846}
2847
2848/*
2849 * Store a list of sha1s that should not be discarded
2850 * because they were either written too recently, or are
2851 * reachable from another object that was.
2852 *
2853 * This is filled by get_object_list.
2854 */
2855static struct oid_array recent_objects;
2856
2857static int loosened_object_can_be_discarded(const struct object_id *oid,
2858                                            timestamp_t mtime)
2859{
2860        if (!unpack_unreachable_expiration)
2861                return 0;
2862        if (mtime > unpack_unreachable_expiration)
2863                return 0;
2864        if (oid_array_lookup(&recent_objects, oid) >= 0)
2865                return 0;
2866        return 1;
2867}
2868
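/*
 * Write back, as loose objects, the objects from local non-.keep packs
 * that are not going into the new pack, unless they are old enough (and
 * not recently reachable) to be discarded outright.
 */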
2869static void loosen_unused_packed_objects(struct rev_info *revs)
2870{
2871        struct packed_git *p;
2872        uint32_t i;
2873        struct object_id oid;
2874
2875        for (p = get_packed_git(the_repository); p; p = p->next) {
2876                if (!p->pack_local || p->pack_keep)
2877                        continue;
2878
2879                if (open_pack_index(p))
2880                        die("cannot open pack index");
2881
2882                for (i = 0; i < p->num_objects; i++) {
2883                        nth_packed_object_oid(&oid, p, i);
2884                        if (!packlist_find(&to_pack, oid.hash, NULL) &&
2885                            !has_sha1_pack_kept_or_nonlocal(&oid) &&
2886                            !loosened_object_can_be_discarded(&oid, p->mtime))
2887                                if (force_object_loose(&oid, p->mtime))
2888                                        die("unable to force loose object");
2889                }
2890        }
2891}
2892
2893/*
2894 * This tracks any options which pack-reuse code expects to be on, or which a
2895 * reader of the pack might not understand, and which would therefore prevent
2896 * blind reuse of what we have on disk.
2897 */
2898static int pack_options_allow_reuse(void)
2899{
2900        return pack_to_stdout &&
2901               allow_ofs_delta &&
2902               !ignore_packed_keep &&
2903               (!local || !have_non_local_packs) &&
2904               !incremental;
2905}
2906
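/*
 * Enumerate the objects to pack using the reachability bitmaps; when the
 * options permit it, arrange to reuse a chunk of an existing packfile
 * verbatim instead of adding its objects one by one.
 */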
2907static int get_object_list_from_bitmap(struct rev_info *revs)
2908{
2909        if (prepare_bitmap_walk(revs) < 0)
2910                return -1;
2911
2912        if (pack_options_allow_reuse() &&
2913            !reuse_partial_packfile_from_bitmap(
2914                        &reuse_packfile,
2915                        &reuse_packfile_objects,
2916                        &reuse_packfile_offset)) {
2917                assert(reuse_packfile_objects);
2918                nr_result += reuse_packfile_objects;
2919                display_progress(progress_state, nr_result);
2920        }
2921
2922        traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
2923        return 0;
2924}
2925
2926static void record_recent_object(struct object *obj,
2927                                 const char *name,
2928                                 void *data)
2929{
2930        oid_array_append(&recent_objects, &obj->oid);
2931}
2932
2933static void record_recent_commit(struct commit *commit, void *data)
2934{
2935        oid_array_append(&recent_objects, &commit->object.oid);
2936}
2937
2938static void get_object_list(int ac, const char **av)
2939{
2940        struct rev_info revs;
2941        char line[1000];
2942        int flags = 0;
2943
2944        init_revisions(&revs, NULL);
2945        save_commit_buffer = 0;
2946        setup_revisions(ac, av, &revs, NULL);
2947
2948        /* make sure shallows are read */
2949        is_repository_shallow();
2950
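        /*
         * Read additional revision arguments from stdin, one per line, up to
         * the first empty line: "--not" toggles the UNINTERESTING flag for
         * the lines that follow, and "--shallow <oid>" registers a shallow
         * boundary commit.
         */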
2951        while (fgets(line, sizeof(line), stdin) != NULL) {
2952                int len = strlen(line);
2953                if (len && line[len - 1] == '\n')
2954                        line[--len] = 0;
2955                if (!len)
2956                        break;
2957                if (*line == '-') {
2958                        if (!strcmp(line, "--not")) {
2959                                flags ^= UNINTERESTING;
2960                                write_bitmap_index = 0;
2961                                continue;
2962                        }
2963                        if (starts_with(line, "--shallow ")) {
2964                                struct object_id oid;
2965                                if (get_oid_hex(line + 10, &oid))
2966                                        die("not an SHA-1 '%s'", line + 10);
2967                                register_shallow(&oid);
2968                                use_bitmap_index = 0;
2969                                continue;
2970                        }
2971                        die("not a rev '%s'", line);
2972                }
2973                if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
2974                        die("bad revision '%s'", line);
2975        }
2976
2977        if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
2978                return;
2979
2980        if (prepare_revision_walk(&revs))
2981                die("revision walk setup failed");
2982        mark_edges_uninteresting(&revs, show_edge);
2983
2984        if (!fn_show_object)
2985                fn_show_object = show_object;
2986        traverse_commit_list_filtered(&filter_options, &revs,
2987                                      show_commit, fn_show_object, NULL,
2988                                      NULL);
2989
2990        if (unpack_unreachable_expiration) {
2991                revs.ignore_missing_links = 1;
2992                if (add_unseen_recent_objects_to_traversal(&revs,
2993                                unpack_unreachable_expiration))
2994                        die("unable to add recent objects");
2995                if (prepare_revision_walk(&revs))
2996                        die("revision walk setup failed");
2997                traverse_commit_list(&revs, record_recent_commit,
2998                                     record_recent_object, NULL);
2999        }
3000
3001        if (keep_unreachable)
3002                add_objects_in_unpacked_packs(&revs);
3003        if (pack_loose_unreachable)
3004                add_unreachable_loose_objects();
3005        if (unpack_unreachable)
3006                loosen_unused_packed_objects(&revs);
3007
3008        oid_array_clear(&recent_objects);
3009}
3010
3011static int option_parse_index_version(const struct option *opt,
3012                                      const char *arg, int unset)
3013{
3014        char *c;
3015        const char *val = arg;
3016        pack_idx_opts.version = strtoul(val, &c, 10);
3017        if (pack_idx_opts.version > 2)
3018                die(_("unsupported index version %s"), val);
3019        if (*c == ',' && c[1])
3020                pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
3021        if (*c || pack_idx_opts.off32_limit & 0x80000000)
3022                die(_("bad index version '%s'"), val);
3023        return 0;
3024}
3025
3026static int option_parse_unpack_unreachable(const struct option *opt,
3027                                           const char *arg, int unset)
3028{
3029        if (unset) {
3030                unpack_unreachable = 0;
3031                unpack_unreachable_expiration = 0;
3032        }
3033        else {
3034                unpack_unreachable = 1;
3035                if (arg)
3036                        unpack_unreachable_expiration = approxidate(arg);
3037        }
3038        return 0;
3039}
3040
3041int cmd_pack_objects(int argc, const char **argv, const char *prefix)
3042{
3043        int use_internal_rev_list = 0;
3044        int thin = 0;
3045        int shallow = 0;
3046        int all_progress_implied = 0;
3047        struct argv_array rp = ARGV_ARRAY_INIT;
3048        int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
3049        int rev_list_index = 0;
3050        struct option pack_objects_options[] = {
3051                OPT_SET_INT('q', "quiet", &progress,
3052                            N_("do not show progress meter"), 0),
3053                OPT_SET_INT(0, "progress", &progress,
3054                            N_("show progress meter"), 1),
3055                OPT_SET_INT(0, "all-progress", &progress,
3056                            N_("show progress meter during object writing phase"), 2),
3057                OPT_BOOL(0, "all-progress-implied",
3058                         &all_progress_implied,
3059                         N_("similar to --all-progress when progress meter is shown")),
3060                { OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
3061                  N_("write the pack index file in the specified idx format version"),
3062                  0, option_parse_index_version },
3063                OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
3064                              N_("maximum size of each output pack file")),
3065                OPT_BOOL(0, "local", &local,
3066                         N_("ignore borrowed objects from alternate object store")),
3067                OPT_BOOL(0, "incremental", &incremental,
3068                         N_("ignore packed objects")),
3069                OPT_INTEGER(0, "window", &window,
3070                            N_("limit pack window by objects")),
3071                OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
3072                              N_("limit pack window by memory in addition to object limit")),
3073                OPT_INTEGER(0, "depth", &depth,
3074                            N_("maximum length of delta chain allowed in the resulting pack")),
3075                OPT_BOOL(0, "reuse-delta", &reuse_delta,
3076                         N_("reuse existing deltas")),
3077                OPT_BOOL(0, "reuse-object", &reuse_object,
3078                         N_("reuse existing objects")),
3079                OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
3080                         N_("use OFS_DELTA objects")),
3081                OPT_INTEGER(0, "threads", &delta_search_threads,
3082                            N_("use threads when searching for best delta matches")),
3083                OPT_BOOL(0, "non-empty", &non_empty,
3084                         N_("do not create an empty pack output")),
3085                OPT_BOOL(0, "revs", &use_internal_rev_list,
3086                         N_("read revision arguments from standard input")),
3087                { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
3088                  N_("limit the objects to those that are not yet packed"),
3089                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
3090                { OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
3091                  N_("include objects reachable from any reference"),
3092                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
3093                { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
3094                  N_("include objects referred to by reflog entries"),
3095                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
3096                { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
3097                  N_("include objects referred to by the index"),
3098                  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
3099                OPT_BOOL(0, "stdout", &pack_to_stdout,
3100                         N_("output pack to stdout")),
3101                OPT_BOOL(0, "include-tag", &include_tag,
3102                         N_("include tag objects that refer to objects to be packed")),
3103                OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
3104                         N_("keep unreachable objects")),
3105                OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
3106                         N_("pack loose unreachable objects")),
3107                { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
3108                  N_("unpack unreachable objects newer than <time>"),
3109                  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
3110                OPT_BOOL(0, "thin", &thin,
3111                         N_("create thin packs")),
3112                OPT_BOOL(0, "shallow", &shallow,
3113                         N_("create packs suitable for shallow fetches")),
3114                OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,
3115                         N_("ignore packs that have companion .keep file")),
3116                OPT_INTEGER(0, "compression", &pack_compression_level,
3117                            N_("pack compression level")),
3118                OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
3119                            N_("do not hide commits by grafts"), 0),
3120                OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
3121                         N_("use a bitmap index if available to speed up counting objects")),
3122                OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
3123                         N_("write a bitmap index together with the pack index")),
3124                OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
3125                { OPTION_CALLBACK, 0, "missing", NULL, N_("action"),
3126                  N_("handling for missing objects"), PARSE_OPT_NONEG,
3127                  option_parse_missing_action },
3128                OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
3129                         N_("do not pack objects in promisor packfiles")),
3130                OPT_END(),
3131        };
3132
3133        if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
3134                BUG("too many dfs states, increase OE_DFS_STATE_BITS");
3135
3136        check_replace_refs = 0;
3137
3138        reset_pack_idx_option(&pack_idx_opts);
3139        git_config(git_pack_config, NULL);
3140
3141        progress = isatty(2);
3142        argc = parse_options(argc, argv, prefix, pack_objects_options,
3143                             pack_usage, 0);
3144
3145        if (argc) {
3146                base_name = argv[0];
3147                argc--;
3148        }
3149        if (pack_to_stdout != !base_name || argc)
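        /* exactly one of --stdout or <base-name>, and no leftover arguments */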
3150                usage_with_options(pack_usage, pack_objects_options);
3151
3152        if (depth >= (1 << OE_DEPTH_BITS)) {
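        /*
         * These limits are stored in narrow bit fields of struct object_entry,
         * so clamp user-supplied values to what OE_DEPTH_BITS and
         * OE_Z_DELTA_BITS can represent.
         */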
3153                warning(_("delta chain depth %d is too deep, forcing %d"),
3154                        depth, (1 << OE_DEPTH_BITS) - 1);
3155                depth = (1 << OE_DEPTH_BITS) - 1;
3156        }
3157        if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
3158                warning(_("pack.deltaCacheLimit is too high, forcing %d"),
3159                        (1U << OE_Z_DELTA_BITS) - 1);
3160                cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
3161        }
3162
3163        argv_array_push(&rp, "pack-objects");
3164        if (thin) {
3165                use_internal_rev_list = 1;
3166                argv_array_push(&rp, shallow
3167                                ? "--objects-edge-aggressive"
3168                                : "--objects-edge");
3169        } else
3170                argv_array_push(&rp, "--objects");
3171
3172        if (rev_list_all) {
3173                use_internal_rev_list = 1;
3174                argv_array_push(&rp, "--all");
3175        }
3176        if (rev_list_reflog) {
3177                use_internal_rev_list = 1;
3178                argv_array_push(&rp, "--reflog");
3179        }
3180        if (rev_list_index) {
3181                use_internal_rev_list = 1;
3182                argv_array_push(&rp, "--indexed-objects");
3183        }
3184        if (rev_list_unpacked) {
3185                use_internal_rev_list = 1;
3186                argv_array_push(&rp, "--unpacked");
3187        }
3188
3189        if (exclude_promisor_objects) {
3190                use_internal_rev_list = 1;
3191                fetch_if_missing = 0;
3192                argv_array_push(&rp, "--exclude-promisor-objects");
3193        }
3194
3195        if (!reuse_object)
3196                reuse_delta = 0;
3197        if (pack_compression_level == -1)
3198                pack_compression_level = Z_DEFAULT_COMPRESSION;
3199        else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
3200                die("bad pack compression level %d", pack_compression_level);
3201
3202        if (!delta_search_threads)      /* --threads=0 means autodetect */
3203                delta_search_threads = online_cpus();
3204
3205#ifdef NO_PTHREADS
3206        if (delta_search_threads != 1)
3207                warning("no threads support, ignoring --threads");
3208#endif
3209        if (!pack_to_stdout && !pack_size_limit)
3210                pack_size_limit = pack_size_limit_cfg;
3211        if (pack_to_stdout && pack_size_limit)
3212                die("--max-pack-size cannot be used to build a pack for transfer.");
3213        if (pack_size_limit && pack_size_limit < 1024*1024) {
3214                warning("minimum pack size limit is 1 MiB");
3215                pack_size_limit = 1024*1024;
3216        }
3217
3218        if (!pack_to_stdout && thin)
3219                die("--thin cannot be used to build an indexable pack.");
3220
3221        if (keep_unreachable && unpack_unreachable)
3222                die("--keep-unreachable and --unpack-unreachable are incompatible.");
3223        if (!rev_list_all || !rev_list_reflog || !rev_list_index)
3224                unpack_unreachable_expiration = 0;
3225
3226        if (filter_options.choice) {
3227                if (!pack_to_stdout)
3228                        die("cannot use --filter without --stdout.");
3229                use_bitmap_index = 0;
3230        }
3231
3232        /*
3233         * "soft" reasons not to use bitmaps - for an on-disk repack we want, by default:
3234         *
3235         * - to produce a good pack (with a bitmap index, not-yet-packed objects
3236         *   are packed in a suboptimal order).
3237         *
3238         * - to use the more robust pack-generation codepath (avoiding possible
3239         *   bugs in the bitmap code and possible bitmap index corruption).
3240         */
3241        if (!pack_to_stdout)
3242                use_bitmap_index_default = 0;
3243
3244        if (use_bitmap_index < 0)
3245                use_bitmap_index = use_bitmap_index_default;
3246
3247        /* "hard" reasons not to use bitmaps; these just won't work at all */
3248        if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow())
3249                use_bitmap_index = 0;
3250
3251        if (pack_to_stdout || !rev_list_all)
3252                write_bitmap_index = 0;
3253
3254        if (progress && all_progress_implied)
3255                progress = 2;
3256
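        /*
         * Honoring .keep files only matters if some local pack actually has
         * one; if none does, drop the flag so it does not needlessly inhibit
         * later decisions such as pack reuse.
         */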
3257        if (ignore_packed_keep) {
3258                struct packed_git *p;
3259                for (p = get_packed_git(the_repository); p; p = p->next)
3260                        if (p->pack_local && p->pack_keep)
3261                                break;
3262                if (!p) /* no keep-able packs found */
3263                        ignore_packed_keep = 0;
3264        }
3265        if (local) {
3266                /*
3267                 * unlike ignore_packed_keep above, we do not want to
3268                 * unset "local" based on looking at packs, as it
3269                 * also covers non-local objects
3270                 */
3271                struct packed_git *p;
3272                for (p = get_packed_git(the_repository); p; p = p->next) {
3273                        if (!p->pack_local) {
3274                                have_non_local_packs = 1;
3275                                break;
3276                        }
3277                }
3278        }
3279
3280        prepare_packing_data(&to_pack);
3281
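        /*
         * Enumerate the objects to pack: either read object names from
         * stdin, or run the internal revision walk with the arguments
         * collected in rp.
         */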
3282        if (progress)
3283                progress_state = start_progress(_("Counting objects"), 0);
3284        if (!use_internal_rev_list)
3285                read_object_list_from_stdin();
3286        else {
3287                get_object_list(rp.argc, rp.argv);
3288                argv_array_clear(&rp);
3289        }
3290        cleanup_preferred_base();
3291        if (include_tag && nr_result)
3292                for_each_ref(add_ref_tag, NULL);
3293        stop_progress(&progress_state);
3294
3295        if (non_empty && !nr_result)
3296                return 0;
3297        if (nr_result)
3298                prepare_pack(window, depth);
3299        write_pack_file();
3300        if (progress)
3301                fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
3302                        " reused %"PRIu32" (delta %"PRIu32")\n",
3303                        written, written_delta, reused, reused_delta);
3304        return 0;
3305}