midx.con commit multi-pack-index: implement 'expire' subcommand (19575c7)
   1#include "cache.h"
   2#include "config.h"
   3#include "csum-file.h"
   4#include "dir.h"
   5#include "lockfile.h"
   6#include "packfile.h"
   7#include "object-store.h"
   8#include "sha1-lookup.h"
   9#include "midx.h"
  10#include "progress.h"
  11#include "trace2.h"
  12
  13#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
  14#define MIDX_VERSION 1
  15#define MIDX_BYTE_FILE_VERSION 4
  16#define MIDX_BYTE_HASH_VERSION 5
  17#define MIDX_BYTE_NUM_CHUNKS 6
  18#define MIDX_BYTE_NUM_PACKS 8
  19#define MIDX_HASH_VERSION 1
  20#define MIDX_HEADER_SIZE 12
  21#define MIDX_HASH_LEN 20
  22#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
  23
  24#define MIDX_MAX_CHUNKS 5
  25#define MIDX_CHUNK_ALIGNMENT 4
  26#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
  27#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
  28#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
  29#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
  30#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
  31#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
  32#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
  33#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
  34#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
  35#define MIDX_LARGE_OFFSET_NEEDED 0x80000000
  36
  37#define PACK_EXPIRED UINT_MAX
  38
  39static char *get_midx_filename(const char *object_dir)
  40{
  41        return xstrfmt("%s/pack/multi-pack-index", object_dir);
  42}
  43
  44struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
  45{
  46        struct multi_pack_index *m = NULL;
  47        int fd;
  48        struct stat st;
  49        size_t midx_size;
  50        void *midx_map = NULL;
  51        uint32_t hash_version;
  52        char *midx_name = get_midx_filename(object_dir);
  53        uint32_t i;
  54        const char *cur_pack_name;
  55
  56        fd = git_open(midx_name);
  57
  58        if (fd < 0)
  59                goto cleanup_fail;
  60        if (fstat(fd, &st)) {
  61                error_errno(_("failed to read %s"), midx_name);
  62                goto cleanup_fail;
  63        }
  64
  65        midx_size = xsize_t(st.st_size);
  66
  67        if (midx_size < MIDX_MIN_SIZE) {
  68                error(_("multi-pack-index file %s is too small"), midx_name);
  69                goto cleanup_fail;
  70        }
  71
  72        FREE_AND_NULL(midx_name);
  73
  74        midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
  75
  76        FLEX_ALLOC_STR(m, object_dir, object_dir);
  77        m->fd = fd;
  78        m->data = midx_map;
  79        m->data_len = midx_size;
  80        m->local = local;
  81
  82        m->signature = get_be32(m->data);
  83        if (m->signature != MIDX_SIGNATURE)
  84                die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
  85                      m->signature, MIDX_SIGNATURE);
  86
  87        m->version = m->data[MIDX_BYTE_FILE_VERSION];
  88        if (m->version != MIDX_VERSION)
  89                die(_("multi-pack-index version %d not recognized"),
  90                      m->version);
  91
  92        hash_version = m->data[MIDX_BYTE_HASH_VERSION];
  93        if (hash_version != MIDX_HASH_VERSION)
  94                die(_("hash version %u does not match"), hash_version);
  95        m->hash_len = MIDX_HASH_LEN;
  96
  97        m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
  98
  99        m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
 100
 101        for (i = 0; i < m->num_chunks; i++) {
 102                uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
 103                                             MIDX_CHUNKLOOKUP_WIDTH * i);
 104                uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
 105                                                 MIDX_CHUNKLOOKUP_WIDTH * i);
 106
 107                if (chunk_offset >= m->data_len)
 108                        die(_("invalid chunk offset (too large)"));
 109
 110                switch (chunk_id) {
 111                        case MIDX_CHUNKID_PACKNAMES:
 112                                m->chunk_pack_names = m->data + chunk_offset;
 113                                break;
 114
 115                        case MIDX_CHUNKID_OIDFANOUT:
 116                                m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
 117                                break;
 118
 119                        case MIDX_CHUNKID_OIDLOOKUP:
 120                                m->chunk_oid_lookup = m->data + chunk_offset;
 121                                break;
 122
 123                        case MIDX_CHUNKID_OBJECTOFFSETS:
 124                                m->chunk_object_offsets = m->data + chunk_offset;
 125                                break;
 126
 127                        case MIDX_CHUNKID_LARGEOFFSETS:
 128                                m->chunk_large_offsets = m->data + chunk_offset;
 129                                break;
 130
 131                        case 0:
 132                                die(_("terminating multi-pack-index chunk id appears earlier than expected"));
 133                                break;
 134
 135                        default:
 136                                /*
 137                                 * Do nothing on unrecognized chunks, allowing future
 138                                 * extensions to add optional chunks.
 139                                 */
 140                                break;
 141                }
 142        }
 143
 144        if (!m->chunk_pack_names)
 145                die(_("multi-pack-index missing required pack-name chunk"));
 146        if (!m->chunk_oid_fanout)
 147                die(_("multi-pack-index missing required OID fanout chunk"));
 148        if (!m->chunk_oid_lookup)
 149                die(_("multi-pack-index missing required OID lookup chunk"));
 150        if (!m->chunk_object_offsets)
 151                die(_("multi-pack-index missing required object offsets chunk"));
 152
 153        m->num_objects = ntohl(m->chunk_oid_fanout[255]);
 154
 155        m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));
 156        m->packs = xcalloc(m->num_packs, sizeof(*m->packs));
 157
 158        cur_pack_name = (const char *)m->chunk_pack_names;
 159        for (i = 0; i < m->num_packs; i++) {
 160                m->pack_names[i] = cur_pack_name;
 161
 162                cur_pack_name += strlen(cur_pack_name) + 1;
 163
 164                if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
 165                        die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
 166                              m->pack_names[i - 1],
 167                              m->pack_names[i]);
 168        }
 169
 170        trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs);
 171        trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects);
 172
 173        return m;
 174
 175cleanup_fail:
 176        free(m);
 177        free(midx_name);
 178        if (midx_map)
 179                munmap(midx_map, midx_size);
 180        if (0 <= fd)
 181                close(fd);
 182        return NULL;
 183}
 184
 185void close_midx(struct multi_pack_index *m)
 186{
 187        uint32_t i;
 188
 189        if (!m)
 190                return;
 191
 192        munmap((unsigned char *)m->data, m->data_len);
 193        close(m->fd);
 194        m->fd = -1;
 195
 196        for (i = 0; i < m->num_packs; i++) {
 197                if (m->packs[i])
 198                        m->packs[i]->multi_pack_index = 0;
 199        }
 200        FREE_AND_NULL(m->packs);
 201        FREE_AND_NULL(m->pack_names);
 202}
 203
 204int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id)
 205{
 206        struct strbuf pack_name = STRBUF_INIT;
 207        struct packed_git *p;
 208
 209        if (pack_int_id >= m->num_packs)
 210                die(_("bad pack-int-id: %u (%u total packs)"),
 211                    pack_int_id, m->num_packs);
 212
 213        if (m->packs[pack_int_id])
 214                return 0;
 215
 216        strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
 217                    m->pack_names[pack_int_id]);
 218
 219        p = add_packed_git(pack_name.buf, pack_name.len, m->local);
 220        strbuf_release(&pack_name);
 221
 222        if (!p)
 223                return 1;
 224
 225        p->multi_pack_index = 1;
 226        m->packs[pack_int_id] = p;
 227        install_packed_git(r, p);
 228        list_add_tail(&p->mru, &r->objects->packed_git_mru);
 229
 230        return 0;
 231}
 232
 233int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
 234{
 235        return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup,
 236                            MIDX_HASH_LEN, result);
 237}
 238
 239struct object_id *nth_midxed_object_oid(struct object_id *oid,
 240                                        struct multi_pack_index *m,
 241                                        uint32_t n)
 242{
 243        if (n >= m->num_objects)
 244                return NULL;
 245
 246        hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n);
 247        return oid;
 248}
 249
 250static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
 251{
 252        const unsigned char *offset_data;
 253        uint32_t offset32;
 254
 255        offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH;
 256        offset32 = get_be32(offset_data + sizeof(uint32_t));
 257
 258        if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
 259                if (sizeof(off_t) < sizeof(uint64_t))
 260                        die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
 261
 262                offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
 263                return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
 264        }
 265
 266        return offset32;
 267}
 268
 269static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
 270{
 271        return get_be32(m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH);
 272}
 273
 274static int nth_midxed_pack_entry(struct repository *r,
 275                                 struct multi_pack_index *m,
 276                                 struct pack_entry *e,
 277                                 uint32_t pos)
 278{
 279        uint32_t pack_int_id;
 280        struct packed_git *p;
 281
 282        if (pos >= m->num_objects)
 283                return 0;
 284
 285        pack_int_id = nth_midxed_pack_int_id(m, pos);
 286
 287        if (prepare_midx_pack(r, m, pack_int_id))
 288                die(_("error preparing packfile from multi-pack-index"));
 289        p = m->packs[pack_int_id];
 290
 291        /*
 292        * We are about to tell the caller where they can locate the
 293        * requested object.  We better make sure the packfile is
 294        * still here and can be accessed before supplying that
 295        * answer, as it may have been deleted since the MIDX was
 296        * loaded!
 297        */
 298        if (!is_pack_valid(p))
 299                return 0;
 300
 301        if (p->num_bad_objects) {
 302                uint32_t i;
 303                struct object_id oid;
 304                nth_midxed_object_oid(&oid, m, pos);
 305                for (i = 0; i < p->num_bad_objects; i++)
 306                        if (hasheq(oid.hash,
 307                                   p->bad_object_sha1 + the_hash_algo->rawsz * i))
 308                                return 0;
 309        }
 310
 311        e->offset = nth_midxed_offset(m, pos);
 312        e->p = p;
 313
 314        return 1;
 315}
 316
 317int fill_midx_entry(struct repository * r,
 318                    const struct object_id *oid,
 319                    struct pack_entry *e,
 320                    struct multi_pack_index *m)
 321{
 322        uint32_t pos;
 323
 324        if (!bsearch_midx(oid, m, &pos))
 325                return 0;
 326
 327        return nth_midxed_pack_entry(r, m, e, pos);
 328}
 329
 330/* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */
 331static int cmp_idx_or_pack_name(const char *idx_or_pack_name,
 332                                const char *idx_name)
 333{
 334        /* Skip past any initial matching prefix. */
 335        while (*idx_name && *idx_name == *idx_or_pack_name) {
 336                idx_name++;
 337                idx_or_pack_name++;
 338        }
 339
 340        /*
 341         * If we didn't match completely, we may have matched "pack-1234." and
 342         * be left with "idx" and "pack" respectively, which is also OK. We do
 343         * not have to check for "idx" and "idx", because that would have been
 344         * a complete match (and in that case these strcmps will be false, but
 345         * we'll correctly return 0 from the final strcmp() below.
 346         *
 347         * Technically this matches "fooidx" and "foopack", but we'd never have
 348         * such names in the first place.
 349         */
 350        if (!strcmp(idx_name, "idx") && !strcmp(idx_or_pack_name, "pack"))
 351                return 0;
 352
 353        /*
 354         * This not only checks for a complete match, but also orders based on
 355         * the first non-identical character, which means our ordering will
 356         * match a raw strcmp(). That makes it OK to use this to binary search
 357         * a naively-sorted list.
 358         */
 359        return strcmp(idx_or_pack_name, idx_name);
 360}
 361
 362int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
 363{
 364        uint32_t first = 0, last = m->num_packs;
 365
 366        while (first < last) {
 367                uint32_t mid = first + (last - first) / 2;
 368                const char *current;
 369                int cmp;
 370
 371                current = m->pack_names[mid];
 372                cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
 373                if (!cmp)
 374                        return 1;
 375                if (cmp > 0) {
 376                        first = mid + 1;
 377                        continue;
 378                }
 379                last = mid;
 380        }
 381
 382        return 0;
 383}
 384
 385int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
 386{
 387        struct multi_pack_index *m;
 388        struct multi_pack_index *m_search;
 389        int config_value;
 390        static int env_value = -1;
 391
 392        if (env_value < 0)
 393                env_value = git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0);
 394
 395        if (!env_value &&
 396            (repo_config_get_bool(r, "core.multipackindex", &config_value) ||
 397            !config_value))
 398                return 0;
 399
 400        for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
 401                if (!strcmp(object_dir, m_search->object_dir))
 402                        return 1;
 403
 404        m = load_multi_pack_index(object_dir, local);
 405
 406        if (m) {
 407                m->next = r->objects->multi_pack_index;
 408                r->objects->multi_pack_index = m;
 409                return 1;
 410        }
 411
 412        return 0;
 413}
 414
 415static size_t write_midx_header(struct hashfile *f,
 416                                unsigned char num_chunks,
 417                                uint32_t num_packs)
 418{
 419        unsigned char byte_values[4];
 420
 421        hashwrite_be32(f, MIDX_SIGNATURE);
 422        byte_values[0] = MIDX_VERSION;
 423        byte_values[1] = MIDX_HASH_VERSION;
 424        byte_values[2] = num_chunks;
 425        byte_values[3] = 0; /* unused */
 426        hashwrite(f, byte_values, sizeof(byte_values));
 427        hashwrite_be32(f, num_packs);
 428
 429        return MIDX_HEADER_SIZE;
 430}
 431
 432struct pack_info {
 433        uint32_t orig_pack_int_id;
 434        char *pack_name;
 435        struct packed_git *p;
 436        unsigned expired : 1;
 437};
 438
 439static int pack_info_compare(const void *_a, const void *_b)
 440{
 441        struct pack_info *a = (struct pack_info *)_a;
 442        struct pack_info *b = (struct pack_info *)_b;
 443        return strcmp(a->pack_name, b->pack_name);
 444}
 445
 446struct pack_list {
 447        struct pack_info *info;
 448        uint32_t nr;
 449        uint32_t alloc;
 450        struct multi_pack_index *m;
 451};
 452
 453static void add_pack_to_midx(const char *full_path, size_t full_path_len,
 454                             const char *file_name, void *data)
 455{
 456        struct pack_list *packs = (struct pack_list *)data;
 457
 458        if (ends_with(file_name, ".idx")) {
 459                if (packs->m && midx_contains_pack(packs->m, file_name))
 460                        return;
 461
 462                ALLOC_GROW(packs->info, packs->nr + 1, packs->alloc);
 463
 464                packs->info[packs->nr].p = add_packed_git(full_path,
 465                                                          full_path_len,
 466                                                          0);
 467
 468                if (!packs->info[packs->nr].p) {
 469                        warning(_("failed to add packfile '%s'"),
 470                                full_path);
 471                        return;
 472                }
 473
 474                if (open_pack_index(packs->info[packs->nr].p)) {
 475                        warning(_("failed to open pack-index '%s'"),
 476                                full_path);
 477                        close_pack(packs->info[packs->nr].p);
 478                        FREE_AND_NULL(packs->info[packs->nr].p);
 479                        return;
 480                }
 481
 482                packs->info[packs->nr].pack_name = xstrdup(file_name);
 483                packs->info[packs->nr].orig_pack_int_id = packs->nr;
 484                packs->info[packs->nr].expired = 0;
 485                packs->nr++;
 486        }
 487}
 488
 489struct pack_midx_entry {
 490        struct object_id oid;
 491        uint32_t pack_int_id;
 492        time_t pack_mtime;
 493        uint64_t offset;
 494};
 495
 496static int midx_oid_compare(const void *_a, const void *_b)
 497{
 498        const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
 499        const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
 500        int cmp = oidcmp(&a->oid, &b->oid);
 501
 502        if (cmp)
 503                return cmp;
 504
 505        if (a->pack_mtime > b->pack_mtime)
 506                return -1;
 507        else if (a->pack_mtime < b->pack_mtime)
 508                return 1;
 509
 510        return a->pack_int_id - b->pack_int_id;
 511}
 512
 513static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
 514                                      struct pack_midx_entry *e,
 515                                      uint32_t pos)
 516{
 517        if (pos >= m->num_objects)
 518                return 1;
 519
 520        nth_midxed_object_oid(&e->oid, m, pos);
 521        e->pack_int_id = nth_midxed_pack_int_id(m, pos);
 522        e->offset = nth_midxed_offset(m, pos);
 523
 524        /* consider objects in midx to be from "old" packs */
 525        e->pack_mtime = 0;
 526        return 0;
 527}
 528
 529static void fill_pack_entry(uint32_t pack_int_id,
 530                            struct packed_git *p,
 531                            uint32_t cur_object,
 532                            struct pack_midx_entry *entry)
 533{
 534        if (!nth_packed_object_oid(&entry->oid, p, cur_object))
 535                die(_("failed to locate object %d in packfile"), cur_object);
 536
 537        entry->pack_int_id = pack_int_id;
 538        entry->pack_mtime = p->mtime;
 539
 540        entry->offset = nth_packed_object_offset(p, cur_object);
 541}
 542
 543/*
 544 * It is possible to artificially get into a state where there are many
 545 * duplicate copies of objects. That can create high memory pressure if
 546 * we are to create a list of all objects before de-duplication. To reduce
 547 * this memory pressure without a significant performance drop, automatically
 548 * group objects by the first byte of their object id. Use the IDX fanout
 549 * tables to group the data, copy to a local array, then sort.
 550 *
 551 * Copy only the de-duplicated entries (selected by most-recent modified time
 552 * of a packfile containing the object).
 553 */
 554static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 555                                                  struct pack_info *info,
 556                                                  uint32_t nr_packs,
 557                                                  uint32_t *nr_objects)
 558{
 559        uint32_t cur_fanout, cur_pack, cur_object;
 560        uint32_t alloc_fanout, alloc_objects, total_objects = 0;
 561        struct pack_midx_entry *entries_by_fanout = NULL;
 562        struct pack_midx_entry *deduplicated_entries = NULL;
 563        uint32_t start_pack = m ? m->num_packs : 0;
 564
 565        for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
 566                total_objects += info[cur_pack].p->num_objects;
 567
 568        /*
 569         * As we de-duplicate by fanout value, we expect the fanout
 570         * slices to be evenly distributed, with some noise. Hence,
 571         * allocate slightly more than one 256th.
 572         */
 573        alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
 574
 575        ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
 576        ALLOC_ARRAY(deduplicated_entries, alloc_objects);
 577        *nr_objects = 0;
 578
 579        for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
 580                uint32_t nr_fanout = 0;
 581
 582                if (m) {
 583                        uint32_t start = 0, end;
 584
 585                        if (cur_fanout)
 586                                start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]);
 587                        end = ntohl(m->chunk_oid_fanout[cur_fanout]);
 588
 589                        for (cur_object = start; cur_object < end; cur_object++) {
 590                                ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
 591                                nth_midxed_pack_midx_entry(m,
 592                                                           &entries_by_fanout[nr_fanout],
 593                                                           cur_object);
 594                                nr_fanout++;
 595                        }
 596                }
 597
 598                for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
 599                        uint32_t start = 0, end;
 600
 601                        if (cur_fanout)
 602                                start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
 603                        end = get_pack_fanout(info[cur_pack].p, cur_fanout);
 604
 605                        for (cur_object = start; cur_object < end; cur_object++) {
 606                                ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
 607                                fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
 608                                nr_fanout++;
 609                        }
 610                }
 611
 612                QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
 613
 614                /*
 615                 * The batch is now sorted by OID and then mtime (descending).
 616                 * Take only the first duplicate.
 617                 */
 618                for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
 619                        if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid,
 620                                                &entries_by_fanout[cur_object].oid))
 621                                continue;
 622
 623                        ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
 624                        memcpy(&deduplicated_entries[*nr_objects],
 625                               &entries_by_fanout[cur_object],
 626                               sizeof(struct pack_midx_entry));
 627                        (*nr_objects)++;
 628                }
 629        }
 630
 631        free(entries_by_fanout);
 632        return deduplicated_entries;
 633}
 634
 635static size_t write_midx_pack_names(struct hashfile *f,
 636                                    struct pack_info *info,
 637                                    uint32_t num_packs)
 638{
 639        uint32_t i;
 640        unsigned char padding[MIDX_CHUNK_ALIGNMENT];
 641        size_t written = 0;
 642
 643        for (i = 0; i < num_packs; i++) {
 644                size_t writelen;
 645
 646                if (info[i].expired)
 647                        continue;
 648
 649                if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0)
 650                        BUG("incorrect pack-file order: %s before %s",
 651                            info[i - 1].pack_name,
 652                            info[i].pack_name);
 653
 654                writelen = strlen(info[i].pack_name) + 1;
 655                hashwrite(f, info[i].pack_name, writelen);
 656                written += writelen;
 657        }
 658
 659        /* add padding to be aligned */
 660        i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
 661        if (i < MIDX_CHUNK_ALIGNMENT) {
 662                memset(padding, 0, sizeof(padding));
 663                hashwrite(f, padding, i);
 664                written += i;
 665        }
 666
 667        return written;
 668}
 669
 670static size_t write_midx_oid_fanout(struct hashfile *f,
 671                                    struct pack_midx_entry *objects,
 672                                    uint32_t nr_objects)
 673{
 674        struct pack_midx_entry *list = objects;
 675        struct pack_midx_entry *last = objects + nr_objects;
 676        uint32_t count = 0;
 677        uint32_t i;
 678
 679        /*
 680        * Write the first-level table (the list is sorted,
 681        * but we use a 256-entry lookup to be able to avoid
 682        * having to do eight extra binary search iterations).
 683        */
 684        for (i = 0; i < 256; i++) {
 685                struct pack_midx_entry *next = list;
 686
 687                while (next < last && next->oid.hash[0] == i) {
 688                        count++;
 689                        next++;
 690                }
 691
 692                hashwrite_be32(f, count);
 693                list = next;
 694        }
 695
 696        return MIDX_CHUNK_FANOUT_SIZE;
 697}
 698
 699static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
 700                                    struct pack_midx_entry *objects,
 701                                    uint32_t nr_objects)
 702{
 703        struct pack_midx_entry *list = objects;
 704        uint32_t i;
 705        size_t written = 0;
 706
 707        for (i = 0; i < nr_objects; i++) {
 708                struct pack_midx_entry *obj = list++;
 709
 710                if (i < nr_objects - 1) {
 711                        struct pack_midx_entry *next = list;
 712                        if (oidcmp(&obj->oid, &next->oid) >= 0)
 713                                BUG("OIDs not in order: %s >= %s",
 714                                    oid_to_hex(&obj->oid),
 715                                    oid_to_hex(&next->oid));
 716                }
 717
 718                hashwrite(f, obj->oid.hash, (int)hash_len);
 719                written += hash_len;
 720        }
 721
 722        return written;
 723}
 724
 725static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed,
 726                                        uint32_t *perm,
 727                                        struct pack_midx_entry *objects, uint32_t nr_objects)
 728{
 729        struct pack_midx_entry *list = objects;
 730        uint32_t i, nr_large_offset = 0;
 731        size_t written = 0;
 732
 733        for (i = 0; i < nr_objects; i++) {
 734                struct pack_midx_entry *obj = list++;
 735
 736                if (perm[obj->pack_int_id] == PACK_EXPIRED)
 737                        BUG("object %s is in an expired pack with int-id %d",
 738                            oid_to_hex(&obj->oid),
 739                            obj->pack_int_id);
 740
 741                hashwrite_be32(f, perm[obj->pack_int_id]);
 742
 743                if (large_offset_needed && obj->offset >> 31)
 744                        hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
 745                else if (!large_offset_needed && obj->offset >> 32)
 746                        BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!",
 747                            oid_to_hex(&obj->oid),
 748                            obj->offset);
 749                else
 750                        hashwrite_be32(f, (uint32_t)obj->offset);
 751
 752                written += MIDX_CHUNK_OFFSET_WIDTH;
 753        }
 754
 755        return written;
 756}
 757
 758static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_offset,
 759                                       struct pack_midx_entry *objects, uint32_t nr_objects)
 760{
 761        struct pack_midx_entry *list = objects, *end = objects + nr_objects;
 762        size_t written = 0;
 763
 764        while (nr_large_offset) {
 765                struct pack_midx_entry *obj;
 766                uint64_t offset;
 767
 768                if (list >= end)
 769                        BUG("too many large-offset objects");
 770
 771                obj = list++;
 772                offset = obj->offset;
 773
 774                if (!(offset >> 31))
 775                        continue;
 776
 777                hashwrite_be32(f, offset >> 32);
 778                hashwrite_be32(f, offset & 0xffffffffUL);
 779                written += 2 * sizeof(uint32_t);
 780
 781                nr_large_offset--;
 782        }
 783
 784        return written;
 785}
 786
 787static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
 788                               struct string_list *packs_to_drop)
 789{
 790        unsigned char cur_chunk, num_chunks = 0;
 791        char *midx_name;
 792        uint32_t i;
 793        struct hashfile *f = NULL;
 794        struct lock_file lk;
 795        struct pack_list packs;
 796        uint32_t *pack_perm = NULL;
 797        uint64_t written = 0;
 798        uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
 799        uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
 800        uint32_t nr_entries, num_large_offsets = 0;
 801        struct pack_midx_entry *entries = NULL;
 802        int large_offsets_needed = 0;
 803        int pack_name_concat_len = 0;
 804        int dropped_packs = 0;
 805        int result = 0;
 806
 807        midx_name = get_midx_filename(object_dir);
 808        if (safe_create_leading_directories(midx_name)) {
 809                UNLEAK(midx_name);
 810                die_errno(_("unable to create leading directories of %s"),
 811                          midx_name);
 812        }
 813
 814        if (m)
 815                packs.m = m;
 816        else
 817                packs.m = load_multi_pack_index(object_dir, 1);
 818
 819        packs.nr = 0;
 820        packs.alloc = packs.m ? packs.m->num_packs : 16;
 821        packs.info = NULL;
 822        ALLOC_ARRAY(packs.info, packs.alloc);
 823
 824        if (packs.m) {
 825                for (i = 0; i < packs.m->num_packs; i++) {
 826                        ALLOC_GROW(packs.info, packs.nr + 1, packs.alloc);
 827
 828                        packs.info[packs.nr].orig_pack_int_id = i;
 829                        packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]);
 830                        packs.info[packs.nr].p = NULL;
 831                        packs.info[packs.nr].expired = 0;
 832                        packs.nr++;
 833                }
 834        }
 835
 836        for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs);
 837
 838        if (packs.m && packs.nr == packs.m->num_packs && !packs_to_drop)
 839                goto cleanup;
 840
 841        entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries);
 842
 843        for (i = 0; i < nr_entries; i++) {
 844                if (entries[i].offset > 0x7fffffff)
 845                        num_large_offsets++;
 846                if (entries[i].offset > 0xffffffff)
 847                        large_offsets_needed = 1;
 848        }
 849
 850        QSORT(packs.info, packs.nr, pack_info_compare);
 851
 852        if (packs_to_drop && packs_to_drop->nr) {
 853                int drop_index = 0;
 854                int missing_drops = 0;
 855
 856                for (i = 0; i < packs.nr && drop_index < packs_to_drop->nr; i++) {
 857                        int cmp = strcmp(packs.info[i].pack_name,
 858                                         packs_to_drop->items[drop_index].string);
 859
 860                        if (!cmp) {
 861                                drop_index++;
 862                                packs.info[i].expired = 1;
 863                        } else if (cmp > 0) {
 864                                error(_("did not see pack-file %s to drop"),
 865                                      packs_to_drop->items[drop_index].string);
 866                                drop_index++;
 867                                missing_drops++;
 868                                i--;
 869                        } else {
 870                                packs.info[i].expired = 0;
 871                        }
 872                }
 873
 874                if (missing_drops) {
 875                        result = 1;
 876                        goto cleanup;
 877                }
 878        }
 879
 880        /*
 881         * pack_perm stores a permutation between pack-int-ids from the
 882         * previous multi-pack-index to the new one we are writing:
 883         *
 884         * pack_perm[old_id] = new_id
 885         */
 886        ALLOC_ARRAY(pack_perm, packs.nr);
 887        for (i = 0; i < packs.nr; i++) {
 888                if (packs.info[i].expired) {
 889                        dropped_packs++;
 890                        pack_perm[packs.info[i].orig_pack_int_id] = PACK_EXPIRED;
 891                } else {
 892                        pack_perm[packs.info[i].orig_pack_int_id] = i - dropped_packs;
 893                }
 894        }
 895
 896        for (i = 0; i < packs.nr; i++) {
 897                if (!packs.info[i].expired)
 898                        pack_name_concat_len += strlen(packs.info[i].pack_name) + 1;
 899        }
 900
 901        if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
 902                pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
 903                                        (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
 904
 905        hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
 906        f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
 907        FREE_AND_NULL(midx_name);
 908
 909        if (packs.m)
 910                close_midx(packs.m);
 911
 912        cur_chunk = 0;
 913        num_chunks = large_offsets_needed ? 5 : 4;
 914
 915        written = write_midx_header(f, num_chunks, packs.nr - dropped_packs);
 916
 917        chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
 918        chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
 919
 920        cur_chunk++;
 921        chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
 922        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
 923
 924        cur_chunk++;
 925        chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
 926        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE;
 927
 928        cur_chunk++;
 929        chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS;
 930        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN;
 931
 932        cur_chunk++;
 933        chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_CHUNK_OFFSET_WIDTH;
 934        if (large_offsets_needed) {
 935                chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS;
 936
 937                cur_chunk++;
 938                chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] +
 939                                           num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH;
 940        }
 941
 942        chunk_ids[cur_chunk] = 0;
 943
 944        for (i = 0; i <= num_chunks; i++) {
 945                if (i && chunk_offsets[i] < chunk_offsets[i - 1])
 946                        BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
 947                            chunk_offsets[i - 1],
 948                            chunk_offsets[i]);
 949
 950                if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
 951                        BUG("chunk offset %"PRIu64" is not properly aligned",
 952                            chunk_offsets[i]);
 953
 954                hashwrite_be32(f, chunk_ids[i]);
 955                hashwrite_be32(f, chunk_offsets[i] >> 32);
 956                hashwrite_be32(f, chunk_offsets[i]);
 957
 958                written += MIDX_CHUNKLOOKUP_WIDTH;
 959        }
 960
 961        for (i = 0; i < num_chunks; i++) {
 962                if (written != chunk_offsets[i])
 963                        BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
 964                            chunk_offsets[i],
 965                            written,
 966                            chunk_ids[i]);
 967
 968                switch (chunk_ids[i]) {
 969                        case MIDX_CHUNKID_PACKNAMES:
 970                                written += write_midx_pack_names(f, packs.info, packs.nr);
 971                                break;
 972
 973                        case MIDX_CHUNKID_OIDFANOUT:
 974                                written += write_midx_oid_fanout(f, entries, nr_entries);
 975                                break;
 976
 977                        case MIDX_CHUNKID_OIDLOOKUP:
 978                                written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries);
 979                                break;
 980
 981                        case MIDX_CHUNKID_OBJECTOFFSETS:
 982                                written += write_midx_object_offsets(f, large_offsets_needed, pack_perm, entries, nr_entries);
 983                                break;
 984
 985                        case MIDX_CHUNKID_LARGEOFFSETS:
 986                                written += write_midx_large_offsets(f, num_large_offsets, entries, nr_entries);
 987                                break;
 988
 989                        default:
 990                                BUG("trying to write unknown chunk id %"PRIx32,
 991                                    chunk_ids[i]);
 992                }
 993        }
 994
 995        if (written != chunk_offsets[num_chunks])
 996                BUG("incorrect final offset %"PRIu64" != %"PRIu64,
 997                    written,
 998                    chunk_offsets[num_chunks]);
 999
1000        finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
1001        commit_lock_file(&lk);
1002
1003cleanup:
1004        for (i = 0; i < packs.nr; i++) {
1005                if (packs.info[i].p) {
1006                        close_pack(packs.info[i].p);
1007                        free(packs.info[i].p);
1008                }
1009                free(packs.info[i].pack_name);
1010        }
1011
1012        free(packs.info);
1013        free(entries);
1014        free(pack_perm);
1015        free(midx_name);
1016        return result;
1017}
1018
1019int write_midx_file(const char *object_dir)
1020{
1021        return write_midx_internal(object_dir, NULL, NULL);
1022}
1023
1024void clear_midx_file(struct repository *r)
1025{
1026        char *midx = get_midx_filename(r->objects->odb->path);
1027
1028        if (r->objects && r->objects->multi_pack_index) {
1029                close_midx(r->objects->multi_pack_index);
1030                r->objects->multi_pack_index = NULL;
1031        }
1032
1033        if (remove_path(midx)) {
1034                UNLEAK(midx);
1035                die(_("failed to clear multi-pack-index at %s"), midx);
1036        }
1037
1038        free(midx);
1039}
1040
1041static int verify_midx_error;
1042
1043static void midx_report(const char *fmt, ...)
1044{
1045        va_list ap;
1046        verify_midx_error = 1;
1047        va_start(ap, fmt);
1048        vfprintf(stderr, fmt, ap);
1049        fprintf(stderr, "\n");
1050        va_end(ap);
1051}
1052
1053struct pair_pos_vs_id
1054{
1055        uint32_t pos;
1056        uint32_t pack_int_id;
1057};
1058
1059static int compare_pair_pos_vs_id(const void *_a, const void *_b)
1060{
1061        struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a;
1062        struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b;
1063
1064        return b->pack_int_id - a->pack_int_id;
1065}
1066
1067/*
1068 * Limit calls to display_progress() for performance reasons.
1069 * The interval here was arbitrarily chosen.
1070 */
1071#define SPARSE_PROGRESS_INTERVAL (1 << 12)
1072#define midx_display_sparse_progress(progress, n) \
1073        do { \
1074                uint64_t _n = (n); \
1075                if ((_n & (SPARSE_PROGRESS_INTERVAL - 1)) == 0) \
1076                        display_progress(progress, _n); \
1077        } while (0)
1078
1079int verify_midx_file(struct repository *r, const char *object_dir)
1080{
1081        struct pair_pos_vs_id *pairs = NULL;
1082        uint32_t i;
1083        struct progress *progress;
1084        struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
1085        verify_midx_error = 0;
1086
1087        if (!m)
1088                return 0;
1089
1090        progress = start_progress(_("Looking for referenced packfiles"),
1091                                  m->num_packs);
1092        for (i = 0; i < m->num_packs; i++) {
1093                if (prepare_midx_pack(r, m, i))
1094                        midx_report("failed to load pack in position %d", i);
1095
1096                display_progress(progress, i + 1);
1097        }
1098        stop_progress(&progress);
1099
1100        for (i = 0; i < 255; i++) {
1101                uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
1102                uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]);
1103
1104                if (oid_fanout1 > oid_fanout2)
1105                        midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
1106                                    i, oid_fanout1, oid_fanout2, i + 1);
1107        }
1108
1109        progress = start_sparse_progress(_("Verifying OID order in MIDX"),
1110                                         m->num_objects - 1);
1111        for (i = 0; i < m->num_objects - 1; i++) {
1112                struct object_id oid1, oid2;
1113
1114                nth_midxed_object_oid(&oid1, m, i);
1115                nth_midxed_object_oid(&oid2, m, i + 1);
1116
1117                if (oidcmp(&oid1, &oid2) >= 0)
1118                        midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
1119                                    i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
1120
1121                midx_display_sparse_progress(progress, i + 1);
1122        }
1123        stop_progress(&progress);
1124
1125        /*
1126         * Create an array mapping each object to its packfile id.  Sort it
1127         * to group the objects by packfile.  Use this permutation to visit
1128         * each of the objects and only require 1 packfile to be open at a
1129         * time.
1130         */
1131        ALLOC_ARRAY(pairs, m->num_objects);
1132        for (i = 0; i < m->num_objects; i++) {
1133                pairs[i].pos = i;
1134                pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i);
1135        }
1136
1137        progress = start_sparse_progress(_("Sorting objects by packfile"),
1138                                         m->num_objects);
1139        display_progress(progress, 0); /* TODO: Measure QSORT() progress */
1140        QSORT(pairs, m->num_objects, compare_pair_pos_vs_id);
1141        stop_progress(&progress);
1142
1143        progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects);
1144        for (i = 0; i < m->num_objects; i++) {
1145                struct object_id oid;
1146                struct pack_entry e;
1147                off_t m_offset, p_offset;
1148
1149                if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id &&
1150                    m->packs[pairs[i-1].pack_int_id])
1151                {
1152                        close_pack_fd(m->packs[pairs[i-1].pack_int_id]);
1153                        close_pack_index(m->packs[pairs[i-1].pack_int_id]);
1154                }
1155
1156                nth_midxed_object_oid(&oid, m, pairs[i].pos);
1157
1158                if (!fill_midx_entry(r, &oid, &e, m)) {
1159                        midx_report(_("failed to load pack entry for oid[%d] = %s"),
1160                                    pairs[i].pos, oid_to_hex(&oid));
1161                        continue;
1162                }
1163
1164                if (open_pack_index(e.p)) {
1165                        midx_report(_("failed to load pack-index for packfile %s"),
1166                                    e.p->pack_name);
1167                        break;
1168                }
1169
1170                m_offset = e.offset;
1171                p_offset = find_pack_entry_one(oid.hash, e.p);
1172
1173                if (m_offset != p_offset)
1174                        midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1175                                    pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset);
1176
1177                midx_display_sparse_progress(progress, i + 1);
1178        }
1179        stop_progress(&progress);
1180
1181        free(pairs);
1182
1183        return verify_midx_error;
1184}
1185
1186int expire_midx_packs(struct repository *r, const char *object_dir)
1187{
1188        uint32_t i, *count, result = 0;
1189        struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
1190        struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
1191
1192        if (!m)
1193                return 0;
1194
1195        count = xcalloc(m->num_packs, sizeof(uint32_t));
1196        for (i = 0; i < m->num_objects; i++) {
1197                int pack_int_id = nth_midxed_pack_int_id(m, i);
1198                count[pack_int_id]++;
1199        }
1200
1201        for (i = 0; i < m->num_packs; i++) {
1202                char *pack_name;
1203
1204                if (count[i])
1205                        continue;
1206
1207                if (prepare_midx_pack(r, m, i))
1208                        continue;
1209
1210                if (m->packs[i]->pack_keep)
1211                        continue;
1212
1213                pack_name = xstrdup(m->packs[i]->pack_name);
1214                close_pack(m->packs[i]);
1215
1216                string_list_insert(&packs_to_drop, m->pack_names[i]);
1217                unlink_pack_path(pack_name, 0);
1218                free(pack_name);
1219        }
1220
1221        free(count);
1222
1223        if (packs_to_drop.nr)
1224                result = write_midx_internal(object_dir, m, &packs_to_drop);
1225
1226        string_list_clear(&packs_to_drop, 0);
1227        return result;
1228}