3d652212c60633147c7df0c9d1acc135fc58d916
   1#include "cache.h"
   2#include "list.h"
   3#include "pack.h"
   4#include "repository.h"
   5#include "dir.h"
   6#include "mergesort.h"
   7#include "packfile.h"
   8#include "delta.h"
   9#include "list.h"
  10#include "streaming.h"
  11#include "sha1-lookup.h"
  12#include "commit.h"
  13#include "object.h"
  14#include "tag.h"
  15#include "tree-walk.h"
  16#include "tree.h"
  17#include "object-store.h"
  18
  19char *odb_pack_name(struct strbuf *buf,
  20                    const unsigned char *sha1,
  21                    const char *ext)
  22{
  23        strbuf_reset(buf);
  24        strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
  25                    sha1_to_hex(sha1), ext);
  26        return buf->buf;
  27}
  28
  29char *sha1_pack_name(const unsigned char *sha1)
  30{
  31        static struct strbuf buf = STRBUF_INIT;
  32        return odb_pack_name(&buf, sha1, "pack");
  33}
  34
  35char *sha1_pack_index_name(const unsigned char *sha1)
  36{
  37        static struct strbuf buf = STRBUF_INIT;
  38        return odb_pack_name(&buf, sha1, "idx");
  39}
  40
  41static unsigned int pack_used_ctr;
  42static unsigned int pack_mmap_calls;
  43static unsigned int peak_pack_open_windows;
  44static unsigned int pack_open_windows;
  45static unsigned int pack_open_fds;
  46static unsigned int pack_max_fds;
  47static size_t peak_pack_mapped;
  48static size_t pack_mapped;
  49
  50#define SZ_FMT PRIuMAX
  51static inline uintmax_t sz_fmt(size_t s) { return s; }
  52
  53void pack_report(void)
  54{
  55        fprintf(stderr,
  56                "pack_report: getpagesize()            = %10" SZ_FMT "\n"
  57                "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
  58                "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
  59                sz_fmt(getpagesize()),
  60                sz_fmt(packed_git_window_size),
  61                sz_fmt(packed_git_limit));
  62        fprintf(stderr,
  63                "pack_report: pack_used_ctr            = %10u\n"
  64                "pack_report: pack_mmap_calls          = %10u\n"
  65                "pack_report: pack_open_windows        = %10u / %10u\n"
  66                "pack_report: pack_mapped              = "
  67                        "%10" SZ_FMT " / %10" SZ_FMT "\n",
  68                pack_used_ctr,
  69                pack_mmap_calls,
  70                pack_open_windows, peak_pack_open_windows,
  71                sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
  72}
  73
  74/*
  75 * Open and mmap the index file at path, perform a couple of
  76 * consistency checks, then record its information to p.  Return 0 on
  77 * success.
  78 */
  79static int check_packed_git_idx(const char *path, struct packed_git *p)
  80{
  81        void *idx_map;
  82        struct pack_idx_header *hdr;
  83        size_t idx_size;
  84        uint32_t version, nr, i, *index;
  85        int fd = git_open(path);
  86        struct stat st;
  87        const unsigned int hashsz = the_hash_algo->rawsz;
  88
  89        if (fd < 0)
  90                return -1;
  91        if (fstat(fd, &st)) {
  92                close(fd);
  93                return -1;
  94        }
  95        idx_size = xsize_t(st.st_size);
  96        if (idx_size < 4 * 256 + hashsz + hashsz) {
  97                close(fd);
  98                return error("index file %s is too small", path);
  99        }
 100        idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 101        close(fd);
 102
 103        hdr = idx_map;
 104        if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
 105                version = ntohl(hdr->idx_version);
 106                if (version < 2 || version > 2) {
 107                        munmap(idx_map, idx_size);
 108                        return error("index file %s is version %"PRIu32
 109                                     " and is not supported by this binary"
 110                                     " (try upgrading GIT to a newer version)",
 111                                     path, version);
 112                }
 113        } else
 114                version = 1;
 115
 116        nr = 0;
 117        index = idx_map;
 118        if (version > 1)
 119                index += 2;  /* skip index header */
 120        for (i = 0; i < 256; i++) {
 121                uint32_t n = ntohl(index[i]);
 122                if (n < nr) {
 123                        munmap(idx_map, idx_size);
 124                        return error("non-monotonic index %s", path);
 125                }
 126                nr = n;
 127        }
 128
 129        if (version == 1) {
 130                /*
 131                 * Total size:
 132                 *  - 256 index entries 4 bytes each
 133                 *  - 24-byte entries * nr (object ID + 4-byte offset)
 134                 *  - hash of the packfile
 135                 *  - file checksum
 136                 */
 137                if (idx_size != 4*256 + nr * (hashsz + 4) + hashsz + hashsz) {
 138                        munmap(idx_map, idx_size);
 139                        return error("wrong index v1 file size in %s", path);
 140                }
 141        } else if (version == 2) {
 142                /*
 143                 * Minimum size:
 144                 *  - 8 bytes of header
 145                 *  - 256 index entries 4 bytes each
 146                 *  - object ID entry * nr
 147                 *  - 4-byte crc entry * nr
 148                 *  - 4-byte offset entry * nr
 149                 *  - hash of the packfile
 150                 *  - file checksum
 151                 * And after the 4-byte offset table might be a
 152                 * variable sized table containing 8-byte entries
 153                 * for offsets larger than 2^31.
 154                 */
 155                unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz;
 156                unsigned long max_size = min_size;
 157                if (nr)
 158                        max_size += (nr - 1)*8;
 159                if (idx_size < min_size || idx_size > max_size) {
 160                        munmap(idx_map, idx_size);
 161                        return error("wrong index v2 file size in %s", path);
 162                }
 163                if (idx_size != min_size &&
 164                    /*
 165                     * make sure we can deal with large pack offsets.
 166                     * 31-bit signed offset won't be enough, neither
 167                     * 32-bit unsigned one will be.
 168                     */
 169                    (sizeof(off_t) <= 4)) {
 170                        munmap(idx_map, idx_size);
 171                        return error("pack too large for current definition of off_t in %s", path);
 172                }
 173        }
 174
 175        p->index_version = version;
 176        p->index_data = idx_map;
 177        p->index_size = idx_size;
 178        p->num_objects = nr;
 179        return 0;
 180}
 181
 182int open_pack_index(struct packed_git *p)
 183{
 184        char *idx_name;
 185        size_t len;
 186        int ret;
 187
 188        if (p->index_data)
 189                return 0;
 190
 191        if (!strip_suffix(p->pack_name, ".pack", &len))
 192                BUG("pack_name does not end in .pack");
 193        idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
 194        ret = check_packed_git_idx(idx_name, p);
 195        free(idx_name);
 196        return ret;
 197}
 198
 199uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
 200{
 201        const uint32_t *level1_ofs = p->index_data;
 202
 203        if (!level1_ofs) {
 204                if (open_pack_index(p))
 205                        return 0;
 206                level1_ofs = p->index_data;
 207        }
 208
 209        if (p->index_version > 1) {
 210                level1_ofs += 2;
 211        }
 212
 213        return ntohl(level1_ofs[value]);
 214}
 215
 216static struct packed_git *alloc_packed_git(int extra)
 217{
 218        struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
 219        memset(p, 0, sizeof(*p));
 220        p->pack_fd = -1;
 221        return p;
 222}
 223
 224struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
 225{
 226        const char *path = sha1_pack_name(sha1);
 227        size_t alloc = st_add(strlen(path), 1);
 228        struct packed_git *p = alloc_packed_git(alloc);
 229
 230        memcpy(p->pack_name, path, alloc); /* includes NUL */
 231        hashcpy(p->sha1, sha1);
 232        if (check_packed_git_idx(idx_path, p)) {
 233                free(p);
 234                return NULL;
 235        }
 236
 237        return p;
 238}
 239
 240static void scan_windows(struct packed_git *p,
 241        struct packed_git **lru_p,
 242        struct pack_window **lru_w,
 243        struct pack_window **lru_l)
 244{
 245        struct pack_window *w, *w_l;
 246
 247        for (w_l = NULL, w = p->windows; w; w = w->next) {
 248                if (!w->inuse_cnt) {
 249                        if (!*lru_w || w->last_used < (*lru_w)->last_used) {
 250                                *lru_p = p;
 251                                *lru_w = w;
 252                                *lru_l = w_l;
 253                        }
 254                }
 255                w_l = w;
 256        }
 257}
 258
 259static int unuse_one_window(struct packed_git *current)
 260{
 261        struct packed_git *p, *lru_p = NULL;
 262        struct pack_window *lru_w = NULL, *lru_l = NULL;
 263
 264        if (current)
 265                scan_windows(current, &lru_p, &lru_w, &lru_l);
 266        for (p = the_repository->objects->packed_git; p; p = p->next)
 267                scan_windows(p, &lru_p, &lru_w, &lru_l);
 268        if (lru_p) {
 269                munmap(lru_w->base, lru_w->len);
 270                pack_mapped -= lru_w->len;
 271                if (lru_l)
 272                        lru_l->next = lru_w->next;
 273                else
 274                        lru_p->windows = lru_w->next;
 275                free(lru_w);
 276                pack_open_windows--;
 277                return 1;
 278        }
 279        return 0;
 280}
 281
 282void release_pack_memory(size_t need)
 283{
 284        size_t cur = pack_mapped;
 285        while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
 286                ; /* nothing */
 287}
 288
 289void close_pack_windows(struct packed_git *p)
 290{
 291        while (p->windows) {
 292                struct pack_window *w = p->windows;
 293
 294                if (w->inuse_cnt)
 295                        die("pack '%s' still has open windows to it",
 296                            p->pack_name);
 297                munmap(w->base, w->len);
 298                pack_mapped -= w->len;
 299                pack_open_windows--;
 300                p->windows = w->next;
 301                free(w);
 302        }
 303}
 304
 305static int close_pack_fd(struct packed_git *p)
 306{
 307        if (p->pack_fd < 0)
 308                return 0;
 309
 310        close(p->pack_fd);
 311        pack_open_fds--;
 312        p->pack_fd = -1;
 313
 314        return 1;
 315}
 316
 317void close_pack_index(struct packed_git *p)
 318{
 319        if (p->index_data) {
 320                munmap((void *)p->index_data, p->index_size);
 321                p->index_data = NULL;
 322        }
 323}
 324
 325void close_pack(struct packed_git *p)
 326{
 327        close_pack_windows(p);
 328        close_pack_fd(p);
 329        close_pack_index(p);
 330}
 331
 332void close_all_packs(struct raw_object_store *o)
 333{
 334        struct packed_git *p;
 335
 336        for (p = o->packed_git; p; p = p->next)
 337                if (p->do_not_close)
 338                        BUG("want to close pack marked 'do-not-close'");
 339                else
 340                        close_pack(p);
 341}
 342
 343/*
 344 * The LRU pack is the one with the oldest MRU window, preferring packs
 345 * with no used windows, or the oldest mtime if it has no windows allocated.
 346 */
 347static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
 348{
 349        struct pack_window *w, *this_mru_w;
 350        int has_windows_inuse = 0;
 351
 352        /*
 353         * Reject this pack if it has windows and the previously selected
 354         * one does not.  If this pack does not have windows, reject
 355         * it if the pack file is newer than the previously selected one.
 356         */
 357        if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
 358                return;
 359
 360        for (w = this_mru_w = p->windows; w; w = w->next) {
 361                /*
 362                 * Reject this pack if any of its windows are in use,
 363                 * but the previously selected pack did not have any
 364                 * inuse windows.  Otherwise, record that this pack
 365                 * has windows in use.
 366                 */
 367                if (w->inuse_cnt) {
 368                        if (*accept_windows_inuse)
 369                                has_windows_inuse = 1;
 370                        else
 371                                return;
 372                }
 373
 374                if (w->last_used > this_mru_w->last_used)
 375                        this_mru_w = w;
 376
 377                /*
 378                 * Reject this pack if it has windows that have been
 379                 * used more recently than the previously selected pack.
 380                 * If the previously selected pack had windows inuse and
 381                 * we have not encountered a window in this pack that is
 382                 * inuse, skip this check since we prefer a pack with no
 383                 * inuse windows to one that has inuse windows.
 384                 */
 385                if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
 386                    this_mru_w->last_used > (*mru_w)->last_used)
 387                        return;
 388        }
 389
 390        /*
 391         * Select this pack.
 392         */
 393        *mru_w = this_mru_w;
 394        *lru_p = p;
 395        *accept_windows_inuse = has_windows_inuse;
 396}
 397
 398static int close_one_pack(void)
 399{
 400        struct packed_git *p, *lru_p = NULL;
 401        struct pack_window *mru_w = NULL;
 402        int accept_windows_inuse = 1;
 403
 404        for (p = the_repository->objects->packed_git; p; p = p->next) {
 405                if (p->pack_fd == -1)
 406                        continue;
 407                find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
 408        }
 409
 410        if (lru_p)
 411                return close_pack_fd(lru_p);
 412
 413        return 0;
 414}
 415
 416static unsigned int get_max_fd_limit(void)
 417{
 418#ifdef RLIMIT_NOFILE
 419        {
 420                struct rlimit lim;
 421
 422                if (!getrlimit(RLIMIT_NOFILE, &lim))
 423                        return lim.rlim_cur;
 424        }
 425#endif
 426
 427#ifdef _SC_OPEN_MAX
 428        {
 429                long open_max = sysconf(_SC_OPEN_MAX);
 430                if (0 < open_max)
 431                        return open_max;
 432                /*
 433                 * Otherwise, we got -1 for one of the two
 434                 * reasons:
 435                 *
 436                 * (1) sysconf() did not understand _SC_OPEN_MAX
 437                 *     and signaled an error with -1; or
 438                 * (2) sysconf() said there is no limit.
 439                 *
 440                 * We _could_ clear errno before calling sysconf() to
 441                 * tell these two cases apart and return a huge number
 442                 * in the latter case to let the caller cap it to a
 443                 * value that is not so selfish, but letting the
 444                 * fallback OPEN_MAX codepath take care of these cases
 445                 * is a lot simpler.
 446                 */
 447        }
 448#endif
 449
 450#ifdef OPEN_MAX
 451        return OPEN_MAX;
 452#else
 453        return 1; /* see the caller ;-) */
 454#endif
 455}
 456
 457/*
 458 * Do not call this directly as this leaks p->pack_fd on error return;
 459 * call open_packed_git() instead.
 460 */
 461static int open_packed_git_1(struct packed_git *p)
 462{
 463        struct stat st;
 464        struct pack_header hdr;
 465        unsigned char hash[GIT_MAX_RAWSZ];
 466        unsigned char *idx_hash;
 467        long fd_flag;
 468        ssize_t read_result;
 469        const unsigned hashsz = the_hash_algo->rawsz;
 470
 471        if (!p->index_data && open_pack_index(p))
 472                return error("packfile %s index unavailable", p->pack_name);
 473
 474        if (!pack_max_fds) {
 475                unsigned int max_fds = get_max_fd_limit();
 476
 477                /* Save 3 for stdin/stdout/stderr, 22 for work */
 478                if (25 < max_fds)
 479                        pack_max_fds = max_fds - 25;
 480                else
 481                        pack_max_fds = 1;
 482        }
 483
 484        while (pack_max_fds <= pack_open_fds && close_one_pack())
 485                ; /* nothing */
 486
 487        p->pack_fd = git_open(p->pack_name);
 488        if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
 489                return -1;
 490        pack_open_fds++;
 491
 492        /* If we created the struct before we had the pack we lack size. */
 493        if (!p->pack_size) {
 494                if (!S_ISREG(st.st_mode))
 495                        return error("packfile %s not a regular file", p->pack_name);
 496                p->pack_size = st.st_size;
 497        } else if (p->pack_size != st.st_size)
 498                return error("packfile %s size changed", p->pack_name);
 499
 500        /* We leave these file descriptors open with sliding mmap;
 501         * there is no point keeping them open across exec(), though.
 502         */
 503        fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
 504        if (fd_flag < 0)
 505                return error("cannot determine file descriptor flags");
 506        fd_flag |= FD_CLOEXEC;
 507        if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
 508                return error("cannot set FD_CLOEXEC");
 509
 510        /* Verify we recognize this pack file format. */
 511        read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
 512        if (read_result < 0)
 513                return error_errno("error reading from %s", p->pack_name);
 514        if (read_result != sizeof(hdr))
 515                return error("file %s is far too short to be a packfile", p->pack_name);
 516        if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
 517                return error("file %s is not a GIT packfile", p->pack_name);
 518        if (!pack_version_ok(hdr.hdr_version))
 519                return error("packfile %s is version %"PRIu32" and not"
 520                        " supported (try upgrading GIT to a newer version)",
 521                        p->pack_name, ntohl(hdr.hdr_version));
 522
 523        /* Verify the pack matches its index. */
 524        if (p->num_objects != ntohl(hdr.hdr_entries))
 525                return error("packfile %s claims to have %"PRIu32" objects"
 526                             " while index indicates %"PRIu32" objects",
 527                             p->pack_name, ntohl(hdr.hdr_entries),
 528                             p->num_objects);
 529        if (lseek(p->pack_fd, p->pack_size - hashsz, SEEK_SET) == -1)
 530                return error("end of packfile %s is unavailable", p->pack_name);
 531        read_result = read_in_full(p->pack_fd, hash, hashsz);
 532        if (read_result < 0)
 533                return error_errno("error reading from %s", p->pack_name);
 534        if (read_result != hashsz)
 535                return error("packfile %s signature is unavailable", p->pack_name);
 536        idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
 537        if (hashcmp(hash, idx_hash))
 538                return error("packfile %s does not match index", p->pack_name);
 539        return 0;
 540}
 541
 542static int open_packed_git(struct packed_git *p)
 543{
 544        if (!open_packed_git_1(p))
 545                return 0;
 546        close_pack_fd(p);
 547        return -1;
 548}
 549
 550static int in_window(struct pack_window *win, off_t offset)
 551{
 552        /* We must promise at least one full hash after the
 553         * offset is available from this window, otherwise the offset
 554         * is not actually in this window and a different window (which
 555         * has that one hash excess) must be used.  This is to support
 556         * the object header and delta base parsing routines below.
 557         */
 558        off_t win_off = win->offset;
 559        return win_off <= offset
 560                && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
 561}
 562
 563unsigned char *use_pack(struct packed_git *p,
 564                struct pack_window **w_cursor,
 565                off_t offset,
 566                unsigned long *left)
 567{
 568        struct pack_window *win = *w_cursor;
 569
 570        /* Since packfiles end in a hash of their content and it's
 571         * pointless to ask for an offset into the middle of that
 572         * hash, and the in_window function above wouldn't match
 573         * don't allow an offset too close to the end of the file.
 574         */
 575        if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
 576                die("packfile %s cannot be accessed", p->pack_name);
 577        if (offset > (p->pack_size - the_hash_algo->rawsz))
 578                die("offset beyond end of packfile (truncated pack?)");
 579        if (offset < 0)
 580                die(_("offset before end of packfile (broken .idx?)"));
 581
 582        if (!win || !in_window(win, offset)) {
 583                if (win)
 584                        win->inuse_cnt--;
 585                for (win = p->windows; win; win = win->next) {
 586                        if (in_window(win, offset))
 587                                break;
 588                }
 589                if (!win) {
 590                        size_t window_align = packed_git_window_size / 2;
 591                        off_t len;
 592
 593                        if (p->pack_fd == -1 && open_packed_git(p))
 594                                die("packfile %s cannot be accessed", p->pack_name);
 595
 596                        win = xcalloc(1, sizeof(*win));
 597                        win->offset = (offset / window_align) * window_align;
 598                        len = p->pack_size - win->offset;
 599                        if (len > packed_git_window_size)
 600                                len = packed_git_window_size;
 601                        win->len = (size_t)len;
 602                        pack_mapped += win->len;
 603                        while (packed_git_limit < pack_mapped
 604                                && unuse_one_window(p))
 605                                ; /* nothing */
 606                        win->base = xmmap(NULL, win->len,
 607                                PROT_READ, MAP_PRIVATE,
 608                                p->pack_fd, win->offset);
 609                        if (win->base == MAP_FAILED)
 610                                die_errno("packfile %s cannot be mapped",
 611                                          p->pack_name);
 612                        if (!win->offset && win->len == p->pack_size
 613                                && !p->do_not_close)
 614                                close_pack_fd(p);
 615                        pack_mmap_calls++;
 616                        pack_open_windows++;
 617                        if (pack_mapped > peak_pack_mapped)
 618                                peak_pack_mapped = pack_mapped;
 619                        if (pack_open_windows > peak_pack_open_windows)
 620                                peak_pack_open_windows = pack_open_windows;
 621                        win->next = p->windows;
 622                        p->windows = win;
 623                }
 624        }
 625        if (win != *w_cursor) {
 626                win->last_used = pack_used_ctr++;
 627                win->inuse_cnt++;
 628                *w_cursor = win;
 629        }
 630        offset -= win->offset;
 631        if (left)
 632                *left = win->len - xsize_t(offset);
 633        return win->base + offset;
 634}
 635
 636void unuse_pack(struct pack_window **w_cursor)
 637{
 638        struct pack_window *w = *w_cursor;
 639        if (w) {
 640                w->inuse_cnt--;
 641                *w_cursor = NULL;
 642        }
 643}
 644
 645static void try_to_free_pack_memory(size_t size)
 646{
 647        release_pack_memory(size);
 648}
 649
 650struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 651{
 652        static int have_set_try_to_free_routine;
 653        struct stat st;
 654        size_t alloc;
 655        struct packed_git *p;
 656
 657        if (!have_set_try_to_free_routine) {
 658                have_set_try_to_free_routine = 1;
 659                set_try_to_free_routine(try_to_free_pack_memory);
 660        }
 661
 662        /*
 663         * Make sure a corresponding .pack file exists and that
 664         * the index looks sane.
 665         */
 666        if (!strip_suffix_mem(path, &path_len, ".idx"))
 667                return NULL;
 668
 669        /*
 670         * ".promisor" is long enough to hold any suffix we're adding (and
 671         * the use xsnprintf double-checks that)
 672         */
 673        alloc = st_add3(path_len, strlen(".promisor"), 1);
 674        p = alloc_packed_git(alloc);
 675        memcpy(p->pack_name, path, path_len);
 676
 677        xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
 678        if (!access(p->pack_name, F_OK))
 679                p->pack_keep = 1;
 680
 681        xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
 682        if (!access(p->pack_name, F_OK))
 683                p->pack_promisor = 1;
 684
 685        xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
 686        if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
 687                free(p);
 688                return NULL;
 689        }
 690
 691        /* ok, it looks sane as far as we can check without
 692         * actually mapping the pack file.
 693         */
 694        p->pack_size = st.st_size;
 695        p->pack_local = local;
 696        p->mtime = st.st_mtime;
 697        if (path_len < the_hash_algo->hexsz ||
 698            get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->sha1))
 699                hashclr(p->sha1);
 700        return p;
 701}
 702
 703void install_packed_git(struct repository *r, struct packed_git *pack)
 704{
 705        if (pack->pack_fd != -1)
 706                pack_open_fds++;
 707
 708        pack->next = r->objects->packed_git;
 709        r->objects->packed_git = pack;
 710}
 711
 712void (*report_garbage)(unsigned seen_bits, const char *path);
 713
 714static void report_helper(const struct string_list *list,
 715                          int seen_bits, int first, int last)
 716{
 717        if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
 718                return;
 719
 720        for (; first < last; first++)
 721                report_garbage(seen_bits, list->items[first].string);
 722}
 723
 724static void report_pack_garbage(struct string_list *list)
 725{
 726        int i, baselen = -1, first = 0, seen_bits = 0;
 727
 728        if (!report_garbage)
 729                return;
 730
 731        string_list_sort(list);
 732
 733        for (i = 0; i < list->nr; i++) {
 734                const char *path = list->items[i].string;
 735                if (baselen != -1 &&
 736                    strncmp(path, list->items[first].string, baselen)) {
 737                        report_helper(list, seen_bits, first, i);
 738                        baselen = -1;
 739                        seen_bits = 0;
 740                }
 741                if (baselen == -1) {
 742                        const char *dot = strrchr(path, '.');
 743                        if (!dot) {
 744                                report_garbage(PACKDIR_FILE_GARBAGE, path);
 745                                continue;
 746                        }
 747                        baselen = dot - path + 1;
 748                        first = i;
 749                }
 750                if (!strcmp(path + baselen, "pack"))
 751                        seen_bits |= 1;
 752                else if (!strcmp(path + baselen, "idx"))
 753                        seen_bits |= 2;
 754        }
 755        report_helper(list, seen_bits, first, list->nr);
 756}
 757
 758void for_each_file_in_pack_dir(const char *objdir,
 759                               each_file_in_pack_dir_fn fn,
 760                               void *data)
 761{
 762        struct strbuf path = STRBUF_INIT;
 763        size_t dirnamelen;
 764        DIR *dir;
 765        struct dirent *de;
 766
 767        strbuf_addstr(&path, objdir);
 768        strbuf_addstr(&path, "/pack");
 769        dir = opendir(path.buf);
 770        if (!dir) {
 771                if (errno != ENOENT)
 772                        error_errno("unable to open object pack directory: %s",
 773                                    path.buf);
 774                strbuf_release(&path);
 775                return;
 776        }
 777        strbuf_addch(&path, '/');
 778        dirnamelen = path.len;
 779        while ((de = readdir(dir)) != NULL) {
 780                if (is_dot_or_dotdot(de->d_name))
 781                        continue;
 782
 783                strbuf_setlen(&path, dirnamelen);
 784                strbuf_addstr(&path, de->d_name);
 785
 786                fn(path.buf, path.len, de->d_name, data);
 787        }
 788
 789        closedir(dir);
 790        strbuf_release(&path);
 791}
 792
 793struct prepare_pack_data {
 794        struct repository *r;
 795        struct string_list *garbage;
 796        int local;
 797};
 798
 799static void prepare_pack(const char *full_name, size_t full_name_len,
 800                         const char *file_name, void *_data)
 801{
 802        struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
 803        struct packed_git *p;
 804        size_t base_len = full_name_len;
 805
 806        if (strip_suffix_mem(full_name, &base_len, ".idx")) {
 807                /* Don't reopen a pack we already have. */
 808                for (p = data->r->objects->packed_git; p; p = p->next) {
 809                        size_t len;
 810                        if (strip_suffix(p->pack_name, ".pack", &len) &&
 811                            len == base_len &&
 812                            !memcmp(p->pack_name, full_name, len))
 813                                break;
 814                }
 815
 816                if (!p) {
 817                        p = add_packed_git(full_name, full_name_len, data->local);
 818                        if (p)
 819                                install_packed_git(data->r, p);
 820                }
 821        }
 822
 823        if (!report_garbage)
 824                return;
 825
 826        if (ends_with(file_name, ".idx") ||
 827            ends_with(file_name, ".pack") ||
 828            ends_with(file_name, ".bitmap") ||
 829            ends_with(file_name, ".keep") ||
 830            ends_with(file_name, ".promisor"))
 831                string_list_append(data->garbage, full_name);
 832        else
 833                report_garbage(PACKDIR_FILE_GARBAGE, full_name);
 834}
 835
 836static void prepare_packed_git_one(struct repository *r, char *objdir, int local)
 837{
 838        struct prepare_pack_data data;
 839        struct string_list garbage = STRING_LIST_INIT_DUP;
 840
 841        data.r = r;
 842        data.garbage = &garbage;
 843        data.local = local;
 844
 845        for_each_file_in_pack_dir(objdir, prepare_pack, &data);
 846
 847        report_pack_garbage(data.garbage);
 848        string_list_clear(data.garbage, 0);
 849}
 850
 851static void prepare_packed_git(struct repository *r);
 852/*
 853 * Give a fast, rough count of the number of objects in the repository. This
 854 * ignores loose objects completely. If you have a lot of them, then either
 855 * you should repack because your performance will be awful, or they are
 856 * all unreachable objects about to be pruned, in which case they're not really
 857 * interesting as a measure of repo size in the first place.
 858 */
 859unsigned long approximate_object_count(void)
 860{
 861        if (!the_repository->objects->approximate_object_count_valid) {
 862                unsigned long count;
 863                struct packed_git *p;
 864
 865                prepare_packed_git(the_repository);
 866                count = 0;
 867                for (p = the_repository->objects->packed_git; p; p = p->next) {
 868                        if (open_pack_index(p))
 869                                continue;
 870                        count += p->num_objects;
 871                }
 872                the_repository->objects->approximate_object_count = count;
 873        }
 874        return the_repository->objects->approximate_object_count;
 875}
 876
 877static void *get_next_packed_git(const void *p)
 878{
 879        return ((const struct packed_git *)p)->next;
 880}
 881
 882static void set_next_packed_git(void *p, void *next)
 883{
 884        ((struct packed_git *)p)->next = next;
 885}
 886
 887static int sort_pack(const void *a_, const void *b_)
 888{
 889        const struct packed_git *a = a_;
 890        const struct packed_git *b = b_;
 891        int st;
 892
 893        /*
 894         * Local packs tend to contain objects specific to our
 895         * variant of the project than remote ones.  In addition,
 896         * remote ones could be on a network mounted filesystem.
 897         * Favor local ones for these reasons.
 898         */
 899        st = a->pack_local - b->pack_local;
 900        if (st)
 901                return -st;
 902
 903        /*
 904         * Younger packs tend to contain more recent objects,
 905         * and more recent objects tend to get accessed more
 906         * often.
 907         */
 908        if (a->mtime < b->mtime)
 909                return 1;
 910        else if (a->mtime == b->mtime)
 911                return 0;
 912        return -1;
 913}
 914
 915static void rearrange_packed_git(struct repository *r)
 916{
 917        r->objects->packed_git = llist_mergesort(
 918                r->objects->packed_git, get_next_packed_git,
 919                set_next_packed_git, sort_pack);
 920}
 921
 922static void prepare_packed_git_mru(struct repository *r)
 923{
 924        struct packed_git *p;
 925
 926        INIT_LIST_HEAD(&r->objects->packed_git_mru);
 927
 928        for (p = r->objects->packed_git; p; p = p->next)
 929                list_add_tail(&p->mru, &r->objects->packed_git_mru);
 930}
 931
 932static void prepare_packed_git(struct repository *r)
 933{
 934        struct alternate_object_database *alt;
 935
 936        if (r->objects->packed_git_initialized)
 937                return;
 938        prepare_packed_git_one(r, r->objects->objectdir, 1);
 939        prepare_alt_odb(r);
 940        for (alt = r->objects->alt_odb_list; alt; alt = alt->next)
 941                prepare_packed_git_one(r, alt->path, 0);
 942        rearrange_packed_git(r);
 943        prepare_packed_git_mru(r);
 944        r->objects->packed_git_initialized = 1;
 945}
 946
 947void reprepare_packed_git(struct repository *r)
 948{
 949        r->objects->approximate_object_count_valid = 0;
 950        r->objects->packed_git_initialized = 0;
 951        prepare_packed_git(r);
 952}
 953
 954struct packed_git *get_packed_git(struct repository *r)
 955{
 956        prepare_packed_git(r);
 957        return r->objects->packed_git;
 958}
 959
 960struct list_head *get_packed_git_mru(struct repository *r)
 961{
 962        prepare_packed_git(r);
 963        return &r->objects->packed_git_mru;
 964}
 965
 966unsigned long unpack_object_header_buffer(const unsigned char *buf,
 967                unsigned long len, enum object_type *type, unsigned long *sizep)
 968{
 969        unsigned shift;
 970        unsigned long size, c;
 971        unsigned long used = 0;
 972
 973        c = buf[used++];
 974        *type = (c >> 4) & 7;
 975        size = c & 15;
 976        shift = 4;
 977        while (c & 0x80) {
 978                if (len <= used || bitsizeof(long) <= shift) {
 979                        error("bad object header");
 980                        size = used = 0;
 981                        break;
 982                }
 983                c = buf[used++];
 984                size += (c & 0x7f) << shift;
 985                shift += 7;
 986        }
 987        *sizep = size;
 988        return used;
 989}
 990
 991unsigned long get_size_from_delta(struct packed_git *p,
 992                                  struct pack_window **w_curs,
 993                                  off_t curpos)
 994{
 995        const unsigned char *data;
 996        unsigned char delta_head[20], *in;
 997        git_zstream stream;
 998        int st;
 999
1000        memset(&stream, 0, sizeof(stream));
1001        stream.next_out = delta_head;
1002        stream.avail_out = sizeof(delta_head);
1003
1004        git_inflate_init(&stream);
1005        do {
1006                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1007                stream.next_in = in;
1008                st = git_inflate(&stream, Z_FINISH);
1009                curpos += stream.next_in - in;
1010        } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1011                 stream.total_out < sizeof(delta_head));
1012        git_inflate_end(&stream);
1013        if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1014                error("delta data unpack-initial failed");
1015                return 0;
1016        }
1017
1018        /* Examine the initial part of the delta to figure out
1019         * the result size.
1020         */
1021        data = delta_head;
1022
1023        /* ignore base size */
1024        get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1025
1026        /* Read the result size */
1027        return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1028}
1029
1030int unpack_object_header(struct packed_git *p,
1031                         struct pack_window **w_curs,
1032                         off_t *curpos,
1033                         unsigned long *sizep)
1034{
1035        unsigned char *base;
1036        unsigned long left;
1037        unsigned long used;
1038        enum object_type type;
1039
1040        /* use_pack() assures us we have [base, base + 20) available
1041         * as a range that we can look at.  (Its actually the hash
1042         * size that is assured.)  With our object header encoding
1043         * the maximum deflated object size is 2^137, which is just
1044         * insane, so we know won't exceed what we have been given.
1045         */
1046        base = use_pack(p, w_curs, *curpos, &left);
1047        used = unpack_object_header_buffer(base, left, &type, sizep);
1048        if (!used) {
1049                type = OBJ_BAD;
1050        } else
1051                *curpos += used;
1052
1053        return type;
1054}
1055
1056void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
1057{
1058        unsigned i;
1059        for (i = 0; i < p->num_bad_objects; i++)
1060                if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
1061                        return;
1062        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
1063                                      st_mult(GIT_MAX_RAWSZ,
1064                                              st_add(p->num_bad_objects, 1)));
1065        hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
1066        p->num_bad_objects++;
1067}
1068
1069const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1070{
1071        struct packed_git *p;
1072        unsigned i;
1073
1074        for (p = the_repository->objects->packed_git; p; p = p->next)
1075                for (i = 0; i < p->num_bad_objects; i++)
1076                        if (!hashcmp(sha1,
1077                                     p->bad_object_sha1 + the_hash_algo->rawsz * i))
1078                                return p;
1079        return NULL;
1080}
1081
1082static off_t get_delta_base(struct packed_git *p,
1083                                    struct pack_window **w_curs,
1084                                    off_t *curpos,
1085                                    enum object_type type,
1086                                    off_t delta_obj_offset)
1087{
1088        unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1089        off_t base_offset;
1090
1091        /* use_pack() assured us we have [base_info, base_info + 20)
1092         * as a range that we can look at without walking off the
1093         * end of the mapped window.  Its actually the hash size
1094         * that is assured.  An OFS_DELTA longer than the hash size
1095         * is stupid, as then a REF_DELTA would be smaller to store.
1096         */
1097        if (type == OBJ_OFS_DELTA) {
1098                unsigned used = 0;
1099                unsigned char c = base_info[used++];
1100                base_offset = c & 127;
1101                while (c & 128) {
1102                        base_offset += 1;
1103                        if (!base_offset || MSB(base_offset, 7))
1104                                return 0;  /* overflow */
1105                        c = base_info[used++];
1106                        base_offset = (base_offset << 7) + (c & 127);
1107                }
1108                base_offset = delta_obj_offset - base_offset;
1109                if (base_offset <= 0 || base_offset >= delta_obj_offset)
1110                        return 0;  /* out of bound */
1111                *curpos += used;
1112        } else if (type == OBJ_REF_DELTA) {
1113                /* The base entry _must_ be in the same pack */
1114                base_offset = find_pack_entry_one(base_info, p);
1115                *curpos += the_hash_algo->rawsz;
1116        } else
1117                die("I am totally screwed");
1118        return base_offset;
1119}
1120
1121/*
1122 * Like get_delta_base above, but we return the sha1 instead of the pack
1123 * offset. This means it is cheaper for REF deltas (we do not have to do
1124 * the final object lookup), but more expensive for OFS deltas (we
1125 * have to load the revidx to convert the offset back into a sha1).
1126 */
1127static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1128                                                struct pack_window **w_curs,
1129                                                off_t curpos,
1130                                                enum object_type type,
1131                                                off_t delta_obj_offset)
1132{
1133        if (type == OBJ_REF_DELTA) {
1134                unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1135                return base;
1136        } else if (type == OBJ_OFS_DELTA) {
1137                struct revindex_entry *revidx;
1138                off_t base_offset = get_delta_base(p, w_curs, &curpos,
1139                                                   type, delta_obj_offset);
1140
1141                if (!base_offset)
1142                        return NULL;
1143
1144                revidx = find_pack_revindex(p, base_offset);
1145                if (!revidx)
1146                        return NULL;
1147
1148                return nth_packed_object_sha1(p, revidx->nr);
1149        } else
1150                return NULL;
1151}
1152
1153static int retry_bad_packed_offset(struct repository *r,
1154                                   struct packed_git *p,
1155                                   off_t obj_offset)
1156{
1157        int type;
1158        struct revindex_entry *revidx;
1159        struct object_id oid;
1160        revidx = find_pack_revindex(p, obj_offset);
1161        if (!revidx)
1162                return OBJ_BAD;
1163        nth_packed_object_oid(&oid, p, revidx->nr);
1164        mark_bad_packed_object(p, oid.hash);
1165        type = oid_object_info(r, &oid, NULL);
1166        if (type <= OBJ_NONE)
1167                return OBJ_BAD;
1168        return type;
1169}
1170
1171#define POI_STACK_PREALLOC 64
1172
1173static enum object_type packed_to_object_type(struct repository *r,
1174                                              struct packed_git *p,
1175                                              off_t obj_offset,
1176                                              enum object_type type,
1177                                              struct pack_window **w_curs,
1178                                              off_t curpos)
1179{
1180        off_t small_poi_stack[POI_STACK_PREALLOC];
1181        off_t *poi_stack = small_poi_stack;
1182        int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1183
1184        while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1185                off_t base_offset;
1186                unsigned long size;
1187                /* Push the object we're going to leave behind */
1188                if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1189                        poi_stack_alloc = alloc_nr(poi_stack_nr);
1190                        ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1191                        memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1192                } else {
1193                        ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1194                }
1195                poi_stack[poi_stack_nr++] = obj_offset;
1196                /* If parsing the base offset fails, just unwind */
1197                base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1198                if (!base_offset)
1199                        goto unwind;
1200                curpos = obj_offset = base_offset;
1201                type = unpack_object_header(p, w_curs, &curpos, &size);
1202                if (type <= OBJ_NONE) {
1203                        /* If getting the base itself fails, we first
1204                         * retry the base, otherwise unwind */
1205                        type = retry_bad_packed_offset(r, p, base_offset);
1206                        if (type > OBJ_NONE)
1207                                goto out;
1208                        goto unwind;
1209                }
1210        }
1211
1212        switch (type) {
1213        case OBJ_BAD:
1214        case OBJ_COMMIT:
1215        case OBJ_TREE:
1216        case OBJ_BLOB:
1217        case OBJ_TAG:
1218                break;
1219        default:
1220                error("unknown object type %i at offset %"PRIuMAX" in %s",
1221                      type, (uintmax_t)obj_offset, p->pack_name);
1222                type = OBJ_BAD;
1223        }
1224
1225out:
1226        if (poi_stack != small_poi_stack)
1227                free(poi_stack);
1228        return type;
1229
1230unwind:
1231        while (poi_stack_nr) {
1232                obj_offset = poi_stack[--poi_stack_nr];
1233                type = retry_bad_packed_offset(r, p, obj_offset);
1234                if (type > OBJ_NONE)
1235                        goto out;
1236        }
1237        type = OBJ_BAD;
1238        goto out;
1239}
1240
1241static struct hashmap delta_base_cache;
1242static size_t delta_base_cached;
1243
1244static LIST_HEAD(delta_base_cache_lru);
1245
1246struct delta_base_cache_key {
1247        struct packed_git *p;
1248        off_t base_offset;
1249};
1250
1251struct delta_base_cache_entry {
1252        struct hashmap hash;
1253        struct delta_base_cache_key key;
1254        struct list_head lru;
1255        void *data;
1256        unsigned long size;
1257        enum object_type type;
1258};
1259
1260static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1261{
1262        unsigned int hash;
1263
1264        hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1265        hash += (hash >> 8) + (hash >> 16);
1266        return hash;
1267}
1268
1269static struct delta_base_cache_entry *
1270get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1271{
1272        struct hashmap_entry entry;
1273        struct delta_base_cache_key key;
1274
1275        if (!delta_base_cache.cmpfn)
1276                return NULL;
1277
1278        hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1279        key.p = p;
1280        key.base_offset = base_offset;
1281        return hashmap_get(&delta_base_cache, &entry, &key);
1282}
1283
1284static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1285                                   const struct delta_base_cache_key *b)
1286{
1287        return a->p == b->p && a->base_offset == b->base_offset;
1288}
1289
1290static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1291                                     const void *va, const void *vb,
1292                                     const void *vkey)
1293{
1294        const struct delta_base_cache_entry *a = va, *b = vb;
1295        const struct delta_base_cache_key *key = vkey;
1296        if (key)
1297                return !delta_base_cache_key_eq(&a->key, key);
1298        else
1299                return !delta_base_cache_key_eq(&a->key, &b->key);
1300}
1301
1302static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1303{
1304        return !!get_delta_base_cache_entry(p, base_offset);
1305}
1306
1307/*
1308 * Remove the entry from the cache, but do _not_ free the associated
1309 * entry data. The caller takes ownership of the "data" buffer, and
1310 * should copy out any fields it wants before detaching.
1311 */
1312static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1313{
1314        hashmap_remove(&delta_base_cache, ent, &ent->key);
1315        list_del(&ent->lru);
1316        delta_base_cached -= ent->size;
1317        free(ent);
1318}
1319
1320static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
1321                                   off_t base_offset, unsigned long *base_size,
1322                                   enum object_type *type)
1323{
1324        struct delta_base_cache_entry *ent;
1325
1326        ent = get_delta_base_cache_entry(p, base_offset);
1327        if (!ent)
1328                return unpack_entry(r, p, base_offset, type, base_size);
1329
1330        if (type)
1331                *type = ent->type;
1332        if (base_size)
1333                *base_size = ent->size;
1334        return xmemdupz(ent->data, ent->size);
1335}
1336
1337static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1338{
1339        free(ent->data);
1340        detach_delta_base_cache_entry(ent);
1341}
1342
1343void clear_delta_base_cache(void)
1344{
1345        struct list_head *lru, *tmp;
1346        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1347                struct delta_base_cache_entry *entry =
1348                        list_entry(lru, struct delta_base_cache_entry, lru);
1349                release_delta_base_cache(entry);
1350        }
1351}
1352
1353static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1354        void *base, unsigned long base_size, enum object_type type)
1355{
1356        struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1357        struct list_head *lru, *tmp;
1358
1359        delta_base_cached += base_size;
1360
1361        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1362                struct delta_base_cache_entry *f =
1363                        list_entry(lru, struct delta_base_cache_entry, lru);
1364                if (delta_base_cached <= delta_base_cache_limit)
1365                        break;
1366                release_delta_base_cache(f);
1367        }
1368
1369        ent->key.p = p;
1370        ent->key.base_offset = base_offset;
1371        ent->type = type;
1372        ent->data = base;
1373        ent->size = base_size;
1374        list_add_tail(&ent->lru, &delta_base_cache_lru);
1375
1376        if (!delta_base_cache.cmpfn)
1377                hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1378        hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1379        hashmap_add(&delta_base_cache, ent);
1380}
1381
1382int packed_object_info(struct repository *r, struct packed_git *p,
1383                       off_t obj_offset, struct object_info *oi)
1384{
1385        struct pack_window *w_curs = NULL;
1386        unsigned long size;
1387        off_t curpos = obj_offset;
1388        enum object_type type;
1389
1390        /*
1391         * We always get the representation type, but only convert it to
1392         * a "real" type later if the caller is interested.
1393         */
1394        if (oi->contentp) {
1395                *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
1396                                                      &type);
1397                if (!*oi->contentp)
1398                        type = OBJ_BAD;
1399        } else {
1400                type = unpack_object_header(p, &w_curs, &curpos, &size);
1401        }
1402
1403        if (!oi->contentp && oi->sizep) {
1404                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1405                        off_t tmp_pos = curpos;
1406                        off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1407                                                           type, obj_offset);
1408                        if (!base_offset) {
1409                                type = OBJ_BAD;
1410                                goto out;
1411                        }
1412                        *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1413                        if (*oi->sizep == 0) {
1414                                type = OBJ_BAD;
1415                                goto out;
1416                        }
1417                } else {
1418                        *oi->sizep = size;
1419                }
1420        }
1421
1422        if (oi->disk_sizep) {
1423                struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1424                *oi->disk_sizep = revidx[1].offset - obj_offset;
1425        }
1426
1427        if (oi->typep || oi->type_name) {
1428                enum object_type ptot;
1429                ptot = packed_to_object_type(r, p, obj_offset,
1430                                             type, &w_curs, curpos);
1431                if (oi->typep)
1432                        *oi->typep = ptot;
1433                if (oi->type_name) {
1434                        const char *tn = type_name(ptot);
1435                        if (tn)
1436                                strbuf_addstr(oi->type_name, tn);
1437                }
1438                if (ptot < 0) {
1439                        type = OBJ_BAD;
1440                        goto out;
1441                }
1442        }
1443
1444        if (oi->delta_base_sha1) {
1445                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1446                        const unsigned char *base;
1447
1448                        base = get_delta_base_sha1(p, &w_curs, curpos,
1449                                                   type, obj_offset);
1450                        if (!base) {
1451                                type = OBJ_BAD;
1452                                goto out;
1453                        }
1454
1455                        hashcpy(oi->delta_base_sha1, base);
1456                } else
1457                        hashclr(oi->delta_base_sha1);
1458        }
1459
1460        oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1461                                                          OI_PACKED;
1462
1463out:
1464        unuse_pack(&w_curs);
1465        return type;
1466}
1467
1468static void *unpack_compressed_entry(struct packed_git *p,
1469                                    struct pack_window **w_curs,
1470                                    off_t curpos,
1471                                    unsigned long size)
1472{
1473        int st;
1474        git_zstream stream;
1475        unsigned char *buffer, *in;
1476
1477        buffer = xmallocz_gently(size);
1478        if (!buffer)
1479                return NULL;
1480        memset(&stream, 0, sizeof(stream));
1481        stream.next_out = buffer;
1482        stream.avail_out = size + 1;
1483
1484        git_inflate_init(&stream);
1485        do {
1486                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1487                stream.next_in = in;
1488                st = git_inflate(&stream, Z_FINISH);
1489                if (!stream.avail_out)
1490                        break; /* the payload is larger than it should be */
1491                curpos += stream.next_in - in;
1492        } while (st == Z_OK || st == Z_BUF_ERROR);
1493        git_inflate_end(&stream);
1494        if ((st != Z_STREAM_END) || stream.total_out != size) {
1495                free(buffer);
1496                return NULL;
1497        }
1498
1499        /* versions of zlib can clobber unconsumed portion of outbuf */
1500        buffer[size] = '\0';
1501
1502        return buffer;
1503}
1504
1505static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1506{
1507        static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1508        trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1509                         p->pack_name, (uintmax_t)obj_offset);
1510}
1511
1512int do_check_packed_object_crc;
1513
1514#define UNPACK_ENTRY_STACK_PREALLOC 64
1515struct unpack_entry_stack_ent {
1516        off_t obj_offset;
1517        off_t curpos;
1518        unsigned long size;
1519};
1520
1521static void *read_object(struct repository *r,
1522                         const struct object_id *oid,
1523                         enum object_type *type,
1524                         unsigned long *size)
1525{
1526        struct object_info oi = OBJECT_INFO_INIT;
1527        void *content;
1528        oi.typep = type;
1529        oi.sizep = size;
1530        oi.contentp = &content;
1531
1532        if (oid_object_info_extended(r, oid, &oi, 0) < 0)
1533                return NULL;
1534        return content;
1535}
1536
1537void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
1538                   enum object_type *final_type, unsigned long *final_size)
1539{
1540        struct pack_window *w_curs = NULL;
1541        off_t curpos = obj_offset;
1542        void *data = NULL;
1543        unsigned long size;
1544        enum object_type type;
1545        struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1546        struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1547        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1548        int base_from_cache = 0;
1549
1550        write_pack_access_log(p, obj_offset);
1551
1552        /* PHASE 1: drill down to the innermost base object */
1553        for (;;) {
1554                off_t base_offset;
1555                int i;
1556                struct delta_base_cache_entry *ent;
1557
1558                ent = get_delta_base_cache_entry(p, curpos);
1559                if (ent) {
1560                        type = ent->type;
1561                        data = ent->data;
1562                        size = ent->size;
1563                        detach_delta_base_cache_entry(ent);
1564                        base_from_cache = 1;
1565                        break;
1566                }
1567
1568                if (do_check_packed_object_crc && p->index_version > 1) {
1569                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1570                        off_t len = revidx[1].offset - obj_offset;
1571                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1572                                struct object_id oid;
1573                                nth_packed_object_oid(&oid, p, revidx->nr);
1574                                error("bad packed object CRC for %s",
1575                                      oid_to_hex(&oid));
1576                                mark_bad_packed_object(p, oid.hash);
1577                                data = NULL;
1578                                goto out;
1579                        }
1580                }
1581
1582                type = unpack_object_header(p, &w_curs, &curpos, &size);
1583                if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1584                        break;
1585
1586                base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1587                if (!base_offset) {
1588                        error("failed to validate delta base reference "
1589                              "at offset %"PRIuMAX" from %s",
1590                              (uintmax_t)curpos, p->pack_name);
1591                        /* bail to phase 2, in hopes of recovery */
1592                        data = NULL;
1593                        break;
1594                }
1595
1596                /* push object, proceed to base */
1597                if (delta_stack_nr >= delta_stack_alloc
1598                    && delta_stack == small_delta_stack) {
1599                        delta_stack_alloc = alloc_nr(delta_stack_nr);
1600                        ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1601                        memcpy(delta_stack, small_delta_stack,
1602                               sizeof(*delta_stack)*delta_stack_nr);
1603                } else {
1604                        ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1605                }
1606                i = delta_stack_nr++;
1607                delta_stack[i].obj_offset = obj_offset;
1608                delta_stack[i].curpos = curpos;
1609                delta_stack[i].size = size;
1610
1611                curpos = obj_offset = base_offset;
1612        }
1613
1614        /* PHASE 2: handle the base */
1615        switch (type) {
1616        case OBJ_OFS_DELTA:
1617        case OBJ_REF_DELTA:
1618                if (data)
1619                        BUG("unpack_entry: left loop at a valid delta");
1620                break;
1621        case OBJ_COMMIT:
1622        case OBJ_TREE:
1623        case OBJ_BLOB:
1624        case OBJ_TAG:
1625                if (!base_from_cache)
1626                        data = unpack_compressed_entry(p, &w_curs, curpos, size);
1627                break;
1628        default:
1629                data = NULL;
1630                error("unknown object type %i at offset %"PRIuMAX" in %s",
1631                      type, (uintmax_t)obj_offset, p->pack_name);
1632        }
1633
1634        /* PHASE 3: apply deltas in order */
1635
1636        /* invariants:
1637         *   'data' holds the base data, or NULL if there was corruption
1638         */
1639        while (delta_stack_nr) {
1640                void *delta_data;
1641                void *base = data;
1642                void *external_base = NULL;
1643                unsigned long delta_size, base_size = size;
1644                int i;
1645
1646                data = NULL;
1647
1648                if (base)
1649                        add_delta_base_cache(p, obj_offset, base, base_size, type);
1650
1651                if (!base) {
1652                        /*
1653                         * We're probably in deep shit, but let's try to fetch
1654                         * the required base anyway from another pack or loose.
1655                         * This is costly but should happen only in the presence
1656                         * of a corrupted pack, and is better than failing outright.
1657                         */
1658                        struct revindex_entry *revidx;
1659                        struct object_id base_oid;
1660                        revidx = find_pack_revindex(p, obj_offset);
1661                        if (revidx) {
1662                                nth_packed_object_oid(&base_oid, p, revidx->nr);
1663                                error("failed to read delta base object %s"
1664                                      " at offset %"PRIuMAX" from %s",
1665                                      oid_to_hex(&base_oid), (uintmax_t)obj_offset,
1666                                      p->pack_name);
1667                                mark_bad_packed_object(p, base_oid.hash);
1668                                base = read_object(r, &base_oid, &type, &base_size);
1669                                external_base = base;
1670                        }
1671                }
1672
1673                i = --delta_stack_nr;
1674                obj_offset = delta_stack[i].obj_offset;
1675                curpos = delta_stack[i].curpos;
1676                delta_size = delta_stack[i].size;
1677
1678                if (!base)
1679                        continue;
1680
1681                delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1682
1683                if (!delta_data) {
1684                        error("failed to unpack compressed delta "
1685                              "at offset %"PRIuMAX" from %s",
1686                              (uintmax_t)curpos, p->pack_name);
1687                        data = NULL;
1688                        free(external_base);
1689                        continue;
1690                }
1691
1692                data = patch_delta(base, base_size,
1693                                   delta_data, delta_size,
1694                                   &size);
1695
1696                /*
1697                 * We could not apply the delta; warn the user, but keep going.
1698                 * Our failure will be noticed either in the next iteration of
1699                 * the loop, or if this is the final delta, in the caller when
1700                 * we return NULL. Those code paths will take care of making
1701                 * a more explicit warning and retrying with another copy of
1702                 * the object.
1703                 */
1704                if (!data)
1705                        error("failed to apply delta");
1706
1707                free(delta_data);
1708                free(external_base);
1709        }
1710
1711        if (final_type)
1712                *final_type = type;
1713        if (final_size)
1714                *final_size = size;
1715
1716out:
1717        unuse_pack(&w_curs);
1718
1719        if (delta_stack != small_delta_stack)
1720                free(delta_stack);
1721
1722        return data;
1723}
1724
1725int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
1726{
1727        const unsigned char *index_fanout = p->index_data;
1728        const unsigned char *index_lookup;
1729        const unsigned int hashsz = the_hash_algo->rawsz;
1730        int index_lookup_width;
1731
1732        if (!index_fanout)
1733                BUG("bsearch_pack called without a valid pack-index");
1734
1735        index_lookup = index_fanout + 4 * 256;
1736        if (p->index_version == 1) {
1737                index_lookup_width = hashsz + 4;
1738                index_lookup += 4;
1739        } else {
1740                index_lookup_width = hashsz;
1741                index_fanout += 8;
1742                index_lookup += 8;
1743        }
1744
1745        return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,
1746                            index_lookup, index_lookup_width, result);
1747}
1748
1749const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1750                                            uint32_t n)
1751{
1752        const unsigned char *index = p->index_data;
1753        const unsigned int hashsz = the_hash_algo->rawsz;
1754        if (!index) {
1755                if (open_pack_index(p))
1756                        return NULL;
1757                index = p->index_data;
1758        }
1759        if (n >= p->num_objects)
1760                return NULL;
1761        index += 4 * 256;
1762        if (p->index_version == 1) {
1763                return index + (hashsz + 4) * n + 4;
1764        } else {
1765                index += 8;
1766                return index + hashsz * n;
1767        }
1768}
1769
1770const struct object_id *nth_packed_object_oid(struct object_id *oid,
1771                                              struct packed_git *p,
1772                                              uint32_t n)
1773{
1774        const unsigned char *hash = nth_packed_object_sha1(p, n);
1775        if (!hash)
1776                return NULL;
1777        hashcpy(oid->hash, hash);
1778        return oid;
1779}
1780
1781void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1782{
1783        const unsigned char *ptr = vptr;
1784        const unsigned char *start = p->index_data;
1785        const unsigned char *end = start + p->index_size;
1786        if (ptr < start)
1787                die(_("offset before start of pack index for %s (corrupt index?)"),
1788                    p->pack_name);
1789        /* No need to check for underflow; .idx files must be at least 8 bytes */
1790        if (ptr >= end - 8)
1791                die(_("offset beyond end of pack index for %s (truncated index?)"),
1792                    p->pack_name);
1793}
1794
1795off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1796{
1797        const unsigned char *index = p->index_data;
1798        const unsigned int hashsz = the_hash_algo->rawsz;
1799        index += 4 * 256;
1800        if (p->index_version == 1) {
1801                return ntohl(*((uint32_t *)(index + (hashsz + 4) * n)));
1802        } else {
1803                uint32_t off;
1804                index += 8 + p->num_objects * (hashsz + 4);
1805                off = ntohl(*((uint32_t *)(index + 4 * n)));
1806                if (!(off & 0x80000000))
1807                        return off;
1808                index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1809                check_pack_index_ptr(p, index);
1810                return get_be64(index);
1811        }
1812}
1813
1814off_t find_pack_entry_one(const unsigned char *sha1,
1815                                  struct packed_git *p)
1816{
1817        const unsigned char *index = p->index_data;
1818        struct object_id oid;
1819        uint32_t result;
1820
1821        if (!index) {
1822                if (open_pack_index(p))
1823                        return 0;
1824        }
1825
1826        hashcpy(oid.hash, sha1);
1827        if (bsearch_pack(&oid, p, &result))
1828                return nth_packed_object_offset(p, result);
1829        return 0;
1830}
1831
1832int is_pack_valid(struct packed_git *p)
1833{
1834        /* An already open pack is known to be valid. */
1835        if (p->pack_fd != -1)
1836                return 1;
1837
1838        /* If the pack has one window completely covering the
1839         * file size, the pack is known to be valid even if
1840         * the descriptor is not currently open.
1841         */
1842        if (p->windows) {
1843                struct pack_window *w = p->windows;
1844
1845                if (!w->offset && w->len == p->pack_size)
1846                        return 1;
1847        }
1848
1849        /* Force the pack to open to prove its valid. */
1850        return !open_packed_git(p);
1851}
1852
1853struct packed_git *find_sha1_pack(const unsigned char *sha1,
1854                                  struct packed_git *packs)
1855{
1856        struct packed_git *p;
1857
1858        for (p = packs; p; p = p->next) {
1859                if (find_pack_entry_one(sha1, p))
1860                        return p;
1861        }
1862        return NULL;
1863
1864}
1865
1866static int fill_pack_entry(const struct object_id *oid,
1867                           struct pack_entry *e,
1868                           struct packed_git *p)
1869{
1870        off_t offset;
1871
1872        if (p->num_bad_objects) {
1873                unsigned i;
1874                for (i = 0; i < p->num_bad_objects; i++)
1875                        if (!hashcmp(oid->hash,
1876                                     p->bad_object_sha1 + the_hash_algo->rawsz * i))
1877                                return 0;
1878        }
1879
1880        offset = find_pack_entry_one(oid->hash, p);
1881        if (!offset)
1882                return 0;
1883
1884        /*
1885         * We are about to tell the caller where they can locate the
1886         * requested object.  We better make sure the packfile is
1887         * still here and can be accessed before supplying that
1888         * answer, as it may have been deleted since the index was
1889         * loaded!
1890         */
1891        if (!is_pack_valid(p))
1892                return 0;
1893        e->offset = offset;
1894        e->p = p;
1895        return 1;
1896}
1897
1898int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
1899{
1900        struct list_head *pos;
1901
1902        prepare_packed_git(r);
1903        if (!r->objects->packed_git)
1904                return 0;
1905
1906        list_for_each(pos, &r->objects->packed_git_mru) {
1907                struct packed_git *p = list_entry(pos, struct packed_git, mru);
1908                if (fill_pack_entry(oid, e, p)) {
1909                        list_move(&p->mru, &r->objects->packed_git_mru);
1910                        return 1;
1911                }
1912        }
1913        return 0;
1914}
1915
1916int has_object_pack(const struct object_id *oid)
1917{
1918        struct pack_entry e;
1919        return find_pack_entry(the_repository, oid, &e);
1920}
1921
1922int has_pack_index(const unsigned char *sha1)
1923{
1924        struct stat st;
1925        if (stat(sha1_pack_index_name(sha1), &st))
1926                return 0;
1927        return 1;
1928}
1929
1930int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1931{
1932        uint32_t i;
1933        int r = 0;
1934
1935        for (i = 0; i < p->num_objects; i++) {
1936                struct object_id oid;
1937
1938                if (!nth_packed_object_oid(&oid, p, i))
1939                        return error("unable to get sha1 of object %u in %s",
1940                                     i, p->pack_name);
1941
1942                r = cb(&oid, p, i, data);
1943                if (r)
1944                        break;
1945        }
1946        return r;
1947}
1948
1949int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1950{
1951        struct packed_git *p;
1952        int r = 0;
1953        int pack_errors = 0;
1954
1955        prepare_packed_git(the_repository);
1956        for (p = the_repository->objects->packed_git; p; p = p->next) {
1957                if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1958                        continue;
1959                if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
1960                    !p->pack_promisor)
1961                        continue;
1962                if (open_pack_index(p)) {
1963                        pack_errors = 1;
1964                        continue;
1965                }
1966                r = for_each_object_in_pack(p, cb, data);
1967                if (r)
1968                        break;
1969        }
1970        return r ? r : pack_errors;
1971}
1972
1973static int add_promisor_object(const struct object_id *oid,
1974                               struct packed_git *pack,
1975                               uint32_t pos,
1976                               void *set_)
1977{
1978        struct oidset *set = set_;
1979        struct object *obj = parse_object(oid);
1980        if (!obj)
1981                return 1;
1982
1983        oidset_insert(set, oid);
1984
1985        /*
1986         * If this is a tree, commit, or tag, the objects it refers
1987         * to are also promisor objects. (Blobs refer to no objects->)
1988         */
1989        if (obj->type == OBJ_TREE) {
1990                struct tree *tree = (struct tree *)obj;
1991                struct tree_desc desc;
1992                struct name_entry entry;
1993                if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
1994                        /*
1995                         * Error messages are given when packs are
1996                         * verified, so do not print any here.
1997                         */
1998                        return 0;
1999                while (tree_entry_gently(&desc, &entry))
2000                        oidset_insert(set, entry.oid);
2001        } else if (obj->type == OBJ_COMMIT) {
2002                struct commit *commit = (struct commit *) obj;
2003                struct commit_list *parents = commit->parents;
2004
2005                oidset_insert(set, get_commit_tree_oid(commit));
2006                for (; parents; parents = parents->next)
2007                        oidset_insert(set, &parents->item->object.oid);
2008        } else if (obj->type == OBJ_TAG) {
2009                struct tag *tag = (struct tag *) obj;
2010                oidset_insert(set, &tag->tagged->oid);
2011        }
2012        return 0;
2013}
2014
2015int is_promisor_object(const struct object_id *oid)
2016{
2017        static struct oidset promisor_objects;
2018        static int promisor_objects_prepared;
2019
2020        if (!promisor_objects_prepared) {
2021                if (repository_format_partial_clone) {
2022                        for_each_packed_object(add_promisor_object,
2023                                               &promisor_objects,
2024                                               FOR_EACH_OBJECT_PROMISOR_ONLY);
2025                }
2026                promisor_objects_prepared = 1;
2027        }
2028        return oidset_contains(&promisor_objects, oid);
2029}