packfile.con commit object-store: migrate alternates struct and functions from cache.h (0d4a132)
   1#include "cache.h"
   2#include "list.h"
   3#include "pack.h"
   4#include "dir.h"
   5#include "mergesort.h"
   6#include "packfile.h"
   7#include "delta.h"
   8#include "list.h"
   9#include "streaming.h"
  10#include "sha1-lookup.h"
  11#include "commit.h"
  12#include "object.h"
  13#include "tag.h"
  14#include "tree-walk.h"
  15#include "tree.h"
  16#include "object-store.h"
  17
  18char *odb_pack_name(struct strbuf *buf,
  19                    const unsigned char *sha1,
  20                    const char *ext)
  21{
  22        strbuf_reset(buf);
  23        strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
  24                    sha1_to_hex(sha1), ext);
  25        return buf->buf;
  26}
  27
  28char *sha1_pack_name(const unsigned char *sha1)
  29{
  30        static struct strbuf buf = STRBUF_INIT;
  31        return odb_pack_name(&buf, sha1, "pack");
  32}
  33
  34char *sha1_pack_index_name(const unsigned char *sha1)
  35{
  36        static struct strbuf buf = STRBUF_INIT;
  37        return odb_pack_name(&buf, sha1, "idx");
  38}
  39
  40static unsigned int pack_used_ctr;
  41static unsigned int pack_mmap_calls;
  42static unsigned int peak_pack_open_windows;
  43static unsigned int pack_open_windows;
  44static unsigned int pack_open_fds;
  45static unsigned int pack_max_fds;
  46static size_t peak_pack_mapped;
  47static size_t pack_mapped;
  48struct packed_git *packed_git;
  49LIST_HEAD(packed_git_mru);
  50
  51#define SZ_FMT PRIuMAX
  52static inline uintmax_t sz_fmt(size_t s) { return s; }
  53
  54void pack_report(void)
  55{
  56        fprintf(stderr,
  57                "pack_report: getpagesize()            = %10" SZ_FMT "\n"
  58                "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
  59                "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
  60                sz_fmt(getpagesize()),
  61                sz_fmt(packed_git_window_size),
  62                sz_fmt(packed_git_limit));
  63        fprintf(stderr,
  64                "pack_report: pack_used_ctr            = %10u\n"
  65                "pack_report: pack_mmap_calls          = %10u\n"
  66                "pack_report: pack_open_windows        = %10u / %10u\n"
  67                "pack_report: pack_mapped              = "
  68                        "%10" SZ_FMT " / %10" SZ_FMT "\n",
  69                pack_used_ctr,
  70                pack_mmap_calls,
  71                pack_open_windows, peak_pack_open_windows,
  72                sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
  73}
  74
  75/*
  76 * Open and mmap the index file at path, perform a couple of
  77 * consistency checks, then record its information to p.  Return 0 on
  78 * success.
  79 */
  80static int check_packed_git_idx(const char *path, struct packed_git *p)
  81{
  82        void *idx_map;
  83        struct pack_idx_header *hdr;
  84        size_t idx_size;
  85        uint32_t version, nr, i, *index;
  86        int fd = git_open(path);
  87        struct stat st;
  88
  89        if (fd < 0)
  90                return -1;
  91        if (fstat(fd, &st)) {
  92                close(fd);
  93                return -1;
  94        }
  95        idx_size = xsize_t(st.st_size);
  96        if (idx_size < 4 * 256 + 20 + 20) {
  97                close(fd);
  98                return error("index file %s is too small", path);
  99        }
 100        idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 101        close(fd);
 102
 103        hdr = idx_map;
 104        if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
 105                version = ntohl(hdr->idx_version);
 106                if (version < 2 || version > 2) {
 107                        munmap(idx_map, idx_size);
 108                        return error("index file %s is version %"PRIu32
 109                                     " and is not supported by this binary"
 110                                     " (try upgrading GIT to a newer version)",
 111                                     path, version);
 112                }
 113        } else
 114                version = 1;
 115
 116        nr = 0;
 117        index = idx_map;
 118        if (version > 1)
 119                index += 2;  /* skip index header */
 120        for (i = 0; i < 256; i++) {
 121                uint32_t n = ntohl(index[i]);
 122                if (n < nr) {
 123                        munmap(idx_map, idx_size);
 124                        return error("non-monotonic index %s", path);
 125                }
 126                nr = n;
 127        }
 128
 129        if (version == 1) {
 130                /*
 131                 * Total size:
 132                 *  - 256 index entries 4 bytes each
 133                 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
 134                 *  - 20-byte SHA1 of the packfile
 135                 *  - 20-byte SHA1 file checksum
 136                 */
 137                if (idx_size != 4*256 + nr * 24 + 20 + 20) {
 138                        munmap(idx_map, idx_size);
 139                        return error("wrong index v1 file size in %s", path);
 140                }
 141        } else if (version == 2) {
 142                /*
 143                 * Minimum size:
 144                 *  - 8 bytes of header
 145                 *  - 256 index entries 4 bytes each
 146                 *  - 20-byte sha1 entry * nr
 147                 *  - 4-byte crc entry * nr
 148                 *  - 4-byte offset entry * nr
 149                 *  - 20-byte SHA1 of the packfile
 150                 *  - 20-byte SHA1 file checksum
 151                 * And after the 4-byte offset table might be a
 152                 * variable sized table containing 8-byte entries
 153                 * for offsets larger than 2^31.
 154                 */
 155                unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
 156                unsigned long max_size = min_size;
 157                if (nr)
 158                        max_size += (nr - 1)*8;
 159                if (idx_size < min_size || idx_size > max_size) {
 160                        munmap(idx_map, idx_size);
 161                        return error("wrong index v2 file size in %s", path);
 162                }
 163                if (idx_size != min_size &&
 164                    /*
 165                     * make sure we can deal with large pack offsets.
 166                     * 31-bit signed offset won't be enough, neither
 167                     * 32-bit unsigned one will be.
 168                     */
 169                    (sizeof(off_t) <= 4)) {
 170                        munmap(idx_map, idx_size);
 171                        return error("pack too large for current definition of off_t in %s", path);
 172                }
 173        }
 174
 175        p->index_version = version;
 176        p->index_data = idx_map;
 177        p->index_size = idx_size;
 178        p->num_objects = nr;
 179        return 0;
 180}
 181
 182int open_pack_index(struct packed_git *p)
 183{
 184        char *idx_name;
 185        size_t len;
 186        int ret;
 187
 188        if (p->index_data)
 189                return 0;
 190
 191        if (!strip_suffix(p->pack_name, ".pack", &len))
 192                die("BUG: pack_name does not end in .pack");
 193        idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
 194        ret = check_packed_git_idx(idx_name, p);
 195        free(idx_name);
 196        return ret;
 197}
 198
 199static struct packed_git *alloc_packed_git(int extra)
 200{
 201        struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
 202        memset(p, 0, sizeof(*p));
 203        p->pack_fd = -1;
 204        return p;
 205}
 206
 207struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
 208{
 209        const char *path = sha1_pack_name(sha1);
 210        size_t alloc = st_add(strlen(path), 1);
 211        struct packed_git *p = alloc_packed_git(alloc);
 212
 213        memcpy(p->pack_name, path, alloc); /* includes NUL */
 214        hashcpy(p->sha1, sha1);
 215        if (check_packed_git_idx(idx_path, p)) {
 216                free(p);
 217                return NULL;
 218        }
 219
 220        return p;
 221}
 222
 223static void scan_windows(struct packed_git *p,
 224        struct packed_git **lru_p,
 225        struct pack_window **lru_w,
 226        struct pack_window **lru_l)
 227{
 228        struct pack_window *w, *w_l;
 229
 230        for (w_l = NULL, w = p->windows; w; w = w->next) {
 231                if (!w->inuse_cnt) {
 232                        if (!*lru_w || w->last_used < (*lru_w)->last_used) {
 233                                *lru_p = p;
 234                                *lru_w = w;
 235                                *lru_l = w_l;
 236                        }
 237                }
 238                w_l = w;
 239        }
 240}
 241
 242static int unuse_one_window(struct packed_git *current)
 243{
 244        struct packed_git *p, *lru_p = NULL;
 245        struct pack_window *lru_w = NULL, *lru_l = NULL;
 246
 247        if (current)
 248                scan_windows(current, &lru_p, &lru_w, &lru_l);
 249        for (p = packed_git; p; p = p->next)
 250                scan_windows(p, &lru_p, &lru_w, &lru_l);
 251        if (lru_p) {
 252                munmap(lru_w->base, lru_w->len);
 253                pack_mapped -= lru_w->len;
 254                if (lru_l)
 255                        lru_l->next = lru_w->next;
 256                else
 257                        lru_p->windows = lru_w->next;
 258                free(lru_w);
 259                pack_open_windows--;
 260                return 1;
 261        }
 262        return 0;
 263}
 264
 265void release_pack_memory(size_t need)
 266{
 267        size_t cur = pack_mapped;
 268        while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
 269                ; /* nothing */
 270}
 271
 272void close_pack_windows(struct packed_git *p)
 273{
 274        while (p->windows) {
 275                struct pack_window *w = p->windows;
 276
 277                if (w->inuse_cnt)
 278                        die("pack '%s' still has open windows to it",
 279                            p->pack_name);
 280                munmap(w->base, w->len);
 281                pack_mapped -= w->len;
 282                pack_open_windows--;
 283                p->windows = w->next;
 284                free(w);
 285        }
 286}
 287
 288static int close_pack_fd(struct packed_git *p)
 289{
 290        if (p->pack_fd < 0)
 291                return 0;
 292
 293        close(p->pack_fd);
 294        pack_open_fds--;
 295        p->pack_fd = -1;
 296
 297        return 1;
 298}
 299
 300void close_pack_index(struct packed_git *p)
 301{
 302        if (p->index_data) {
 303                munmap((void *)p->index_data, p->index_size);
 304                p->index_data = NULL;
 305        }
 306}
 307
 308static void close_pack(struct packed_git *p)
 309{
 310        close_pack_windows(p);
 311        close_pack_fd(p);
 312        close_pack_index(p);
 313}
 314
 315void close_all_packs(void)
 316{
 317        struct packed_git *p;
 318
 319        for (p = packed_git; p; p = p->next)
 320                if (p->do_not_close)
 321                        die("BUG: want to close pack marked 'do-not-close'");
 322                else
 323                        close_pack(p);
 324}
 325
 326/*
 327 * The LRU pack is the one with the oldest MRU window, preferring packs
 328 * with no used windows, or the oldest mtime if it has no windows allocated.
 329 */
 330static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
 331{
 332        struct pack_window *w, *this_mru_w;
 333        int has_windows_inuse = 0;
 334
 335        /*
 336         * Reject this pack if it has windows and the previously selected
 337         * one does not.  If this pack does not have windows, reject
 338         * it if the pack file is newer than the previously selected one.
 339         */
 340        if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
 341                return;
 342
 343        for (w = this_mru_w = p->windows; w; w = w->next) {
 344                /*
 345                 * Reject this pack if any of its windows are in use,
 346                 * but the previously selected pack did not have any
 347                 * inuse windows.  Otherwise, record that this pack
 348                 * has windows in use.
 349                 */
 350                if (w->inuse_cnt) {
 351                        if (*accept_windows_inuse)
 352                                has_windows_inuse = 1;
 353                        else
 354                                return;
 355                }
 356
 357                if (w->last_used > this_mru_w->last_used)
 358                        this_mru_w = w;
 359
 360                /*
 361                 * Reject this pack if it has windows that have been
 362                 * used more recently than the previously selected pack.
 363                 * If the previously selected pack had windows inuse and
 364                 * we have not encountered a window in this pack that is
 365                 * inuse, skip this check since we prefer a pack with no
 366                 * inuse windows to one that has inuse windows.
 367                 */
 368                if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
 369                    this_mru_w->last_used > (*mru_w)->last_used)
 370                        return;
 371        }
 372
 373        /*
 374         * Select this pack.
 375         */
 376        *mru_w = this_mru_w;
 377        *lru_p = p;
 378        *accept_windows_inuse = has_windows_inuse;
 379}
 380
 381static int close_one_pack(void)
 382{
 383        struct packed_git *p, *lru_p = NULL;
 384        struct pack_window *mru_w = NULL;
 385        int accept_windows_inuse = 1;
 386
 387        for (p = packed_git; p; p = p->next) {
 388                if (p->pack_fd == -1)
 389                        continue;
 390                find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
 391        }
 392
 393        if (lru_p)
 394                return close_pack_fd(lru_p);
 395
 396        return 0;
 397}
 398
 399static unsigned int get_max_fd_limit(void)
 400{
 401#ifdef RLIMIT_NOFILE
 402        {
 403                struct rlimit lim;
 404
 405                if (!getrlimit(RLIMIT_NOFILE, &lim))
 406                        return lim.rlim_cur;
 407        }
 408#endif
 409
 410#ifdef _SC_OPEN_MAX
 411        {
 412                long open_max = sysconf(_SC_OPEN_MAX);
 413                if (0 < open_max)
 414                        return open_max;
 415                /*
 416                 * Otherwise, we got -1 for one of the two
 417                 * reasons:
 418                 *
 419                 * (1) sysconf() did not understand _SC_OPEN_MAX
 420                 *     and signaled an error with -1; or
 421                 * (2) sysconf() said there is no limit.
 422                 *
 423                 * We _could_ clear errno before calling sysconf() to
 424                 * tell these two cases apart and return a huge number
 425                 * in the latter case to let the caller cap it to a
 426                 * value that is not so selfish, but letting the
 427                 * fallback OPEN_MAX codepath take care of these cases
 428                 * is a lot simpler.
 429                 */
 430        }
 431#endif
 432
 433#ifdef OPEN_MAX
 434        return OPEN_MAX;
 435#else
 436        return 1; /* see the caller ;-) */
 437#endif
 438}
 439
 440/*
 441 * Do not call this directly as this leaks p->pack_fd on error return;
 442 * call open_packed_git() instead.
 443 */
 444static int open_packed_git_1(struct packed_git *p)
 445{
 446        struct stat st;
 447        struct pack_header hdr;
 448        unsigned char sha1[20];
 449        unsigned char *idx_sha1;
 450        long fd_flag;
 451        ssize_t read_result;
 452
 453        if (!p->index_data && open_pack_index(p))
 454                return error("packfile %s index unavailable", p->pack_name);
 455
 456        if (!pack_max_fds) {
 457                unsigned int max_fds = get_max_fd_limit();
 458
 459                /* Save 3 for stdin/stdout/stderr, 22 for work */
 460                if (25 < max_fds)
 461                        pack_max_fds = max_fds - 25;
 462                else
 463                        pack_max_fds = 1;
 464        }
 465
 466        while (pack_max_fds <= pack_open_fds && close_one_pack())
 467                ; /* nothing */
 468
 469        p->pack_fd = git_open(p->pack_name);
 470        if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
 471                return -1;
 472        pack_open_fds++;
 473
 474        /* If we created the struct before we had the pack we lack size. */
 475        if (!p->pack_size) {
 476                if (!S_ISREG(st.st_mode))
 477                        return error("packfile %s not a regular file", p->pack_name);
 478                p->pack_size = st.st_size;
 479        } else if (p->pack_size != st.st_size)
 480                return error("packfile %s size changed", p->pack_name);
 481
 482        /* We leave these file descriptors open with sliding mmap;
 483         * there is no point keeping them open across exec(), though.
 484         */
 485        fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
 486        if (fd_flag < 0)
 487                return error("cannot determine file descriptor flags");
 488        fd_flag |= FD_CLOEXEC;
 489        if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
 490                return error("cannot set FD_CLOEXEC");
 491
 492        /* Verify we recognize this pack file format. */
 493        read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
 494        if (read_result < 0)
 495                return error_errno("error reading from %s", p->pack_name);
 496        if (read_result != sizeof(hdr))
 497                return error("file %s is far too short to be a packfile", p->pack_name);
 498        if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
 499                return error("file %s is not a GIT packfile", p->pack_name);
 500        if (!pack_version_ok(hdr.hdr_version))
 501                return error("packfile %s is version %"PRIu32" and not"
 502                        " supported (try upgrading GIT to a newer version)",
 503                        p->pack_name, ntohl(hdr.hdr_version));
 504
 505        /* Verify the pack matches its index. */
 506        if (p->num_objects != ntohl(hdr.hdr_entries))
 507                return error("packfile %s claims to have %"PRIu32" objects"
 508                             " while index indicates %"PRIu32" objects",
 509                             p->pack_name, ntohl(hdr.hdr_entries),
 510                             p->num_objects);
 511        if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
 512                return error("end of packfile %s is unavailable", p->pack_name);
 513        read_result = read_in_full(p->pack_fd, sha1, sizeof(sha1));
 514        if (read_result < 0)
 515                return error_errno("error reading from %s", p->pack_name);
 516        if (read_result != sizeof(sha1))
 517                return error("packfile %s signature is unavailable", p->pack_name);
 518        idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
 519        if (hashcmp(sha1, idx_sha1))
 520                return error("packfile %s does not match index", p->pack_name);
 521        return 0;
 522}
 523
 524static int open_packed_git(struct packed_git *p)
 525{
 526        if (!open_packed_git_1(p))
 527                return 0;
 528        close_pack_fd(p);
 529        return -1;
 530}
 531
 532static int in_window(struct pack_window *win, off_t offset)
 533{
 534        /* We must promise at least 20 bytes (one hash) after the
 535         * offset is available from this window, otherwise the offset
 536         * is not actually in this window and a different window (which
 537         * has that one hash excess) must be used.  This is to support
 538         * the object header and delta base parsing routines below.
 539         */
 540        off_t win_off = win->offset;
 541        return win_off <= offset
 542                && (offset + 20) <= (win_off + win->len);
 543}
 544
 545unsigned char *use_pack(struct packed_git *p,
 546                struct pack_window **w_cursor,
 547                off_t offset,
 548                unsigned long *left)
 549{
 550        struct pack_window *win = *w_cursor;
 551
 552        /* Since packfiles end in a hash of their content and it's
 553         * pointless to ask for an offset into the middle of that
 554         * hash, and the in_window function above wouldn't match
 555         * don't allow an offset too close to the end of the file.
 556         */
 557        if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
 558                die("packfile %s cannot be accessed", p->pack_name);
 559        if (offset > (p->pack_size - 20))
 560                die("offset beyond end of packfile (truncated pack?)");
 561        if (offset < 0)
 562                die(_("offset before end of packfile (broken .idx?)"));
 563
 564        if (!win || !in_window(win, offset)) {
 565                if (win)
 566                        win->inuse_cnt--;
 567                for (win = p->windows; win; win = win->next) {
 568                        if (in_window(win, offset))
 569                                break;
 570                }
 571                if (!win) {
 572                        size_t window_align = packed_git_window_size / 2;
 573                        off_t len;
 574
 575                        if (p->pack_fd == -1 && open_packed_git(p))
 576                                die("packfile %s cannot be accessed", p->pack_name);
 577
 578                        win = xcalloc(1, sizeof(*win));
 579                        win->offset = (offset / window_align) * window_align;
 580                        len = p->pack_size - win->offset;
 581                        if (len > packed_git_window_size)
 582                                len = packed_git_window_size;
 583                        win->len = (size_t)len;
 584                        pack_mapped += win->len;
 585                        while (packed_git_limit < pack_mapped
 586                                && unuse_one_window(p))
 587                                ; /* nothing */
 588                        win->base = xmmap(NULL, win->len,
 589                                PROT_READ, MAP_PRIVATE,
 590                                p->pack_fd, win->offset);
 591                        if (win->base == MAP_FAILED)
 592                                die_errno("packfile %s cannot be mapped",
 593                                          p->pack_name);
 594                        if (!win->offset && win->len == p->pack_size
 595                                && !p->do_not_close)
 596                                close_pack_fd(p);
 597                        pack_mmap_calls++;
 598                        pack_open_windows++;
 599                        if (pack_mapped > peak_pack_mapped)
 600                                peak_pack_mapped = pack_mapped;
 601                        if (pack_open_windows > peak_pack_open_windows)
 602                                peak_pack_open_windows = pack_open_windows;
 603                        win->next = p->windows;
 604                        p->windows = win;
 605                }
 606        }
 607        if (win != *w_cursor) {
 608                win->last_used = pack_used_ctr++;
 609                win->inuse_cnt++;
 610                *w_cursor = win;
 611        }
 612        offset -= win->offset;
 613        if (left)
 614                *left = win->len - xsize_t(offset);
 615        return win->base + offset;
 616}
 617
 618void unuse_pack(struct pack_window **w_cursor)
 619{
 620        struct pack_window *w = *w_cursor;
 621        if (w) {
 622                w->inuse_cnt--;
 623                *w_cursor = NULL;
 624        }
 625}
 626
 627static void try_to_free_pack_memory(size_t size)
 628{
 629        release_pack_memory(size);
 630}
 631
 632struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 633{
 634        static int have_set_try_to_free_routine;
 635        struct stat st;
 636        size_t alloc;
 637        struct packed_git *p;
 638
 639        if (!have_set_try_to_free_routine) {
 640                have_set_try_to_free_routine = 1;
 641                set_try_to_free_routine(try_to_free_pack_memory);
 642        }
 643
 644        /*
 645         * Make sure a corresponding .pack file exists and that
 646         * the index looks sane.
 647         */
 648        if (!strip_suffix_mem(path, &path_len, ".idx"))
 649                return NULL;
 650
 651        /*
 652         * ".promisor" is long enough to hold any suffix we're adding (and
 653         * the use xsnprintf double-checks that)
 654         */
 655        alloc = st_add3(path_len, strlen(".promisor"), 1);
 656        p = alloc_packed_git(alloc);
 657        memcpy(p->pack_name, path, path_len);
 658
 659        xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
 660        if (!access(p->pack_name, F_OK))
 661                p->pack_keep = 1;
 662
 663        xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
 664        if (!access(p->pack_name, F_OK))
 665                p->pack_promisor = 1;
 666
 667        xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
 668        if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
 669                free(p);
 670                return NULL;
 671        }
 672
 673        /* ok, it looks sane as far as we can check without
 674         * actually mapping the pack file.
 675         */
 676        p->pack_size = st.st_size;
 677        p->pack_local = local;
 678        p->mtime = st.st_mtime;
 679        if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
 680                hashclr(p->sha1);
 681        return p;
 682}
 683
 684void install_packed_git(struct packed_git *pack)
 685{
 686        if (pack->pack_fd != -1)
 687                pack_open_fds++;
 688
 689        pack->next = packed_git;
 690        packed_git = pack;
 691}
 692
 693void (*report_garbage)(unsigned seen_bits, const char *path);
 694
 695static void report_helper(const struct string_list *list,
 696                          int seen_bits, int first, int last)
 697{
 698        if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
 699                return;
 700
 701        for (; first < last; first++)
 702                report_garbage(seen_bits, list->items[first].string);
 703}
 704
 705static void report_pack_garbage(struct string_list *list)
 706{
 707        int i, baselen = -1, first = 0, seen_bits = 0;
 708
 709        if (!report_garbage)
 710                return;
 711
 712        string_list_sort(list);
 713
 714        for (i = 0; i < list->nr; i++) {
 715                const char *path = list->items[i].string;
 716                if (baselen != -1 &&
 717                    strncmp(path, list->items[first].string, baselen)) {
 718                        report_helper(list, seen_bits, first, i);
 719                        baselen = -1;
 720                        seen_bits = 0;
 721                }
 722                if (baselen == -1) {
 723                        const char *dot = strrchr(path, '.');
 724                        if (!dot) {
 725                                report_garbage(PACKDIR_FILE_GARBAGE, path);
 726                                continue;
 727                        }
 728                        baselen = dot - path + 1;
 729                        first = i;
 730                }
 731                if (!strcmp(path + baselen, "pack"))
 732                        seen_bits |= 1;
 733                else if (!strcmp(path + baselen, "idx"))
 734                        seen_bits |= 2;
 735        }
 736        report_helper(list, seen_bits, first, list->nr);
 737}
 738
 739static void prepare_packed_git_one(char *objdir, int local)
 740{
 741        struct strbuf path = STRBUF_INIT;
 742        size_t dirnamelen;
 743        DIR *dir;
 744        struct dirent *de;
 745        struct string_list garbage = STRING_LIST_INIT_DUP;
 746
 747        strbuf_addstr(&path, objdir);
 748        strbuf_addstr(&path, "/pack");
 749        dir = opendir(path.buf);
 750        if (!dir) {
 751                if (errno != ENOENT)
 752                        error_errno("unable to open object pack directory: %s",
 753                                    path.buf);
 754                strbuf_release(&path);
 755                return;
 756        }
 757        strbuf_addch(&path, '/');
 758        dirnamelen = path.len;
 759        while ((de = readdir(dir)) != NULL) {
 760                struct packed_git *p;
 761                size_t base_len;
 762
 763                if (is_dot_or_dotdot(de->d_name))
 764                        continue;
 765
 766                strbuf_setlen(&path, dirnamelen);
 767                strbuf_addstr(&path, de->d_name);
 768
 769                base_len = path.len;
 770                if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
 771                        /* Don't reopen a pack we already have. */
 772                        for (p = packed_git; p; p = p->next) {
 773                                size_t len;
 774                                if (strip_suffix(p->pack_name, ".pack", &len) &&
 775                                    len == base_len &&
 776                                    !memcmp(p->pack_name, path.buf, len))
 777                                        break;
 778                        }
 779                        if (p == NULL &&
 780                            /*
 781                             * See if it really is a valid .idx file with
 782                             * corresponding .pack file that we can map.
 783                             */
 784                            (p = add_packed_git(path.buf, path.len, local)) != NULL)
 785                                install_packed_git(p);
 786                }
 787
 788                if (!report_garbage)
 789                        continue;
 790
 791                if (ends_with(de->d_name, ".idx") ||
 792                    ends_with(de->d_name, ".pack") ||
 793                    ends_with(de->d_name, ".bitmap") ||
 794                    ends_with(de->d_name, ".keep") ||
 795                    ends_with(de->d_name, ".promisor"))
 796                        string_list_append(&garbage, path.buf);
 797                else
 798                        report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
 799        }
 800        closedir(dir);
 801        report_pack_garbage(&garbage);
 802        string_list_clear(&garbage, 0);
 803        strbuf_release(&path);
 804}
 805
 806static int approximate_object_count_valid;
 807
 808/*
 809 * Give a fast, rough count of the number of objects in the repository. This
 810 * ignores loose objects completely. If you have a lot of them, then either
 811 * you should repack because your performance will be awful, or they are
 812 * all unreachable objects about to be pruned, in which case they're not really
 813 * interesting as a measure of repo size in the first place.
 814 */
 815unsigned long approximate_object_count(void)
 816{
 817        static unsigned long count;
 818        if (!approximate_object_count_valid) {
 819                struct packed_git *p;
 820
 821                prepare_packed_git();
 822                count = 0;
 823                for (p = packed_git; p; p = p->next) {
 824                        if (open_pack_index(p))
 825                                continue;
 826                        count += p->num_objects;
 827                }
 828        }
 829        return count;
 830}
 831
 832static void *get_next_packed_git(const void *p)
 833{
 834        return ((const struct packed_git *)p)->next;
 835}
 836
 837static void set_next_packed_git(void *p, void *next)
 838{
 839        ((struct packed_git *)p)->next = next;
 840}
 841
 842static int sort_pack(const void *a_, const void *b_)
 843{
 844        const struct packed_git *a = a_;
 845        const struct packed_git *b = b_;
 846        int st;
 847
 848        /*
 849         * Local packs tend to contain objects specific to our
 850         * variant of the project than remote ones.  In addition,
 851         * remote ones could be on a network mounted filesystem.
 852         * Favor local ones for these reasons.
 853         */
 854        st = a->pack_local - b->pack_local;
 855        if (st)
 856                return -st;
 857
 858        /*
 859         * Younger packs tend to contain more recent objects,
 860         * and more recent objects tend to get accessed more
 861         * often.
 862         */
 863        if (a->mtime < b->mtime)
 864                return 1;
 865        else if (a->mtime == b->mtime)
 866                return 0;
 867        return -1;
 868}
 869
 870static void rearrange_packed_git(void)
 871{
 872        packed_git = llist_mergesort(packed_git, get_next_packed_git,
 873                                     set_next_packed_git, sort_pack);
 874}
 875
 876static void prepare_packed_git_mru(void)
 877{
 878        struct packed_git *p;
 879
 880        INIT_LIST_HEAD(&packed_git_mru);
 881
 882        for (p = packed_git; p; p = p->next)
 883                list_add_tail(&p->mru, &packed_git_mru);
 884}
 885
 886static int prepare_packed_git_run_once = 0;
 887void prepare_packed_git(void)
 888{
 889        struct alternate_object_database *alt;
 890
 891        if (prepare_packed_git_run_once)
 892                return;
 893        prepare_packed_git_one(get_object_directory(), 1);
 894        prepare_alt_odb();
 895        for (alt = alt_odb_list; alt; alt = alt->next)
 896                prepare_packed_git_one(alt->path, 0);
 897        rearrange_packed_git();
 898        prepare_packed_git_mru();
 899        prepare_packed_git_run_once = 1;
 900}
 901
 902void reprepare_packed_git(void)
 903{
 904        approximate_object_count_valid = 0;
 905        prepare_packed_git_run_once = 0;
 906        prepare_packed_git();
 907}
 908
 909unsigned long unpack_object_header_buffer(const unsigned char *buf,
 910                unsigned long len, enum object_type *type, unsigned long *sizep)
 911{
 912        unsigned shift;
 913        unsigned long size, c;
 914        unsigned long used = 0;
 915
 916        c = buf[used++];
 917        *type = (c >> 4) & 7;
 918        size = c & 15;
 919        shift = 4;
 920        while (c & 0x80) {
 921                if (len <= used || bitsizeof(long) <= shift) {
 922                        error("bad object header");
 923                        size = used = 0;
 924                        break;
 925                }
 926                c = buf[used++];
 927                size += (c & 0x7f) << shift;
 928                shift += 7;
 929        }
 930        *sizep = size;
 931        return used;
 932}
 933
 934unsigned long get_size_from_delta(struct packed_git *p,
 935                                  struct pack_window **w_curs,
 936                                  off_t curpos)
 937{
 938        const unsigned char *data;
 939        unsigned char delta_head[20], *in;
 940        git_zstream stream;
 941        int st;
 942
 943        memset(&stream, 0, sizeof(stream));
 944        stream.next_out = delta_head;
 945        stream.avail_out = sizeof(delta_head);
 946
 947        git_inflate_init(&stream);
 948        do {
 949                in = use_pack(p, w_curs, curpos, &stream.avail_in);
 950                stream.next_in = in;
 951                st = git_inflate(&stream, Z_FINISH);
 952                curpos += stream.next_in - in;
 953        } while ((st == Z_OK || st == Z_BUF_ERROR) &&
 954                 stream.total_out < sizeof(delta_head));
 955        git_inflate_end(&stream);
 956        if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
 957                error("delta data unpack-initial failed");
 958                return 0;
 959        }
 960
 961        /* Examine the initial part of the delta to figure out
 962         * the result size.
 963         */
 964        data = delta_head;
 965
 966        /* ignore base size */
 967        get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 968
 969        /* Read the result size */
 970        return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 971}
 972
 973int unpack_object_header(struct packed_git *p,
 974                         struct pack_window **w_curs,
 975                         off_t *curpos,
 976                         unsigned long *sizep)
 977{
 978        unsigned char *base;
 979        unsigned long left;
 980        unsigned long used;
 981        enum object_type type;
 982
 983        /* use_pack() assures us we have [base, base + 20) available
 984         * as a range that we can look at.  (Its actually the hash
 985         * size that is assured.)  With our object header encoding
 986         * the maximum deflated object size is 2^137, which is just
 987         * insane, so we know won't exceed what we have been given.
 988         */
 989        base = use_pack(p, w_curs, *curpos, &left);
 990        used = unpack_object_header_buffer(base, left, &type, sizep);
 991        if (!used) {
 992                type = OBJ_BAD;
 993        } else
 994                *curpos += used;
 995
 996        return type;
 997}
 998
 999void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
1000{
1001        unsigned i;
1002        for (i = 0; i < p->num_bad_objects; i++)
1003                if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
1004                        return;
1005        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
1006                                      st_mult(GIT_MAX_RAWSZ,
1007                                              st_add(p->num_bad_objects, 1)));
1008        hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
1009        p->num_bad_objects++;
1010}
1011
1012const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1013{
1014        struct packed_git *p;
1015        unsigned i;
1016
1017        for (p = packed_git; p; p = p->next)
1018                for (i = 0; i < p->num_bad_objects; i++)
1019                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1020                                return p;
1021        return NULL;
1022}
1023
1024static off_t get_delta_base(struct packed_git *p,
1025                                    struct pack_window **w_curs,
1026                                    off_t *curpos,
1027                                    enum object_type type,
1028                                    off_t delta_obj_offset)
1029{
1030        unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1031        off_t base_offset;
1032
1033        /* use_pack() assured us we have [base_info, base_info + 20)
1034         * as a range that we can look at without walking off the
1035         * end of the mapped window.  Its actually the hash size
1036         * that is assured.  An OFS_DELTA longer than the hash size
1037         * is stupid, as then a REF_DELTA would be smaller to store.
1038         */
1039        if (type == OBJ_OFS_DELTA) {
1040                unsigned used = 0;
1041                unsigned char c = base_info[used++];
1042                base_offset = c & 127;
1043                while (c & 128) {
1044                        base_offset += 1;
1045                        if (!base_offset || MSB(base_offset, 7))
1046                                return 0;  /* overflow */
1047                        c = base_info[used++];
1048                        base_offset = (base_offset << 7) + (c & 127);
1049                }
1050                base_offset = delta_obj_offset - base_offset;
1051                if (base_offset <= 0 || base_offset >= delta_obj_offset)
1052                        return 0;  /* out of bound */
1053                *curpos += used;
1054        } else if (type == OBJ_REF_DELTA) {
1055                /* The base entry _must_ be in the same pack */
1056                base_offset = find_pack_entry_one(base_info, p);
1057                *curpos += 20;
1058        } else
1059                die("I am totally screwed");
1060        return base_offset;
1061}
1062
1063/*
1064 * Like get_delta_base above, but we return the sha1 instead of the pack
1065 * offset. This means it is cheaper for REF deltas (we do not have to do
1066 * the final object lookup), but more expensive for OFS deltas (we
1067 * have to load the revidx to convert the offset back into a sha1).
1068 */
1069static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1070                                                struct pack_window **w_curs,
1071                                                off_t curpos,
1072                                                enum object_type type,
1073                                                off_t delta_obj_offset)
1074{
1075        if (type == OBJ_REF_DELTA) {
1076                unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1077                return base;
1078        } else if (type == OBJ_OFS_DELTA) {
1079                struct revindex_entry *revidx;
1080                off_t base_offset = get_delta_base(p, w_curs, &curpos,
1081                                                   type, delta_obj_offset);
1082
1083                if (!base_offset)
1084                        return NULL;
1085
1086                revidx = find_pack_revindex(p, base_offset);
1087                if (!revidx)
1088                        return NULL;
1089
1090                return nth_packed_object_sha1(p, revidx->nr);
1091        } else
1092                return NULL;
1093}
1094
1095static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1096{
1097        int type;
1098        struct revindex_entry *revidx;
1099        const unsigned char *sha1;
1100        revidx = find_pack_revindex(p, obj_offset);
1101        if (!revidx)
1102                return OBJ_BAD;
1103        sha1 = nth_packed_object_sha1(p, revidx->nr);
1104        mark_bad_packed_object(p, sha1);
1105        type = sha1_object_info(sha1, NULL);
1106        if (type <= OBJ_NONE)
1107                return OBJ_BAD;
1108        return type;
1109}
1110
1111#define POI_STACK_PREALLOC 64
1112
1113static enum object_type packed_to_object_type(struct packed_git *p,
1114                                              off_t obj_offset,
1115                                              enum object_type type,
1116                                              struct pack_window **w_curs,
1117                                              off_t curpos)
1118{
1119        off_t small_poi_stack[POI_STACK_PREALLOC];
1120        off_t *poi_stack = small_poi_stack;
1121        int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1122
1123        while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1124                off_t base_offset;
1125                unsigned long size;
1126                /* Push the object we're going to leave behind */
1127                if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1128                        poi_stack_alloc = alloc_nr(poi_stack_nr);
1129                        ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1130                        memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1131                } else {
1132                        ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1133                }
1134                poi_stack[poi_stack_nr++] = obj_offset;
1135                /* If parsing the base offset fails, just unwind */
1136                base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1137                if (!base_offset)
1138                        goto unwind;
1139                curpos = obj_offset = base_offset;
1140                type = unpack_object_header(p, w_curs, &curpos, &size);
1141                if (type <= OBJ_NONE) {
1142                        /* If getting the base itself fails, we first
1143                         * retry the base, otherwise unwind */
1144                        type = retry_bad_packed_offset(p, base_offset);
1145                        if (type > OBJ_NONE)
1146                                goto out;
1147                        goto unwind;
1148                }
1149        }
1150
1151        switch (type) {
1152        case OBJ_BAD:
1153        case OBJ_COMMIT:
1154        case OBJ_TREE:
1155        case OBJ_BLOB:
1156        case OBJ_TAG:
1157                break;
1158        default:
1159                error("unknown object type %i at offset %"PRIuMAX" in %s",
1160                      type, (uintmax_t)obj_offset, p->pack_name);
1161                type = OBJ_BAD;
1162        }
1163
1164out:
1165        if (poi_stack != small_poi_stack)
1166                free(poi_stack);
1167        return type;
1168
1169unwind:
1170        while (poi_stack_nr) {
1171                obj_offset = poi_stack[--poi_stack_nr];
1172                type = retry_bad_packed_offset(p, obj_offset);
1173                if (type > OBJ_NONE)
1174                        goto out;
1175        }
1176        type = OBJ_BAD;
1177        goto out;
1178}
1179
1180static struct hashmap delta_base_cache;
1181static size_t delta_base_cached;
1182
1183static LIST_HEAD(delta_base_cache_lru);
1184
1185struct delta_base_cache_key {
1186        struct packed_git *p;
1187        off_t base_offset;
1188};
1189
1190struct delta_base_cache_entry {
1191        struct hashmap hash;
1192        struct delta_base_cache_key key;
1193        struct list_head lru;
1194        void *data;
1195        unsigned long size;
1196        enum object_type type;
1197};
1198
1199static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1200{
1201        unsigned int hash;
1202
1203        hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1204        hash += (hash >> 8) + (hash >> 16);
1205        return hash;
1206}
1207
1208static struct delta_base_cache_entry *
1209get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1210{
1211        struct hashmap_entry entry;
1212        struct delta_base_cache_key key;
1213
1214        if (!delta_base_cache.cmpfn)
1215                return NULL;
1216
1217        hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1218        key.p = p;
1219        key.base_offset = base_offset;
1220        return hashmap_get(&delta_base_cache, &entry, &key);
1221}
1222
1223static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1224                                   const struct delta_base_cache_key *b)
1225{
1226        return a->p == b->p && a->base_offset == b->base_offset;
1227}
1228
1229static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1230                                     const void *va, const void *vb,
1231                                     const void *vkey)
1232{
1233        const struct delta_base_cache_entry *a = va, *b = vb;
1234        const struct delta_base_cache_key *key = vkey;
1235        if (key)
1236                return !delta_base_cache_key_eq(&a->key, key);
1237        else
1238                return !delta_base_cache_key_eq(&a->key, &b->key);
1239}
1240
1241static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1242{
1243        return !!get_delta_base_cache_entry(p, base_offset);
1244}
1245
1246/*
1247 * Remove the entry from the cache, but do _not_ free the associated
1248 * entry data. The caller takes ownership of the "data" buffer, and
1249 * should copy out any fields it wants before detaching.
1250 */
1251static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1252{
1253        hashmap_remove(&delta_base_cache, ent, &ent->key);
1254        list_del(&ent->lru);
1255        delta_base_cached -= ent->size;
1256        free(ent);
1257}
1258
1259static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1260        unsigned long *base_size, enum object_type *type)
1261{
1262        struct delta_base_cache_entry *ent;
1263
1264        ent = get_delta_base_cache_entry(p, base_offset);
1265        if (!ent)
1266                return unpack_entry(p, base_offset, type, base_size);
1267
1268        if (type)
1269                *type = ent->type;
1270        if (base_size)
1271                *base_size = ent->size;
1272        return xmemdupz(ent->data, ent->size);
1273}
1274
1275static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1276{
1277        free(ent->data);
1278        detach_delta_base_cache_entry(ent);
1279}
1280
1281void clear_delta_base_cache(void)
1282{
1283        struct list_head *lru, *tmp;
1284        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1285                struct delta_base_cache_entry *entry =
1286                        list_entry(lru, struct delta_base_cache_entry, lru);
1287                release_delta_base_cache(entry);
1288        }
1289}
1290
1291static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1292        void *base, unsigned long base_size, enum object_type type)
1293{
1294        struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1295        struct list_head *lru, *tmp;
1296
1297        delta_base_cached += base_size;
1298
1299        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1300                struct delta_base_cache_entry *f =
1301                        list_entry(lru, struct delta_base_cache_entry, lru);
1302                if (delta_base_cached <= delta_base_cache_limit)
1303                        break;
1304                release_delta_base_cache(f);
1305        }
1306
1307        ent->key.p = p;
1308        ent->key.base_offset = base_offset;
1309        ent->type = type;
1310        ent->data = base;
1311        ent->size = base_size;
1312        list_add_tail(&ent->lru, &delta_base_cache_lru);
1313
1314        if (!delta_base_cache.cmpfn)
1315                hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1316        hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1317        hashmap_add(&delta_base_cache, ent);
1318}
1319
1320int packed_object_info(struct packed_git *p, off_t obj_offset,
1321                       struct object_info *oi)
1322{
1323        struct pack_window *w_curs = NULL;
1324        unsigned long size;
1325        off_t curpos = obj_offset;
1326        enum object_type type;
1327
1328        /*
1329         * We always get the representation type, but only convert it to
1330         * a "real" type later if the caller is interested.
1331         */
1332        if (oi->contentp) {
1333                *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1334                                                      &type);
1335                if (!*oi->contentp)
1336                        type = OBJ_BAD;
1337        } else {
1338                type = unpack_object_header(p, &w_curs, &curpos, &size);
1339        }
1340
1341        if (!oi->contentp && oi->sizep) {
1342                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1343                        off_t tmp_pos = curpos;
1344                        off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1345                                                           type, obj_offset);
1346                        if (!base_offset) {
1347                                type = OBJ_BAD;
1348                                goto out;
1349                        }
1350                        *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1351                        if (*oi->sizep == 0) {
1352                                type = OBJ_BAD;
1353                                goto out;
1354                        }
1355                } else {
1356                        *oi->sizep = size;
1357                }
1358        }
1359
1360        if (oi->disk_sizep) {
1361                struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1362                *oi->disk_sizep = revidx[1].offset - obj_offset;
1363        }
1364
1365        if (oi->typep || oi->typename) {
1366                enum object_type ptot;
1367                ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1368                                             curpos);
1369                if (oi->typep)
1370                        *oi->typep = ptot;
1371                if (oi->typename) {
1372                        const char *tn = typename(ptot);
1373                        if (tn)
1374                                strbuf_addstr(oi->typename, tn);
1375                }
1376                if (ptot < 0) {
1377                        type = OBJ_BAD;
1378                        goto out;
1379                }
1380        }
1381
1382        if (oi->delta_base_sha1) {
1383                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1384                        const unsigned char *base;
1385
1386                        base = get_delta_base_sha1(p, &w_curs, curpos,
1387                                                   type, obj_offset);
1388                        if (!base) {
1389                                type = OBJ_BAD;
1390                                goto out;
1391                        }
1392
1393                        hashcpy(oi->delta_base_sha1, base);
1394                } else
1395                        hashclr(oi->delta_base_sha1);
1396        }
1397
1398        oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1399                                                          OI_PACKED;
1400
1401out:
1402        unuse_pack(&w_curs);
1403        return type;
1404}
1405
1406static void *unpack_compressed_entry(struct packed_git *p,
1407                                    struct pack_window **w_curs,
1408                                    off_t curpos,
1409                                    unsigned long size)
1410{
1411        int st;
1412        git_zstream stream;
1413        unsigned char *buffer, *in;
1414
1415        buffer = xmallocz_gently(size);
1416        if (!buffer)
1417                return NULL;
1418        memset(&stream, 0, sizeof(stream));
1419        stream.next_out = buffer;
1420        stream.avail_out = size + 1;
1421
1422        git_inflate_init(&stream);
1423        do {
1424                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1425                stream.next_in = in;
1426                st = git_inflate(&stream, Z_FINISH);
1427                if (!stream.avail_out)
1428                        break; /* the payload is larger than it should be */
1429                curpos += stream.next_in - in;
1430        } while (st == Z_OK || st == Z_BUF_ERROR);
1431        git_inflate_end(&stream);
1432        if ((st != Z_STREAM_END) || stream.total_out != size) {
1433                free(buffer);
1434                return NULL;
1435        }
1436
1437        return buffer;
1438}
1439
1440static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1441{
1442        static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1443        trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1444                         p->pack_name, (uintmax_t)obj_offset);
1445}
1446
1447int do_check_packed_object_crc;
1448
1449#define UNPACK_ENTRY_STACK_PREALLOC 64
1450struct unpack_entry_stack_ent {
1451        off_t obj_offset;
1452        off_t curpos;
1453        unsigned long size;
1454};
1455
1456static void *read_object(const unsigned char *sha1, enum object_type *type,
1457                         unsigned long *size)
1458{
1459        struct object_info oi = OBJECT_INFO_INIT;
1460        void *content;
1461        oi.typep = type;
1462        oi.sizep = size;
1463        oi.contentp = &content;
1464
1465        if (sha1_object_info_extended(sha1, &oi, 0) < 0)
1466                return NULL;
1467        return content;
1468}
1469
1470void *unpack_entry(struct packed_git *p, off_t obj_offset,
1471                   enum object_type *final_type, unsigned long *final_size)
1472{
1473        struct pack_window *w_curs = NULL;
1474        off_t curpos = obj_offset;
1475        void *data = NULL;
1476        unsigned long size;
1477        enum object_type type;
1478        struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1479        struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1480        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1481        int base_from_cache = 0;
1482
1483        write_pack_access_log(p, obj_offset);
1484
1485        /* PHASE 1: drill down to the innermost base object */
1486        for (;;) {
1487                off_t base_offset;
1488                int i;
1489                struct delta_base_cache_entry *ent;
1490
1491                ent = get_delta_base_cache_entry(p, curpos);
1492                if (ent) {
1493                        type = ent->type;
1494                        data = ent->data;
1495                        size = ent->size;
1496                        detach_delta_base_cache_entry(ent);
1497                        base_from_cache = 1;
1498                        break;
1499                }
1500
1501                if (do_check_packed_object_crc && p->index_version > 1) {
1502                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1503                        off_t len = revidx[1].offset - obj_offset;
1504                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1505                                const unsigned char *sha1 =
1506                                        nth_packed_object_sha1(p, revidx->nr);
1507                                error("bad packed object CRC for %s",
1508                                      sha1_to_hex(sha1));
1509                                mark_bad_packed_object(p, sha1);
1510                                data = NULL;
1511                                goto out;
1512                        }
1513                }
1514
1515                type = unpack_object_header(p, &w_curs, &curpos, &size);
1516                if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1517                        break;
1518
1519                base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1520                if (!base_offset) {
1521                        error("failed to validate delta base reference "
1522                              "at offset %"PRIuMAX" from %s",
1523                              (uintmax_t)curpos, p->pack_name);
1524                        /* bail to phase 2, in hopes of recovery */
1525                        data = NULL;
1526                        break;
1527                }
1528
1529                /* push object, proceed to base */
1530                if (delta_stack_nr >= delta_stack_alloc
1531                    && delta_stack == small_delta_stack) {
1532                        delta_stack_alloc = alloc_nr(delta_stack_nr);
1533                        ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1534                        memcpy(delta_stack, small_delta_stack,
1535                               sizeof(*delta_stack)*delta_stack_nr);
1536                } else {
1537                        ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1538                }
1539                i = delta_stack_nr++;
1540                delta_stack[i].obj_offset = obj_offset;
1541                delta_stack[i].curpos = curpos;
1542                delta_stack[i].size = size;
1543
1544                curpos = obj_offset = base_offset;
1545        }
1546
1547        /* PHASE 2: handle the base */
1548        switch (type) {
1549        case OBJ_OFS_DELTA:
1550        case OBJ_REF_DELTA:
1551                if (data)
1552                        die("BUG: unpack_entry: left loop at a valid delta");
1553                break;
1554        case OBJ_COMMIT:
1555        case OBJ_TREE:
1556        case OBJ_BLOB:
1557        case OBJ_TAG:
1558                if (!base_from_cache)
1559                        data = unpack_compressed_entry(p, &w_curs, curpos, size);
1560                break;
1561        default:
1562                data = NULL;
1563                error("unknown object type %i at offset %"PRIuMAX" in %s",
1564                      type, (uintmax_t)obj_offset, p->pack_name);
1565        }
1566
1567        /* PHASE 3: apply deltas in order */
1568
1569        /* invariants:
1570         *   'data' holds the base data, or NULL if there was corruption
1571         */
1572        while (delta_stack_nr) {
1573                void *delta_data;
1574                void *base = data;
1575                void *external_base = NULL;
1576                unsigned long delta_size, base_size = size;
1577                int i;
1578
1579                data = NULL;
1580
1581                if (base)
1582                        add_delta_base_cache(p, obj_offset, base, base_size, type);
1583
1584                if (!base) {
1585                        /*
1586                         * We're probably in deep shit, but let's try to fetch
1587                         * the required base anyway from another pack or loose.
1588                         * This is costly but should happen only in the presence
1589                         * of a corrupted pack, and is better than failing outright.
1590                         */
1591                        struct revindex_entry *revidx;
1592                        const unsigned char *base_sha1;
1593                        revidx = find_pack_revindex(p, obj_offset);
1594                        if (revidx) {
1595                                base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1596                                error("failed to read delta base object %s"
1597                                      " at offset %"PRIuMAX" from %s",
1598                                      sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1599                                      p->pack_name);
1600                                mark_bad_packed_object(p, base_sha1);
1601                                base = read_object(base_sha1, &type, &base_size);
1602                                external_base = base;
1603                        }
1604                }
1605
1606                i = --delta_stack_nr;
1607                obj_offset = delta_stack[i].obj_offset;
1608                curpos = delta_stack[i].curpos;
1609                delta_size = delta_stack[i].size;
1610
1611                if (!base)
1612                        continue;
1613
1614                delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1615
1616                if (!delta_data) {
1617                        error("failed to unpack compressed delta "
1618                              "at offset %"PRIuMAX" from %s",
1619                              (uintmax_t)curpos, p->pack_name);
1620                        data = NULL;
1621                        free(external_base);
1622                        continue;
1623                }
1624
1625                data = patch_delta(base, base_size,
1626                                   delta_data, delta_size,
1627                                   &size);
1628
1629                /*
1630                 * We could not apply the delta; warn the user, but keep going.
1631                 * Our failure will be noticed either in the next iteration of
1632                 * the loop, or if this is the final delta, in the caller when
1633                 * we return NULL. Those code paths will take care of making
1634                 * a more explicit warning and retrying with another copy of
1635                 * the object.
1636                 */
1637                if (!data)
1638                        error("failed to apply delta");
1639
1640                free(delta_data);
1641                free(external_base);
1642        }
1643
1644        if (final_type)
1645                *final_type = type;
1646        if (final_size)
1647                *final_size = size;
1648
1649out:
1650        unuse_pack(&w_curs);
1651
1652        if (delta_stack != small_delta_stack)
1653                free(delta_stack);
1654
1655        return data;
1656}
1657
1658const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1659                                            uint32_t n)
1660{
1661        const unsigned char *index = p->index_data;
1662        if (!index) {
1663                if (open_pack_index(p))
1664                        return NULL;
1665                index = p->index_data;
1666        }
1667        if (n >= p->num_objects)
1668                return NULL;
1669        index += 4 * 256;
1670        if (p->index_version == 1) {
1671                return index + 24 * n + 4;
1672        } else {
1673                index += 8;
1674                return index + 20 * n;
1675        }
1676}
1677
1678const struct object_id *nth_packed_object_oid(struct object_id *oid,
1679                                              struct packed_git *p,
1680                                              uint32_t n)
1681{
1682        const unsigned char *hash = nth_packed_object_sha1(p, n);
1683        if (!hash)
1684                return NULL;
1685        hashcpy(oid->hash, hash);
1686        return oid;
1687}
1688
1689void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1690{
1691        const unsigned char *ptr = vptr;
1692        const unsigned char *start = p->index_data;
1693        const unsigned char *end = start + p->index_size;
1694        if (ptr < start)
1695                die(_("offset before start of pack index for %s (corrupt index?)"),
1696                    p->pack_name);
1697        /* No need to check for underflow; .idx files must be at least 8 bytes */
1698        if (ptr >= end - 8)
1699                die(_("offset beyond end of pack index for %s (truncated index?)"),
1700                    p->pack_name);
1701}
1702
1703off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1704{
1705        const unsigned char *index = p->index_data;
1706        index += 4 * 256;
1707        if (p->index_version == 1) {
1708                return ntohl(*((uint32_t *)(index + 24 * n)));
1709        } else {
1710                uint32_t off;
1711                index += 8 + p->num_objects * (20 + 4);
1712                off = ntohl(*((uint32_t *)(index + 4 * n)));
1713                if (!(off & 0x80000000))
1714                        return off;
1715                index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1716                check_pack_index_ptr(p, index);
1717                return get_be64(index);
1718        }
1719}
1720
1721off_t find_pack_entry_one(const unsigned char *sha1,
1722                                  struct packed_git *p)
1723{
1724        const uint32_t *level1_ofs = p->index_data;
1725        const unsigned char *index = p->index_data;
1726        unsigned hi, lo, stride;
1727        static int debug_lookup = -1;
1728
1729        if (debug_lookup < 0)
1730                debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1731
1732        if (!index) {
1733                if (open_pack_index(p))
1734                        return 0;
1735                level1_ofs = p->index_data;
1736                index = p->index_data;
1737        }
1738        if (p->index_version > 1) {
1739                level1_ofs += 2;
1740                index += 8;
1741        }
1742        index += 4 * 256;
1743        hi = ntohl(level1_ofs[*sha1]);
1744        lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1745        if (p->index_version > 1) {
1746                stride = 20;
1747        } else {
1748                stride = 24;
1749                index += 4;
1750        }
1751
1752        if (debug_lookup)
1753                printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1754                       sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
1755
1756        while (lo < hi) {
1757                unsigned mi = lo + (hi - lo) / 2;
1758                int cmp = hashcmp(index + mi * stride, sha1);
1759
1760                if (debug_lookup)
1761                        printf("lo %u hi %u rg %u mi %u\n",
1762                               lo, hi, hi - lo, mi);
1763                if (!cmp)
1764                        return nth_packed_object_offset(p, mi);
1765                if (cmp > 0)
1766                        hi = mi;
1767                else
1768                        lo = mi+1;
1769        }
1770        return 0;
1771}
1772
1773int is_pack_valid(struct packed_git *p)
1774{
1775        /* An already open pack is known to be valid. */
1776        if (p->pack_fd != -1)
1777                return 1;
1778
1779        /* If the pack has one window completely covering the
1780         * file size, the pack is known to be valid even if
1781         * the descriptor is not currently open.
1782         */
1783        if (p->windows) {
1784                struct pack_window *w = p->windows;
1785
1786                if (!w->offset && w->len == p->pack_size)
1787                        return 1;
1788        }
1789
1790        /* Force the pack to open to prove its valid. */
1791        return !open_packed_git(p);
1792}
1793
1794struct packed_git *find_sha1_pack(const unsigned char *sha1,
1795                                  struct packed_git *packs)
1796{
1797        struct packed_git *p;
1798
1799        for (p = packs; p; p = p->next) {
1800                if (find_pack_entry_one(sha1, p))
1801                        return p;
1802        }
1803        return NULL;
1804
1805}
1806
1807static int fill_pack_entry(const unsigned char *sha1,
1808                           struct pack_entry *e,
1809                           struct packed_git *p)
1810{
1811        off_t offset;
1812
1813        if (p->num_bad_objects) {
1814                unsigned i;
1815                for (i = 0; i < p->num_bad_objects; i++)
1816                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1817                                return 0;
1818        }
1819
1820        offset = find_pack_entry_one(sha1, p);
1821        if (!offset)
1822                return 0;
1823
1824        /*
1825         * We are about to tell the caller where they can locate the
1826         * requested object.  We better make sure the packfile is
1827         * still here and can be accessed before supplying that
1828         * answer, as it may have been deleted since the index was
1829         * loaded!
1830         */
1831        if (!is_pack_valid(p))
1832                return 0;
1833        e->offset = offset;
1834        e->p = p;
1835        hashcpy(e->sha1, sha1);
1836        return 1;
1837}
1838
1839/*
1840 * Iff a pack file contains the object named by sha1, return true and
1841 * store its location to e.
1842 */
1843int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1844{
1845        struct list_head *pos;
1846
1847        prepare_packed_git();
1848        if (!packed_git)
1849                return 0;
1850
1851        list_for_each(pos, &packed_git_mru) {
1852                struct packed_git *p = list_entry(pos, struct packed_git, mru);
1853                if (fill_pack_entry(sha1, e, p)) {
1854                        list_move(&p->mru, &packed_git_mru);
1855                        return 1;
1856                }
1857        }
1858        return 0;
1859}
1860
1861int has_sha1_pack(const unsigned char *sha1)
1862{
1863        struct pack_entry e;
1864        return find_pack_entry(sha1, &e);
1865}
1866
1867int has_pack_index(const unsigned char *sha1)
1868{
1869        struct stat st;
1870        if (stat(sha1_pack_index_name(sha1), &st))
1871                return 0;
1872        return 1;
1873}
1874
1875static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1876{
1877        uint32_t i;
1878        int r = 0;
1879
1880        for (i = 0; i < p->num_objects; i++) {
1881                struct object_id oid;
1882
1883                if (!nth_packed_object_oid(&oid, p, i))
1884                        return error("unable to get sha1 of object %u in %s",
1885                                     i, p->pack_name);
1886
1887                r = cb(&oid, p, i, data);
1888                if (r)
1889                        break;
1890        }
1891        return r;
1892}
1893
1894int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1895{
1896        struct packed_git *p;
1897        int r = 0;
1898        int pack_errors = 0;
1899
1900        prepare_packed_git();
1901        for (p = packed_git; p; p = p->next) {
1902                if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1903                        continue;
1904                if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
1905                    !p->pack_promisor)
1906                        continue;
1907                if (open_pack_index(p)) {
1908                        pack_errors = 1;
1909                        continue;
1910                }
1911                r = for_each_object_in_pack(p, cb, data);
1912                if (r)
1913                        break;
1914        }
1915        return r ? r : pack_errors;
1916}
1917
1918static int add_promisor_object(const struct object_id *oid,
1919                               struct packed_git *pack,
1920                               uint32_t pos,
1921                               void *set_)
1922{
1923        struct oidset *set = set_;
1924        struct object *obj = parse_object(oid);
1925        if (!obj)
1926                return 1;
1927
1928        oidset_insert(set, oid);
1929
1930        /*
1931         * If this is a tree, commit, or tag, the objects it refers
1932         * to are also promisor objects. (Blobs refer to no objects.)
1933         */
1934        if (obj->type == OBJ_TREE) {
1935                struct tree *tree = (struct tree *)obj;
1936                struct tree_desc desc;
1937                struct name_entry entry;
1938                if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
1939                        /*
1940                         * Error messages are given when packs are
1941                         * verified, so do not print any here.
1942                         */
1943                        return 0;
1944                while (tree_entry_gently(&desc, &entry))
1945                        oidset_insert(set, entry.oid);
1946        } else if (obj->type == OBJ_COMMIT) {
1947                struct commit *commit = (struct commit *) obj;
1948                struct commit_list *parents = commit->parents;
1949
1950                oidset_insert(set, &commit->tree->object.oid);
1951                for (; parents; parents = parents->next)
1952                        oidset_insert(set, &parents->item->object.oid);
1953        } else if (obj->type == OBJ_TAG) {
1954                struct tag *tag = (struct tag *) obj;
1955                oidset_insert(set, &tag->tagged->oid);
1956        }
1957        return 0;
1958}
1959
1960int is_promisor_object(const struct object_id *oid)
1961{
1962        static struct oidset promisor_objects;
1963        static int promisor_objects_prepared;
1964
1965        if (!promisor_objects_prepared) {
1966                if (repository_format_partial_clone) {
1967                        for_each_packed_object(add_promisor_object,
1968                                               &promisor_objects,
1969                                               FOR_EACH_OBJECT_PROMISOR_ONLY);
1970                }
1971                promisor_objects_prepared = 1;
1972        }
1973        return oidset_contains(&promisor_objects, oid);
1974}