3d8c1e88a53e23d8c6b93f1b8b915adf15b43ecd
   1/*
   2 * GIT - The information manager from hell
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This handles basic git sha1 object files - packing, unpacking,
   7 * creation etc.
   8 */
   9#include "cache.h"
  10#include "string-list.h"
  11#include "lockfile.h"
  12#include "delta.h"
  13#include "pack.h"
  14#include "blob.h"
  15#include "commit.h"
  16#include "run-command.h"
  17#include "tag.h"
  18#include "tree.h"
  19#include "tree-walk.h"
  20#include "refs.h"
  21#include "pack-revindex.h"
  22#include "sha1-lookup.h"
  23#include "bulk-checkin.h"
  24#include "streaming.h"
  25#include "dir.h"
  26#include "mru.h"
  27#include "list.h"
  28#include "mergesort.h"
  29
  30#ifndef O_NOATIME
  31#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
  32#define O_NOATIME 01000000
  33#else
  34#define O_NOATIME 0
  35#endif
  36#endif
  37
  38#define SZ_FMT PRIuMAX
  39static inline uintmax_t sz_fmt(size_t s) { return s; }
  40
  41const unsigned char null_sha1[20];
  42const struct object_id null_oid;
  43const struct object_id empty_tree_oid = {
  44        EMPTY_TREE_SHA1_BIN_LITERAL
  45};
  46const struct object_id empty_blob_oid = {
  47        EMPTY_BLOB_SHA1_BIN_LITERAL
  48};
  49
  50/*
  51 * This is meant to hold a *small* number of objects that you would
  52 * want read_sha1_file() to be able to return, but yet you do not want
  53 * to write them into the object store (e.g. a browse-only
  54 * application).
  55 */
  56static struct cached_object {
  57        unsigned char sha1[20];
  58        enum object_type type;
  59        void *buf;
  60        unsigned long size;
  61} *cached_objects;
  62static int cached_object_nr, cached_object_alloc;
  63
  64static struct cached_object empty_tree = {
  65        EMPTY_TREE_SHA1_BIN_LITERAL,
  66        OBJ_TREE,
  67        "",
  68        0
  69};
  70
  71static struct cached_object *find_cached_object(const unsigned char *sha1)
  72{
  73        int i;
  74        struct cached_object *co = cached_objects;
  75
  76        for (i = 0; i < cached_object_nr; i++, co++) {
  77                if (!hashcmp(co->sha1, sha1))
  78                        return co;
  79        }
  80        if (!hashcmp(sha1, empty_tree.sha1))
  81                return &empty_tree;
  82        return NULL;
  83}
  84
  85int mkdir_in_gitdir(const char *path)
  86{
  87        if (mkdir(path, 0777)) {
  88                int saved_errno = errno;
  89                struct stat st;
  90                struct strbuf sb = STRBUF_INIT;
  91
  92                if (errno != EEXIST)
  93                        return -1;
  94                /*
  95                 * Are we looking at a path in a symlinked worktree
  96                 * whose original repository does not yet have it?
  97                 * e.g. .git/rr-cache pointing at its original
  98                 * repository in which the user hasn't performed any
  99                 * conflict resolution yet?
 100                 */
 101                if (lstat(path, &st) || !S_ISLNK(st.st_mode) ||
 102                    strbuf_readlink(&sb, path, st.st_size) ||
 103                    !is_absolute_path(sb.buf) ||
 104                    mkdir(sb.buf, 0777)) {
 105                        strbuf_release(&sb);
 106                        errno = saved_errno;
 107                        return -1;
 108                }
 109                strbuf_release(&sb);
 110        }
 111        return adjust_shared_perm(path);
 112}
 113
 114enum scld_error safe_create_leading_directories(char *path)
 115{
 116        char *next_component = path + offset_1st_component(path);
 117        enum scld_error ret = SCLD_OK;
 118
 119        while (ret == SCLD_OK && next_component) {
 120                struct stat st;
 121                char *slash = next_component, slash_character;
 122
 123                while (*slash && !is_dir_sep(*slash))
 124                        slash++;
 125
 126                if (!*slash)
 127                        break;
 128
 129                next_component = slash + 1;
 130                while (is_dir_sep(*next_component))
 131                        next_component++;
 132                if (!*next_component)
 133                        break;
 134
 135                slash_character = *slash;
 136                *slash = '\0';
 137                if (!stat(path, &st)) {
 138                        /* path exists */
 139                        if (!S_ISDIR(st.st_mode))
 140                                ret = SCLD_EXISTS;
 141                } else if (mkdir(path, 0777)) {
 142                        if (errno == EEXIST &&
 143                            !stat(path, &st) && S_ISDIR(st.st_mode))
 144                                ; /* somebody created it since we checked */
 145                        else if (errno == ENOENT)
 146                                /*
 147                                 * Either mkdir() failed because
 148                                 * somebody just pruned the containing
 149                                 * directory, or stat() failed because
 150                                 * the file that was in our way was
 151                                 * just removed.  Either way, inform
 152                                 * the caller that it might be worth
 153                                 * trying again:
 154                                 */
 155                                ret = SCLD_VANISHED;
 156                        else
 157                                ret = SCLD_FAILED;
 158                } else if (adjust_shared_perm(path)) {
 159                        ret = SCLD_PERMS;
 160                }
 161                *slash = slash_character;
 162        }
 163        return ret;
 164}
 165
 166enum scld_error safe_create_leading_directories_const(const char *path)
 167{
 168        /* path points to cache entries, so xstrdup before messing with it */
 169        char *buf = xstrdup(path);
 170        enum scld_error result = safe_create_leading_directories(buf);
 171        free(buf);
 172        return result;
 173}
 174
 175static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
 176{
 177        int i;
 178        for (i = 0; i < 20; i++) {
 179                static char hex[] = "0123456789abcdef";
 180                unsigned int val = sha1[i];
 181                char *pos = pathbuf + i*2 + (i > 0);
 182                *pos++ = hex[val >> 4];
 183                *pos = hex[val & 0xf];
 184        }
 185}
 186
 187const char *sha1_file_name(const unsigned char *sha1)
 188{
 189        static char buf[PATH_MAX];
 190        const char *objdir;
 191        int len;
 192
 193        objdir = get_object_directory();
 194        len = strlen(objdir);
 195
 196        /* '/' + sha1(2) + '/' + sha1(38) + '\0' */
 197        if (len + 43 > PATH_MAX)
 198                die("insanely long object directory %s", objdir);
 199        memcpy(buf, objdir, len);
 200        buf[len] = '/';
 201        buf[len+3] = '/';
 202        buf[len+42] = '\0';
 203        fill_sha1_path(buf + len + 1, sha1);
 204        return buf;
 205}
 206
 207/*
 208 * Return the name of the pack or index file with the specified sha1
 209 * in its filename.  *base and *name are scratch space that must be
 210 * provided by the caller.  which should be "pack" or "idx".
 211 */
 212static char *sha1_get_pack_name(const unsigned char *sha1,
 213                                struct strbuf *buf,
 214                                const char *which)
 215{
 216        strbuf_reset(buf);
 217        strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
 218                    sha1_to_hex(sha1), which);
 219        return buf->buf;
 220}
 221
 222char *sha1_pack_name(const unsigned char *sha1)
 223{
 224        static struct strbuf buf = STRBUF_INIT;
 225        return sha1_get_pack_name(sha1, &buf, "pack");
 226}
 227
 228char *sha1_pack_index_name(const unsigned char *sha1)
 229{
 230        static struct strbuf buf = STRBUF_INIT;
 231        return sha1_get_pack_name(sha1, &buf, "idx");
 232}
 233
 234struct alternate_object_database *alt_odb_list;
 235static struct alternate_object_database **alt_odb_tail;
 236
 237/*
 238 * Return non-zero iff the path is usable as an alternate object database.
 239 */
 240static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
 241{
 242        struct alternate_object_database *alt;
 243
 244        /* Detect cases where alternate disappeared */
 245        if (!is_directory(path->buf)) {
 246                error("object directory %s does not exist; "
 247                      "check .git/objects/info/alternates.",
 248                      path->buf);
 249                return 0;
 250        }
 251
 252        /*
 253         * Prevent the common mistake of listing the same
 254         * thing twice, or object directory itself.
 255         */
 256        for (alt = alt_odb_list; alt; alt = alt->next) {
 257                if (path->len == alt->name - alt->base - 1 &&
 258                    !memcmp(path->buf, alt->base, path->len))
 259                        return 0;
 260        }
 261        if (!fspathcmp(path->buf, normalized_objdir))
 262                return 0;
 263
 264        return 1;
 265}
 266
 267/*
 268 * Prepare alternate object database registry.
 269 *
 270 * The variable alt_odb_list points at the list of struct
 271 * alternate_object_database.  The elements on this list come from
 272 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
 273 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
 274 * whose contents is similar to that environment variable but can be
 275 * LF separated.  Its base points at a statically allocated buffer that
 276 * contains "/the/directory/corresponding/to/.git/objects/...", while
 277 * its name points just after the slash at the end of ".git/objects/"
 278 * in the example above, and has enough space to hold 40-byte hex
 279 * SHA1, an extra slash for the first level indirection, and the
 280 * terminating NUL.
 281 */
 282static int link_alt_odb_entry(const char *entry, const char *relative_base,
 283        int depth, const char *normalized_objdir)
 284{
 285        struct alternate_object_database *ent;
 286        size_t entlen;
 287        struct strbuf pathbuf = STRBUF_INIT;
 288
 289        if (!is_absolute_path(entry) && relative_base) {
 290                strbuf_addstr(&pathbuf, real_path(relative_base));
 291                strbuf_addch(&pathbuf, '/');
 292        }
 293        strbuf_addstr(&pathbuf, entry);
 294
 295        if (strbuf_normalize_path(&pathbuf) < 0) {
 296                error("unable to normalize alternate object path: %s",
 297                      pathbuf.buf);
 298                strbuf_release(&pathbuf);
 299                return -1;
 300        }
 301
 302        /*
 303         * The trailing slash after the directory name is given by
 304         * this function at the end. Remove duplicates.
 305         */
 306        while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
 307                strbuf_setlen(&pathbuf, pathbuf.len - 1);
 308
 309        if (!alt_odb_usable(&pathbuf, normalized_objdir)) {
 310                strbuf_release(&pathbuf);
 311                return -1;
 312        }
 313
 314        entlen = st_add(pathbuf.len, 43); /* '/' + 2 hex + '/' + 38 hex + NUL */
 315        ent = xmalloc(st_add(sizeof(*ent), entlen));
 316        memcpy(ent->base, pathbuf.buf, pathbuf.len);
 317
 318        ent->name = ent->base + pathbuf.len + 1;
 319        ent->base[pathbuf.len] = '/';
 320        ent->base[pathbuf.len + 3] = '/';
 321        ent->base[entlen-1] = 0;
 322
 323        /* add the alternate entry */
 324        *alt_odb_tail = ent;
 325        alt_odb_tail = &(ent->next);
 326        ent->next = NULL;
 327
 328        /* recursively add alternates */
 329        read_info_alternates(pathbuf.buf, depth + 1);
 330
 331        strbuf_release(&pathbuf);
 332        return 0;
 333}
 334
 335static void link_alt_odb_entries(const char *alt, int len, int sep,
 336                                 const char *relative_base, int depth)
 337{
 338        struct string_list entries = STRING_LIST_INIT_NODUP;
 339        char *alt_copy;
 340        int i;
 341        struct strbuf objdirbuf = STRBUF_INIT;
 342
 343        if (depth > 5) {
 344                error("%s: ignoring alternate object stores, nesting too deep.",
 345                                relative_base);
 346                return;
 347        }
 348
 349        strbuf_add_absolute_path(&objdirbuf, get_object_directory());
 350        if (strbuf_normalize_path(&objdirbuf) < 0)
 351                die("unable to normalize object directory: %s",
 352                    objdirbuf.buf);
 353
 354        alt_copy = xmemdupz(alt, len);
 355        string_list_split_in_place(&entries, alt_copy, sep, -1);
 356        for (i = 0; i < entries.nr; i++) {
 357                const char *entry = entries.items[i].string;
 358                if (entry[0] == '\0' || entry[0] == '#')
 359                        continue;
 360                if (!is_absolute_path(entry) && depth) {
 361                        error("%s: ignoring relative alternate object store %s",
 362                                        relative_base, entry);
 363                } else {
 364                        link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf);
 365                }
 366        }
 367        string_list_clear(&entries, 0);
 368        free(alt_copy);
 369        strbuf_release(&objdirbuf);
 370}
 371
 372void read_info_alternates(const char * relative_base, int depth)
 373{
 374        char *map;
 375        size_t mapsz;
 376        struct stat st;
 377        char *path;
 378        int fd;
 379
 380        path = xstrfmt("%s/info/alternates", relative_base);
 381        fd = git_open_noatime(path);
 382        free(path);
 383        if (fd < 0)
 384                return;
 385        if (fstat(fd, &st) || (st.st_size == 0)) {
 386                close(fd);
 387                return;
 388        }
 389        mapsz = xsize_t(st.st_size);
 390        map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
 391        close(fd);
 392
 393        link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);
 394
 395        munmap(map, mapsz);
 396}
 397
 398void add_to_alternates_file(const char *reference)
 399{
 400        struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
 401        char *alts = git_pathdup("objects/info/alternates");
 402        FILE *in, *out;
 403
 404        hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
 405        out = fdopen_lock_file(lock, "w");
 406        if (!out)
 407                die_errno("unable to fdopen alternates lockfile");
 408
 409        in = fopen(alts, "r");
 410        if (in) {
 411                struct strbuf line = STRBUF_INIT;
 412                int found = 0;
 413
 414                while (strbuf_getline(&line, in) != EOF) {
 415                        if (!strcmp(reference, line.buf)) {
 416                                found = 1;
 417                                break;
 418                        }
 419                        fprintf_or_die(out, "%s\n", line.buf);
 420                }
 421
 422                strbuf_release(&line);
 423                fclose(in);
 424
 425                if (found) {
 426                        rollback_lock_file(lock);
 427                        lock = NULL;
 428                }
 429        }
 430        else if (errno != ENOENT)
 431                die_errno("unable to read alternates file");
 432
 433        if (lock) {
 434                fprintf_or_die(out, "%s\n", reference);
 435                if (commit_lock_file(lock))
 436                        die_errno("unable to move new alternates file into place");
 437                if (alt_odb_tail)
 438                        link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
 439        }
 440        free(alts);
 441}
 442
 443/*
 444 * Compute the exact path an alternate is at and returns it. In case of
 445 * error NULL is returned and the human readable error is added to `err`
 446 * `path` may be relative and should point to $GITDIR.
 447 * `err` must not be null.
 448 */
 449char *compute_alternate_path(const char *path, struct strbuf *err)
 450{
 451        char *ref_git = NULL;
 452        const char *repo, *ref_git_s;
 453        int seen_error = 0;
 454
 455        ref_git_s = real_path_if_valid(path);
 456        if (!ref_git_s) {
 457                seen_error = 1;
 458                strbuf_addf(err, _("path '%s' does not exist"), path);
 459                goto out;
 460        } else
 461                /*
 462                 * Beware: read_gitfile(), real_path() and mkpath()
 463                 * return static buffer
 464                 */
 465                ref_git = xstrdup(ref_git_s);
 466
 467        repo = read_gitfile(ref_git);
 468        if (!repo)
 469                repo = read_gitfile(mkpath("%s/.git", ref_git));
 470        if (repo) {
 471                free(ref_git);
 472                ref_git = xstrdup(repo);
 473        }
 474
 475        if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
 476                char *ref_git_git = mkpathdup("%s/.git", ref_git);
 477                free(ref_git);
 478                ref_git = ref_git_git;
 479        } else if (!is_directory(mkpath("%s/objects", ref_git))) {
 480                struct strbuf sb = STRBUF_INIT;
 481                seen_error = 1;
 482                if (get_common_dir(&sb, ref_git)) {
 483                        strbuf_addf(err,
 484                                    _("reference repository '%s' as a linked "
 485                                      "checkout is not supported yet."),
 486                                    path);
 487                        goto out;
 488                }
 489
 490                strbuf_addf(err, _("reference repository '%s' is not a "
 491                                        "local repository."), path);
 492                goto out;
 493        }
 494
 495        if (!access(mkpath("%s/shallow", ref_git), F_OK)) {
 496                strbuf_addf(err, _("reference repository '%s' is shallow"),
 497                            path);
 498                seen_error = 1;
 499                goto out;
 500        }
 501
 502        if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) {
 503                strbuf_addf(err,
 504                            _("reference repository '%s' is grafted"),
 505                            path);
 506                seen_error = 1;
 507                goto out;
 508        }
 509
 510out:
 511        if (seen_error) {
 512                free(ref_git);
 513                ref_git = NULL;
 514        }
 515
 516        return ref_git;
 517}
 518
 519int foreach_alt_odb(alt_odb_fn fn, void *cb)
 520{
 521        struct alternate_object_database *ent;
 522        int r = 0;
 523
 524        prepare_alt_odb();
 525        for (ent = alt_odb_list; ent; ent = ent->next) {
 526                r = fn(ent, cb);
 527                if (r)
 528                        break;
 529        }
 530        return r;
 531}
 532
 533void prepare_alt_odb(void)
 534{
 535        const char *alt;
 536
 537        if (alt_odb_tail)
 538                return;
 539
 540        alt = getenv(ALTERNATE_DB_ENVIRONMENT);
 541        if (!alt) alt = "";
 542
 543        alt_odb_tail = &alt_odb_list;
 544        link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
 545
 546        read_info_alternates(get_object_directory(), 0);
 547}
 548
 549/* Returns 1 if we have successfully freshened the file, 0 otherwise. */
 550static int freshen_file(const char *fn)
 551{
 552        struct utimbuf t;
 553        t.actime = t.modtime = time(NULL);
 554        return !utime(fn, &t);
 555}
 556
 557/*
 558 * All of the check_and_freshen functions return 1 if the file exists and was
 559 * freshened (if freshening was requested), 0 otherwise. If they return
 560 * 0, you should not assume that it is safe to skip a write of the object (it
 561 * either does not exist on disk, or has a stale mtime and may be subject to
 562 * pruning).
 563 */
 564static int check_and_freshen_file(const char *fn, int freshen)
 565{
 566        if (access(fn, F_OK))
 567                return 0;
 568        if (freshen && !freshen_file(fn))
 569                return 0;
 570        return 1;
 571}
 572
 573static int check_and_freshen_local(const unsigned char *sha1, int freshen)
 574{
 575        return check_and_freshen_file(sha1_file_name(sha1), freshen);
 576}
 577
 578static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
 579{
 580        struct alternate_object_database *alt;
 581        prepare_alt_odb();
 582        for (alt = alt_odb_list; alt; alt = alt->next) {
 583                fill_sha1_path(alt->name, sha1);
 584                if (check_and_freshen_file(alt->base, freshen))
 585                        return 1;
 586        }
 587        return 0;
 588}
 589
 590static int check_and_freshen(const unsigned char *sha1, int freshen)
 591{
 592        return check_and_freshen_local(sha1, freshen) ||
 593               check_and_freshen_nonlocal(sha1, freshen);
 594}
 595
 596int has_loose_object_nonlocal(const unsigned char *sha1)
 597{
 598        return check_and_freshen_nonlocal(sha1, 0);
 599}
 600
 601static int has_loose_object(const unsigned char *sha1)
 602{
 603        return check_and_freshen(sha1, 0);
 604}
 605
 606static unsigned int pack_used_ctr;
 607static unsigned int pack_mmap_calls;
 608static unsigned int peak_pack_open_windows;
 609static unsigned int pack_open_windows;
 610static unsigned int pack_open_fds;
 611static unsigned int pack_max_fds;
 612static size_t peak_pack_mapped;
 613static size_t pack_mapped;
 614struct packed_git *packed_git;
 615
 616static struct mru packed_git_mru_storage;
 617struct mru *packed_git_mru = &packed_git_mru_storage;
 618
 619void pack_report(void)
 620{
 621        fprintf(stderr,
 622                "pack_report: getpagesize()            = %10" SZ_FMT "\n"
 623                "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
 624                "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
 625                sz_fmt(getpagesize()),
 626                sz_fmt(packed_git_window_size),
 627                sz_fmt(packed_git_limit));
 628        fprintf(stderr,
 629                "pack_report: pack_used_ctr            = %10u\n"
 630                "pack_report: pack_mmap_calls          = %10u\n"
 631                "pack_report: pack_open_windows        = %10u / %10u\n"
 632                "pack_report: pack_mapped              = "
 633                        "%10" SZ_FMT " / %10" SZ_FMT "\n",
 634                pack_used_ctr,
 635                pack_mmap_calls,
 636                pack_open_windows, peak_pack_open_windows,
 637                sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
 638}
 639
 640/*
 641 * Open and mmap the index file at path, perform a couple of
 642 * consistency checks, then record its information to p.  Return 0 on
 643 * success.
 644 */
 645static int check_packed_git_idx(const char *path, struct packed_git *p)
 646{
 647        void *idx_map;
 648        struct pack_idx_header *hdr;
 649        size_t idx_size;
 650        uint32_t version, nr, i, *index;
 651        int fd = git_open_noatime(path);
 652        struct stat st;
 653
 654        if (fd < 0)
 655                return -1;
 656        if (fstat(fd, &st)) {
 657                close(fd);
 658                return -1;
 659        }
 660        idx_size = xsize_t(st.st_size);
 661        if (idx_size < 4 * 256 + 20 + 20) {
 662                close(fd);
 663                return error("index file %s is too small", path);
 664        }
 665        idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 666        close(fd);
 667
 668        hdr = idx_map;
 669        if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
 670                version = ntohl(hdr->idx_version);
 671                if (version < 2 || version > 2) {
 672                        munmap(idx_map, idx_size);
 673                        return error("index file %s is version %"PRIu32
 674                                     " and is not supported by this binary"
 675                                     " (try upgrading GIT to a newer version)",
 676                                     path, version);
 677                }
 678        } else
 679                version = 1;
 680
 681        nr = 0;
 682        index = idx_map;
 683        if (version > 1)
 684                index += 2;  /* skip index header */
 685        for (i = 0; i < 256; i++) {
 686                uint32_t n = ntohl(index[i]);
 687                if (n < nr) {
 688                        munmap(idx_map, idx_size);
 689                        return error("non-monotonic index %s", path);
 690                }
 691                nr = n;
 692        }
 693
 694        if (version == 1) {
 695                /*
 696                 * Total size:
 697                 *  - 256 index entries 4 bytes each
 698                 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
 699                 *  - 20-byte SHA1 of the packfile
 700                 *  - 20-byte SHA1 file checksum
 701                 */
 702                if (idx_size != 4*256 + nr * 24 + 20 + 20) {
 703                        munmap(idx_map, idx_size);
 704                        return error("wrong index v1 file size in %s", path);
 705                }
 706        } else if (version == 2) {
 707                /*
 708                 * Minimum size:
 709                 *  - 8 bytes of header
 710                 *  - 256 index entries 4 bytes each
 711                 *  - 20-byte sha1 entry * nr
 712                 *  - 4-byte crc entry * nr
 713                 *  - 4-byte offset entry * nr
 714                 *  - 20-byte SHA1 of the packfile
 715                 *  - 20-byte SHA1 file checksum
 716                 * And after the 4-byte offset table might be a
 717                 * variable sized table containing 8-byte entries
 718                 * for offsets larger than 2^31.
 719                 */
 720                unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
 721                unsigned long max_size = min_size;
 722                if (nr)
 723                        max_size += (nr - 1)*8;
 724                if (idx_size < min_size || idx_size > max_size) {
 725                        munmap(idx_map, idx_size);
 726                        return error("wrong index v2 file size in %s", path);
 727                }
 728                if (idx_size != min_size &&
 729                    /*
 730                     * make sure we can deal with large pack offsets.
 731                     * 31-bit signed offset won't be enough, neither
 732                     * 32-bit unsigned one will be.
 733                     */
 734                    (sizeof(off_t) <= 4)) {
 735                        munmap(idx_map, idx_size);
 736                        return error("pack too large for current definition of off_t in %s", path);
 737                }
 738        }
 739
 740        p->index_version = version;
 741        p->index_data = idx_map;
 742        p->index_size = idx_size;
 743        p->num_objects = nr;
 744        return 0;
 745}
 746
 747int open_pack_index(struct packed_git *p)
 748{
 749        char *idx_name;
 750        size_t len;
 751        int ret;
 752
 753        if (p->index_data)
 754                return 0;
 755
 756        if (!strip_suffix(p->pack_name, ".pack", &len))
 757                die("BUG: pack_name does not end in .pack");
 758        idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
 759        ret = check_packed_git_idx(idx_name, p);
 760        free(idx_name);
 761        return ret;
 762}
 763
 764static void scan_windows(struct packed_git *p,
 765        struct packed_git **lru_p,
 766        struct pack_window **lru_w,
 767        struct pack_window **lru_l)
 768{
 769        struct pack_window *w, *w_l;
 770
 771        for (w_l = NULL, w = p->windows; w; w = w->next) {
 772                if (!w->inuse_cnt) {
 773                        if (!*lru_w || w->last_used < (*lru_w)->last_used) {
 774                                *lru_p = p;
 775                                *lru_w = w;
 776                                *lru_l = w_l;
 777                        }
 778                }
 779                w_l = w;
 780        }
 781}
 782
 783static int unuse_one_window(struct packed_git *current)
 784{
 785        struct packed_git *p, *lru_p = NULL;
 786        struct pack_window *lru_w = NULL, *lru_l = NULL;
 787
 788        if (current)
 789                scan_windows(current, &lru_p, &lru_w, &lru_l);
 790        for (p = packed_git; p; p = p->next)
 791                scan_windows(p, &lru_p, &lru_w, &lru_l);
 792        if (lru_p) {
 793                munmap(lru_w->base, lru_w->len);
 794                pack_mapped -= lru_w->len;
 795                if (lru_l)
 796                        lru_l->next = lru_w->next;
 797                else
 798                        lru_p->windows = lru_w->next;
 799                free(lru_w);
 800                pack_open_windows--;
 801                return 1;
 802        }
 803        return 0;
 804}
 805
 806void release_pack_memory(size_t need)
 807{
 808        size_t cur = pack_mapped;
 809        while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
 810                ; /* nothing */
 811}
 812
 813static void mmap_limit_check(size_t length)
 814{
 815        static size_t limit = 0;
 816        if (!limit) {
 817                limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
 818                if (!limit)
 819                        limit = SIZE_MAX;
 820        }
 821        if (length > limit)
 822                die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
 823                    (uintmax_t)length, (uintmax_t)limit);
 824}
 825
 826void *xmmap_gently(void *start, size_t length,
 827                  int prot, int flags, int fd, off_t offset)
 828{
 829        void *ret;
 830
 831        mmap_limit_check(length);
 832        ret = mmap(start, length, prot, flags, fd, offset);
 833        if (ret == MAP_FAILED) {
 834                if (!length)
 835                        return NULL;
 836                release_pack_memory(length);
 837                ret = mmap(start, length, prot, flags, fd, offset);
 838        }
 839        return ret;
 840}
 841
 842void *xmmap(void *start, size_t length,
 843        int prot, int flags, int fd, off_t offset)
 844{
 845        void *ret = xmmap_gently(start, length, prot, flags, fd, offset);
 846        if (ret == MAP_FAILED)
 847                die_errno("mmap failed");
 848        return ret;
 849}
 850
 851void close_pack_windows(struct packed_git *p)
 852{
 853        while (p->windows) {
 854                struct pack_window *w = p->windows;
 855
 856                if (w->inuse_cnt)
 857                        die("pack '%s' still has open windows to it",
 858                            p->pack_name);
 859                munmap(w->base, w->len);
 860                pack_mapped -= w->len;
 861                pack_open_windows--;
 862                p->windows = w->next;
 863                free(w);
 864        }
 865}
 866
 867static int close_pack_fd(struct packed_git *p)
 868{
 869        if (p->pack_fd < 0)
 870                return 0;
 871
 872        close(p->pack_fd);
 873        pack_open_fds--;
 874        p->pack_fd = -1;
 875
 876        return 1;
 877}
 878
 879static void close_pack(struct packed_git *p)
 880{
 881        close_pack_windows(p);
 882        close_pack_fd(p);
 883        close_pack_index(p);
 884}
 885
 886void close_all_packs(void)
 887{
 888        struct packed_git *p;
 889
 890        for (p = packed_git; p; p = p->next)
 891                if (p->do_not_close)
 892                        die("BUG: want to close pack marked 'do-not-close'");
 893                else
 894                        close_pack(p);
 895}
 896
 897
 898/*
 899 * The LRU pack is the one with the oldest MRU window, preferring packs
 900 * with no used windows, or the oldest mtime if it has no windows allocated.
 901 */
 902static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
 903{
 904        struct pack_window *w, *this_mru_w;
 905        int has_windows_inuse = 0;
 906
 907        /*
 908         * Reject this pack if it has windows and the previously selected
 909         * one does not.  If this pack does not have windows, reject
 910         * it if the pack file is newer than the previously selected one.
 911         */
 912        if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
 913                return;
 914
 915        for (w = this_mru_w = p->windows; w; w = w->next) {
 916                /*
 917                 * Reject this pack if any of its windows are in use,
 918                 * but the previously selected pack did not have any
 919                 * inuse windows.  Otherwise, record that this pack
 920                 * has windows in use.
 921                 */
 922                if (w->inuse_cnt) {
 923                        if (*accept_windows_inuse)
 924                                has_windows_inuse = 1;
 925                        else
 926                                return;
 927                }
 928
 929                if (w->last_used > this_mru_w->last_used)
 930                        this_mru_w = w;
 931
 932                /*
 933                 * Reject this pack if it has windows that have been
 934                 * used more recently than the previously selected pack.
 935                 * If the previously selected pack had windows inuse and
 936                 * we have not encountered a window in this pack that is
 937                 * inuse, skip this check since we prefer a pack with no
 938                 * inuse windows to one that has inuse windows.
 939                 */
 940                if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
 941                    this_mru_w->last_used > (*mru_w)->last_used)
 942                        return;
 943        }
 944
 945        /*
 946         * Select this pack.
 947         */
 948        *mru_w = this_mru_w;
 949        *lru_p = p;
 950        *accept_windows_inuse = has_windows_inuse;
 951}
 952
 953static int close_one_pack(void)
 954{
 955        struct packed_git *p, *lru_p = NULL;
 956        struct pack_window *mru_w = NULL;
 957        int accept_windows_inuse = 1;
 958
 959        for (p = packed_git; p; p = p->next) {
 960                if (p->pack_fd == -1)
 961                        continue;
 962                find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
 963        }
 964
 965        if (lru_p)
 966                return close_pack_fd(lru_p);
 967
 968        return 0;
 969}
 970
 971void unuse_pack(struct pack_window **w_cursor)
 972{
 973        struct pack_window *w = *w_cursor;
 974        if (w) {
 975                w->inuse_cnt--;
 976                *w_cursor = NULL;
 977        }
 978}
 979
 980void close_pack_index(struct packed_git *p)
 981{
 982        if (p->index_data) {
 983                munmap((void *)p->index_data, p->index_size);
 984                p->index_data = NULL;
 985        }
 986}
 987
 988static unsigned int get_max_fd_limit(void)
 989{
 990#ifdef RLIMIT_NOFILE
 991        {
 992                struct rlimit lim;
 993
 994                if (!getrlimit(RLIMIT_NOFILE, &lim))
 995                        return lim.rlim_cur;
 996        }
 997#endif
 998
 999#ifdef _SC_OPEN_MAX
1000        {
1001                long open_max = sysconf(_SC_OPEN_MAX);
1002                if (0 < open_max)
1003                        return open_max;
1004                /*
1005                 * Otherwise, we got -1 for one of the two
1006                 * reasons:
1007                 *
1008                 * (1) sysconf() did not understand _SC_OPEN_MAX
1009                 *     and signaled an error with -1; or
1010                 * (2) sysconf() said there is no limit.
1011                 *
1012                 * We _could_ clear errno before calling sysconf() to
1013                 * tell these two cases apart and return a huge number
1014                 * in the latter case to let the caller cap it to a
1015                 * value that is not so selfish, but letting the
1016                 * fallback OPEN_MAX codepath take care of these cases
1017                 * is a lot simpler.
1018                 */
1019        }
1020#endif
1021
1022#ifdef OPEN_MAX
1023        return OPEN_MAX;
1024#else
1025        return 1; /* see the caller ;-) */
1026#endif
1027}
1028
1029/*
1030 * Do not call this directly as this leaks p->pack_fd on error return;
1031 * call open_packed_git() instead.
1032 */
1033static int open_packed_git_1(struct packed_git *p)
1034{
1035        struct stat st;
1036        struct pack_header hdr;
1037        unsigned char sha1[20];
1038        unsigned char *idx_sha1;
1039        long fd_flag;
1040
1041        if (!p->index_data && open_pack_index(p))
1042                return error("packfile %s index unavailable", p->pack_name);
1043
1044        if (!pack_max_fds) {
1045                unsigned int max_fds = get_max_fd_limit();
1046
1047                /* Save 3 for stdin/stdout/stderr, 22 for work */
1048                if (25 < max_fds)
1049                        pack_max_fds = max_fds - 25;
1050                else
1051                        pack_max_fds = 1;
1052        }
1053
1054        while (pack_max_fds <= pack_open_fds && close_one_pack())
1055                ; /* nothing */
1056
1057        p->pack_fd = git_open_noatime(p->pack_name);
1058        if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
1059                return -1;
1060        pack_open_fds++;
1061
1062        /* If we created the struct before we had the pack we lack size. */
1063        if (!p->pack_size) {
1064                if (!S_ISREG(st.st_mode))
1065                        return error("packfile %s not a regular file", p->pack_name);
1066                p->pack_size = st.st_size;
1067        } else if (p->pack_size != st.st_size)
1068                return error("packfile %s size changed", p->pack_name);
1069
1070        /* We leave these file descriptors open with sliding mmap;
1071         * there is no point keeping them open across exec(), though.
1072         */
1073        fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
1074        if (fd_flag < 0)
1075                return error("cannot determine file descriptor flags");
1076        fd_flag |= FD_CLOEXEC;
1077        if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1078                return error("cannot set FD_CLOEXEC");
1079
1080        /* Verify we recognize this pack file format. */
1081        if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
1082                return error("file %s is far too short to be a packfile", p->pack_name);
1083        if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
1084                return error("file %s is not a GIT packfile", p->pack_name);
1085        if (!pack_version_ok(hdr.hdr_version))
1086                return error("packfile %s is version %"PRIu32" and not"
1087                        " supported (try upgrading GIT to a newer version)",
1088                        p->pack_name, ntohl(hdr.hdr_version));
1089
1090        /* Verify the pack matches its index. */
1091        if (p->num_objects != ntohl(hdr.hdr_entries))
1092                return error("packfile %s claims to have %"PRIu32" objects"
1093                             " while index indicates %"PRIu32" objects",
1094                             p->pack_name, ntohl(hdr.hdr_entries),
1095                             p->num_objects);
1096        if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
1097                return error("end of packfile %s is unavailable", p->pack_name);
1098        if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
1099                return error("packfile %s signature is unavailable", p->pack_name);
1100        idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
1101        if (hashcmp(sha1, idx_sha1))
1102                return error("packfile %s does not match index", p->pack_name);
1103        return 0;
1104}
1105
/*
 * Open and validate the pack file of "p" via open_packed_git_1().
 * On failure the pack's descriptor (if one was opened) is closed
 * again.  Returns 0 on success, -1 on failure.
 */
static int open_packed_git(struct packed_git *p)
{
	if (open_packed_git_1(p)) {
		close_pack_fd(p);
		return -1;
	}
	return 0;
}
1113
1114static int in_window(struct pack_window *win, off_t offset)
1115{
1116        /* We must promise at least 20 bytes (one hash) after the
1117         * offset is available from this window, otherwise the offset
1118         * is not actually in this window and a different window (which
1119         * has that one hash excess) must be used.  This is to support
1120         * the object header and delta base parsing routines below.
1121         */
1122        off_t win_off = win->offset;
1123        return win_off <= offset
1124                && (offset + 20) <= (win_off + win->len);
1125}
1126
1127unsigned char *use_pack(struct packed_git *p,
1128                struct pack_window **w_cursor,
1129                off_t offset,
1130                unsigned long *left)
1131{
1132        struct pack_window *win = *w_cursor;
1133
1134        /* Since packfiles end in a hash of their content and it's
1135         * pointless to ask for an offset into the middle of that
1136         * hash, and the in_window function above wouldn't match
1137         * don't allow an offset too close to the end of the file.
1138         */
1139        if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
1140                die("packfile %s cannot be accessed", p->pack_name);
1141        if (offset > (p->pack_size - 20))
1142                die("offset beyond end of packfile (truncated pack?)");
1143        if (offset < 0)
1144                die(_("offset before end of packfile (broken .idx?)"));
1145
1146        if (!win || !in_window(win, offset)) {
1147                if (win)
1148                        win->inuse_cnt--;
1149                for (win = p->windows; win; win = win->next) {
1150                        if (in_window(win, offset))
1151                                break;
1152                }
1153                if (!win) {
1154                        size_t window_align = packed_git_window_size / 2;
1155                        off_t len;
1156
1157                        if (p->pack_fd == -1 && open_packed_git(p))
1158                                die("packfile %s cannot be accessed", p->pack_name);
1159
1160                        win = xcalloc(1, sizeof(*win));
1161                        win->offset = (offset / window_align) * window_align;
1162                        len = p->pack_size - win->offset;
1163                        if (len > packed_git_window_size)
1164                                len = packed_git_window_size;
1165                        win->len = (size_t)len;
1166                        pack_mapped += win->len;
1167                        while (packed_git_limit < pack_mapped
1168                                && unuse_one_window(p))
1169                                ; /* nothing */
1170                        win->base = xmmap(NULL, win->len,
1171                                PROT_READ, MAP_PRIVATE,
1172                                p->pack_fd, win->offset);
1173                        if (win->base == MAP_FAILED)
1174                                die_errno("packfile %s cannot be mapped",
1175                                          p->pack_name);
1176                        if (!win->offset && win->len == p->pack_size
1177                                && !p->do_not_close)
1178                                close_pack_fd(p);
1179                        pack_mmap_calls++;
1180                        pack_open_windows++;
1181                        if (pack_mapped > peak_pack_mapped)
1182                                peak_pack_mapped = pack_mapped;
1183                        if (pack_open_windows > peak_pack_open_windows)
1184                                peak_pack_open_windows = pack_open_windows;
1185                        win->next = p->windows;
1186                        p->windows = win;
1187                }
1188        }
1189        if (win != *w_cursor) {
1190                win->last_used = pack_used_ctr++;
1191                win->inuse_cnt++;
1192                *w_cursor = win;
1193        }
1194        offset -= win->offset;
1195        if (left)
1196                *left = win->len - xsize_t(offset);
1197        return win->base + offset;
1198}
1199
1200static struct packed_git *alloc_packed_git(int extra)
1201{
1202        struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
1203        memset(p, 0, sizeof(*p));
1204        p->pack_fd = -1;
1205        return p;
1206}
1207
/*
 * Callback registered with set_try_to_free_routine(): forwards the
 * requested size to release_pack_memory().
 */
static void try_to_free_pack_memory(size_t size)
{
	release_pack_memory(size);
}
1212
1213struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
1214{
1215        static int have_set_try_to_free_routine;
1216        struct stat st;
1217        size_t alloc;
1218        struct packed_git *p;
1219
1220        if (!have_set_try_to_free_routine) {
1221                have_set_try_to_free_routine = 1;
1222                set_try_to_free_routine(try_to_free_pack_memory);
1223        }
1224
1225        /*
1226         * Make sure a corresponding .pack file exists and that
1227         * the index looks sane.
1228         */
1229        if (!strip_suffix_mem(path, &path_len, ".idx"))
1230                return NULL;
1231
1232        /*
1233         * ".pack" is long enough to hold any suffix we're adding (and
1234         * the use xsnprintf double-checks that)
1235         */
1236        alloc = st_add3(path_len, strlen(".pack"), 1);
1237        p = alloc_packed_git(alloc);
1238        memcpy(p->pack_name, path, path_len);
1239
1240        xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
1241        if (!access(p->pack_name, F_OK))
1242                p->pack_keep = 1;
1243
1244        xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
1245        if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
1246                free(p);
1247                return NULL;
1248        }
1249
1250        /* ok, it looks sane as far as we can check without
1251         * actually mapping the pack file.
1252         */
1253        p->pack_size = st.st_size;
1254        p->pack_local = local;
1255        p->mtime = st.st_mtime;
1256        if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
1257                hashclr(p->sha1);
1258        return p;
1259}
1260
1261struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
1262{
1263        const char *path = sha1_pack_name(sha1);
1264        size_t alloc = st_add(strlen(path), 1);
1265        struct packed_git *p = alloc_packed_git(alloc);
1266
1267        memcpy(p->pack_name, path, alloc); /* includes NUL */
1268        hashcpy(p->sha1, sha1);
1269        if (check_packed_git_idx(idx_path, p)) {
1270                free(p);
1271                return NULL;
1272        }
1273
1274        return p;
1275}
1276
1277void install_packed_git(struct packed_git *pack)
1278{
1279        if (pack->pack_fd != -1)
1280                pack_open_fds++;
1281
1282        pack->next = packed_git;
1283        packed_git = pack;
1284}
1285
1286void (*report_garbage)(unsigned seen_bits, const char *path);
1287
1288static void report_helper(const struct string_list *list,
1289                          int seen_bits, int first, int last)
1290{
1291        if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
1292                return;
1293
1294        for (; first < last; first++)
1295                report_garbage(seen_bits, list->items[first].string);
1296}
1297
1298static void report_pack_garbage(struct string_list *list)
1299{
1300        int i, baselen = -1, first = 0, seen_bits = 0;
1301
1302        if (!report_garbage)
1303                return;
1304
1305        string_list_sort(list);
1306
1307        for (i = 0; i < list->nr; i++) {
1308                const char *path = list->items[i].string;
1309                if (baselen != -1 &&
1310                    strncmp(path, list->items[first].string, baselen)) {
1311                        report_helper(list, seen_bits, first, i);
1312                        baselen = -1;
1313                        seen_bits = 0;
1314                }
1315                if (baselen == -1) {
1316                        const char *dot = strrchr(path, '.');
1317                        if (!dot) {
1318                                report_garbage(PACKDIR_FILE_GARBAGE, path);
1319                                continue;
1320                        }
1321                        baselen = dot - path + 1;
1322                        first = i;
1323                }
1324                if (!strcmp(path + baselen, "pack"))
1325                        seen_bits |= 1;
1326                else if (!strcmp(path + baselen, "idx"))
1327                        seen_bits |= 2;
1328        }
1329        report_helper(list, seen_bits, first, list->nr);
1330}
1331
1332static void prepare_packed_git_one(char *objdir, int local)
1333{
1334        struct strbuf path = STRBUF_INIT;
1335        size_t dirnamelen;
1336        DIR *dir;
1337        struct dirent *de;
1338        struct string_list garbage = STRING_LIST_INIT_DUP;
1339
1340        strbuf_addstr(&path, objdir);
1341        strbuf_addstr(&path, "/pack");
1342        dir = opendir(path.buf);
1343        if (!dir) {
1344                if (errno != ENOENT)
1345                        error_errno("unable to open object pack directory: %s",
1346                                    path.buf);
1347                strbuf_release(&path);
1348                return;
1349        }
1350        strbuf_addch(&path, '/');
1351        dirnamelen = path.len;
1352        while ((de = readdir(dir)) != NULL) {
1353                struct packed_git *p;
1354                size_t base_len;
1355
1356                if (is_dot_or_dotdot(de->d_name))
1357                        continue;
1358
1359                strbuf_setlen(&path, dirnamelen);
1360                strbuf_addstr(&path, de->d_name);
1361
1362                base_len = path.len;
1363                if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
1364                        /* Don't reopen a pack we already have. */
1365                        for (p = packed_git; p; p = p->next) {
1366                                size_t len;
1367                                if (strip_suffix(p->pack_name, ".pack", &len) &&
1368                                    len == base_len &&
1369                                    !memcmp(p->pack_name, path.buf, len))
1370                                        break;
1371                        }
1372                        if (p == NULL &&
1373                            /*
1374                             * See if it really is a valid .idx file with
1375                             * corresponding .pack file that we can map.
1376                             */
1377                            (p = add_packed_git(path.buf, path.len, local)) != NULL)
1378                                install_packed_git(p);
1379                }
1380
1381                if (!report_garbage)
1382                        continue;
1383
1384                if (ends_with(de->d_name, ".idx") ||
1385                    ends_with(de->d_name, ".pack") ||
1386                    ends_with(de->d_name, ".bitmap") ||
1387                    ends_with(de->d_name, ".keep"))
1388                        string_list_append(&garbage, path.buf);
1389                else
1390                        report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
1391        }
1392        closedir(dir);
1393        report_pack_garbage(&garbage);
1394        string_list_clear(&garbage, 0);
1395        strbuf_release(&path);
1396}
1397
1398static void *get_next_packed_git(const void *p)
1399{
1400        return ((const struct packed_git *)p)->next;
1401}
1402
1403static void set_next_packed_git(void *p, void *next)
1404{
1405        ((struct packed_git *)p)->next = next;
1406}
1407
1408static int sort_pack(const void *a_, const void *b_)
1409{
1410        const struct packed_git *a = a_;
1411        const struct packed_git *b = b_;
1412        int st;
1413
1414        /*
1415         * Local packs tend to contain objects specific to our
1416         * variant of the project than remote ones.  In addition,
1417         * remote ones could be on a network mounted filesystem.
1418         * Favor local ones for these reasons.
1419         */
1420        st = a->pack_local - b->pack_local;
1421        if (st)
1422                return -st;
1423
1424        /*
1425         * Younger packs tend to contain more recent objects,
1426         * and more recent objects tend to get accessed more
1427         * often.
1428         */
1429        if (a->mtime < b->mtime)
1430                return 1;
1431        else if (a->mtime == b->mtime)
1432                return 0;
1433        return -1;
1434}
1435
1436static void rearrange_packed_git(void)
1437{
1438        packed_git = llist_mergesort(packed_git, get_next_packed_git,
1439                                     set_next_packed_git, sort_pack);
1440}
1441
1442static void prepare_packed_git_mru(void)
1443{
1444        struct packed_git *p;
1445
1446        mru_clear(packed_git_mru);
1447        for (p = packed_git; p; p = p->next)
1448                mru_append(packed_git_mru, p);
1449}
1450
1451static int prepare_packed_git_run_once = 0;
1452void prepare_packed_git(void)
1453{
1454        struct alternate_object_database *alt;
1455
1456        if (prepare_packed_git_run_once)
1457                return;
1458        prepare_packed_git_one(get_object_directory(), 1);
1459        prepare_alt_odb();
1460        for (alt = alt_odb_list; alt; alt = alt->next) {
1461                alt->name[-1] = 0;
1462                prepare_packed_git_one(alt->base, 0);
1463                alt->name[-1] = '/';
1464        }
1465        rearrange_packed_git();
1466        prepare_packed_git_mru();
1467        prepare_packed_git_run_once = 1;
1468}
1469
1470void reprepare_packed_git(void)
1471{
1472        prepare_packed_git_run_once = 0;
1473        prepare_packed_git();
1474}
1475
1476static void mark_bad_packed_object(struct packed_git *p,
1477                                   const unsigned char *sha1)
1478{
1479        unsigned i;
1480        for (i = 0; i < p->num_bad_objects; i++)
1481                if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
1482                        return;
1483        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
1484                                      st_mult(GIT_SHA1_RAWSZ,
1485                                              st_add(p->num_bad_objects, 1)));
1486        hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
1487        p->num_bad_objects++;
1488}
1489
1490static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1491{
1492        struct packed_git *p;
1493        unsigned i;
1494
1495        for (p = packed_git; p; p = p->next)
1496                for (i = 0; i < p->num_bad_objects; i++)
1497                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1498                                return p;
1499        return NULL;
1500}
1501
1502/*
1503 * With an in-core object data in "map", rehash it to make sure the
1504 * object name actually matches "sha1" to detect object corruption.
1505 * With "map" == NULL, try reading the object named with "sha1" using
1506 * the streaming interface and rehash it to do the same.
1507 */
1508int check_sha1_signature(const unsigned char *sha1, void *map,
1509                         unsigned long size, const char *type)
1510{
1511        unsigned char real_sha1[20];
1512        enum object_type obj_type;
1513        struct git_istream *st;
1514        git_SHA_CTX c;
1515        char hdr[32];
1516        int hdrlen;
1517
1518        if (map) {
1519                hash_sha1_file(map, size, type, real_sha1);
1520                return hashcmp(sha1, real_sha1) ? -1 : 0;
1521        }
1522
1523        st = open_istream(sha1, &obj_type, &size, NULL);
1524        if (!st)
1525                return -1;
1526
1527        /* Generate the header */
1528        hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1;
1529
1530        /* Sha1.. */
1531        git_SHA1_Init(&c);
1532        git_SHA1_Update(&c, hdr, hdrlen);
1533        for (;;) {
1534                char buf[1024 * 16];
1535                ssize_t readlen = read_istream(st, buf, sizeof(buf));
1536
1537                if (readlen < 0) {
1538                        close_istream(st);
1539                        return -1;
1540                }
1541                if (!readlen)
1542                        break;
1543                git_SHA1_Update(&c, buf, readlen);
1544        }
1545        git_SHA1_Final(real_sha1, &c);
1546        close_istream(st);
1547        return hashcmp(sha1, real_sha1) ? -1 : 0;
1548}
1549
1550int git_open_noatime(const char *name)
1551{
1552        static int sha1_file_open_flag = O_NOATIME;
1553
1554        for (;;) {
1555                int fd;
1556
1557                errno = 0;
1558                fd = open(name, O_RDONLY | sha1_file_open_flag);
1559                if (fd >= 0)
1560                        return fd;
1561
1562                /* Might the failure be due to O_NOATIME? */
1563                if (errno != ENOENT && sha1_file_open_flag) {
1564                        sha1_file_open_flag = 0;
1565                        continue;
1566                }
1567
1568                return -1;
1569        }
1570}
1571
1572static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
1573{
1574        struct alternate_object_database *alt;
1575
1576        if (!lstat(sha1_file_name(sha1), st))
1577                return 0;
1578
1579        prepare_alt_odb();
1580        errno = ENOENT;
1581        for (alt = alt_odb_list; alt; alt = alt->next) {
1582                fill_sha1_path(alt->name, sha1);
1583                if (!lstat(alt->base, st))
1584                        return 0;
1585        }
1586
1587        return -1;
1588}
1589
1590static int open_sha1_file(const unsigned char *sha1)
1591{
1592        int fd;
1593        struct alternate_object_database *alt;
1594        int most_interesting_errno;
1595
1596        fd = git_open_noatime(sha1_file_name(sha1));
1597        if (fd >= 0)
1598                return fd;
1599        most_interesting_errno = errno;
1600
1601        prepare_alt_odb();
1602        for (alt = alt_odb_list; alt; alt = alt->next) {
1603                fill_sha1_path(alt->name, sha1);
1604                fd = git_open_noatime(alt->base);
1605                if (fd >= 0)
1606                        return fd;
1607                if (most_interesting_errno == ENOENT)
1608                        most_interesting_errno = errno;
1609        }
1610        errno = most_interesting_errno;
1611        return -1;
1612}
1613
/*
 * Map the loose object file for "sha1" into memory.
 *
 * On success returns the read-only mapping and stores its length in
 * "*size".  Returns NULL when the file cannot be opened or examined,
 * or when it is empty (an error is reported in the latter case).
 *
 * The file descriptor is closed on every path, including the
 * empty-file one; the mapping does not need it once established.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	void *map = NULL;
	struct stat st;
	int fd;

	fd = open_sha1_file(sha1);
	if (fd < 0)
		return NULL;

	if (!fstat(fd, &st)) {
		*size = xsize_t(st.st_size);
		if (!*size)
			/* mmap() is forbidden on empty files */
			error("object file %s is empty", sha1_file_name(sha1));
		else
			map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
	}

	close(fd);
	return map;
}
1637
1638unsigned long unpack_object_header_buffer(const unsigned char *buf,
1639                unsigned long len, enum object_type *type, unsigned long *sizep)
1640{
1641        unsigned shift;
1642        unsigned long size, c;
1643        unsigned long used = 0;
1644
1645        c = buf[used++];
1646        *type = (c >> 4) & 7;
1647        size = c & 15;
1648        shift = 4;
1649        while (c & 0x80) {
1650                if (len <= used || bitsizeof(long) <= shift) {
1651                        error("bad object header");
1652                        size = used = 0;
1653                        break;
1654                }
1655                c = buf[used++];
1656                size += (c & 0x7f) << shift;
1657                shift += 7;
1658        }
1659        *sizep = size;
1660        return used;
1661}
1662
1663static int unpack_sha1_short_header(git_zstream *stream,
1664                                    unsigned char *map, unsigned long mapsize,
1665                                    void *buffer, unsigned long bufsiz)
1666{
1667        /* Get the data stream */
1668        memset(stream, 0, sizeof(*stream));
1669        stream->next_in = map;
1670        stream->avail_in = mapsize;
1671        stream->next_out = buffer;
1672        stream->avail_out = bufsiz;
1673
1674        git_inflate_init(stream);
1675        return git_inflate(stream, 0);
1676}
1677
1678int unpack_sha1_header(git_zstream *stream,
1679                       unsigned char *map, unsigned long mapsize,
1680                       void *buffer, unsigned long bufsiz)
1681{
1682        int status = unpack_sha1_short_header(stream, map, mapsize,
1683                                              buffer, bufsiz);
1684
1685        if (status < Z_OK)
1686                return status;
1687
1688        /* Make sure we have the terminating NUL */
1689        if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1690                return -1;
1691        return 0;
1692}
1693
1694static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
1695                                        unsigned long mapsize, void *buffer,
1696                                        unsigned long bufsiz, struct strbuf *header)
1697{
1698        int status;
1699
1700        status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);
1701        if (status < Z_OK)
1702                return -1;
1703
1704        /*
1705         * Check if entire header is unpacked in the first iteration.
1706         */
1707        if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1708                return 0;
1709
1710        /*
1711         * buffer[0..bufsiz] was not large enough.  Copy the partial
1712         * result out to header, and then append the result of further
1713         * reading the stream.
1714         */
1715        strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1716        stream->next_out = buffer;
1717        stream->avail_out = bufsiz;
1718
1719        do {
1720                status = git_inflate(stream, 0);
1721                strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1722                if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1723                        return 0;
1724                stream->next_out = buffer;
1725                stream->avail_out = bufsiz;
1726        } while (status != Z_STREAM_END);
1727        return -1;
1728}
1729
1730static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
1731{
1732        int bytes = strlen(buffer) + 1;
1733        unsigned char *buf = xmallocz(size);
1734        unsigned long n;
1735        int status = Z_OK;
1736
1737        n = stream->total_out - bytes;
1738        if (n > size)
1739                n = size;
1740        memcpy(buf, (char *) buffer + bytes, n);
1741        bytes = n;
1742        if (bytes <= size) {
1743                /*
1744                 * The above condition must be (bytes <= size), not
1745                 * (bytes < size).  In other words, even though we
1746                 * expect no more output and set avail_out to zero,
1747                 * the input zlib stream may have bytes that express
1748                 * "this concludes the stream", and we *do* want to
1749                 * eat that input.
1750                 *
1751                 * Otherwise we would not be able to test that we
1752                 * consumed all the input to reach the expected size;
1753                 * we also want to check that zlib tells us that all
1754                 * went well with status == Z_STREAM_END at the end.
1755                 */
1756                stream->next_out = buf + bytes;
1757                stream->avail_out = size - bytes;
1758                while (status == Z_OK)
1759                        status = git_inflate(stream, Z_FINISH);
1760        }
1761        if (status == Z_STREAM_END && !stream->avail_in) {
1762                git_inflate_end(stream);
1763                return buf;
1764        }
1765
1766        if (status < 0)
1767                error("corrupt loose object '%s'", sha1_to_hex(sha1));
1768        else if (stream->avail_in)
1769                error("garbage at end of loose object '%s'",
1770                      sha1_to_hex(sha1));
1771        free(buf);
1772        return NULL;
1773}
1774
1775/*
1776 * We used to just use "sscanf()", but that's actually way
1777 * too permissive for what we want to check. So do an anal
1778 * object header parse by hand.
1779 */
1780static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
1781                               unsigned int flags)
1782{
1783        const char *type_buf = hdr;
1784        unsigned long size;
1785        int type, type_len = 0;
1786
1787        /*
1788         * The type can be of any size but is followed by
1789         * a space.
1790         */
1791        for (;;) {
1792                char c = *hdr++;
1793                if (!c)
1794                        return -1;
1795                if (c == ' ')
1796                        break;
1797                type_len++;
1798        }
1799
1800        type = type_from_string_gently(type_buf, type_len, 1);
1801        if (oi->typename)
1802                strbuf_add(oi->typename, type_buf, type_len);
1803        /*
1804         * Set type to 0 if its an unknown object and
1805         * we're obtaining the type using '--allow-unknown-type'
1806         * option.
1807         */
1808        if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
1809                type = 0;
1810        else if (type < 0)
1811                die("invalid object type");
1812        if (oi->typep)
1813                *oi->typep = type;
1814
1815        /*
1816         * The length must follow immediately, and be in canonical
1817         * decimal format (ie "010" is not valid).
1818         */
1819        size = *hdr++ - '0';
1820        if (size > 9)
1821                return -1;
1822        if (size) {
1823                for (;;) {
1824                        unsigned long c = *hdr - '0';
1825                        if (c > 9)
1826                                break;
1827                        hdr++;
1828                        size = size * 10 + c;
1829                }
1830        }
1831
1832        if (oi->sizep)
1833                *oi->sizep = size;
1834
1835        /*
1836         * The length must be followed by a zero byte
1837         */
1838        return *hdr ? -1 : type;
1839}
1840
1841int parse_sha1_header(const char *hdr, unsigned long *sizep)
1842{
1843        struct object_info oi;
1844
1845        oi.sizep = sizep;
1846        oi.typename = NULL;
1847        oi.typep = NULL;
1848        return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
1849}
1850
1851static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
1852{
1853        int ret;
1854        git_zstream stream;
1855        char hdr[8192];
1856
1857        ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
1858        if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0)
1859                return NULL;
1860
1861        return unpack_sha1_rest(&stream, hdr, *size, sha1);
1862}
1863
1864unsigned long get_size_from_delta(struct packed_git *p,
1865                                  struct pack_window **w_curs,
1866                                  off_t curpos)
1867{
1868        const unsigned char *data;
1869        unsigned char delta_head[20], *in;
1870        git_zstream stream;
1871        int st;
1872
1873        memset(&stream, 0, sizeof(stream));
1874        stream.next_out = delta_head;
1875        stream.avail_out = sizeof(delta_head);
1876
1877        git_inflate_init(&stream);
1878        do {
1879                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1880                stream.next_in = in;
1881                st = git_inflate(&stream, Z_FINISH);
1882                curpos += stream.next_in - in;
1883        } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1884                 stream.total_out < sizeof(delta_head));
1885        git_inflate_end(&stream);
1886        if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1887                error("delta data unpack-initial failed");
1888                return 0;
1889        }
1890
1891        /* Examine the initial part of the delta to figure out
1892         * the result size.
1893         */
1894        data = delta_head;
1895
1896        /* ignore base size */
1897        get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1898
1899        /* Read the result size */
1900        return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1901}
1902
1903static off_t get_delta_base(struct packed_git *p,
1904                                    struct pack_window **w_curs,
1905                                    off_t *curpos,
1906                                    enum object_type type,
1907                                    off_t delta_obj_offset)
1908{
1909        unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1910        off_t base_offset;
1911
1912        /* use_pack() assured us we have [base_info, base_info + 20)
1913         * as a range that we can look at without walking off the
1914         * end of the mapped window.  Its actually the hash size
1915         * that is assured.  An OFS_DELTA longer than the hash size
1916         * is stupid, as then a REF_DELTA would be smaller to store.
1917         */
1918        if (type == OBJ_OFS_DELTA) {
1919                unsigned used = 0;
1920                unsigned char c = base_info[used++];
1921                base_offset = c & 127;
1922                while (c & 128) {
1923                        base_offset += 1;
1924                        if (!base_offset || MSB(base_offset, 7))
1925                                return 0;  /* overflow */
1926                        c = base_info[used++];
1927                        base_offset = (base_offset << 7) + (c & 127);
1928                }
1929                base_offset = delta_obj_offset - base_offset;
1930                if (base_offset <= 0 || base_offset >= delta_obj_offset)
1931                        return 0;  /* out of bound */
1932                *curpos += used;
1933        } else if (type == OBJ_REF_DELTA) {
1934                /* The base entry _must_ be in the same pack */
1935                base_offset = find_pack_entry_one(base_info, p);
1936                *curpos += 20;
1937        } else
1938                die("I am totally screwed");
1939        return base_offset;
1940}
1941
1942/*
1943 * Like get_delta_base above, but we return the sha1 instead of the pack
1944 * offset. This means it is cheaper for REF deltas (we do not have to do
1945 * the final object lookup), but more expensive for OFS deltas (we
1946 * have to load the revidx to convert the offset back into a sha1).
1947 */
1948static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1949                                                struct pack_window **w_curs,
1950                                                off_t curpos,
1951                                                enum object_type type,
1952                                                off_t delta_obj_offset)
1953{
1954        if (type == OBJ_REF_DELTA) {
1955                unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1956                return base;
1957        } else if (type == OBJ_OFS_DELTA) {
1958                struct revindex_entry *revidx;
1959                off_t base_offset = get_delta_base(p, w_curs, &curpos,
1960                                                   type, delta_obj_offset);
1961
1962                if (!base_offset)
1963                        return NULL;
1964
1965                revidx = find_pack_revindex(p, base_offset);
1966                if (!revidx)
1967                        return NULL;
1968
1969                return nth_packed_object_sha1(p, revidx->nr);
1970        } else
1971                return NULL;
1972}
1973
1974int unpack_object_header(struct packed_git *p,
1975                         struct pack_window **w_curs,
1976                         off_t *curpos,
1977                         unsigned long *sizep)
1978{
1979        unsigned char *base;
1980        unsigned long left;
1981        unsigned long used;
1982        enum object_type type;
1983
1984        /* use_pack() assures us we have [base, base + 20) available
1985         * as a range that we can look at.  (Its actually the hash
1986         * size that is assured.)  With our object header encoding
1987         * the maximum deflated object size is 2^137, which is just
1988         * insane, so we know won't exceed what we have been given.
1989         */
1990        base = use_pack(p, w_curs, *curpos, &left);
1991        used = unpack_object_header_buffer(base, left, &type, sizep);
1992        if (!used) {
1993                type = OBJ_BAD;
1994        } else
1995                *curpos += used;
1996
1997        return type;
1998}
1999
2000static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
2001{
2002        int type;
2003        struct revindex_entry *revidx;
2004        const unsigned char *sha1;
2005        revidx = find_pack_revindex(p, obj_offset);
2006        if (!revidx)
2007                return OBJ_BAD;
2008        sha1 = nth_packed_object_sha1(p, revidx->nr);
2009        mark_bad_packed_object(p, sha1);
2010        type = sha1_object_info(sha1, NULL);
2011        if (type <= OBJ_NONE)
2012                return OBJ_BAD;
2013        return type;
2014}
2015
2016#define POI_STACK_PREALLOC 64
2017
2018static enum object_type packed_to_object_type(struct packed_git *p,
2019                                              off_t obj_offset,
2020                                              enum object_type type,
2021                                              struct pack_window **w_curs,
2022                                              off_t curpos)
2023{
2024        off_t small_poi_stack[POI_STACK_PREALLOC];
2025        off_t *poi_stack = small_poi_stack;
2026        int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
2027
2028        while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2029                off_t base_offset;
2030                unsigned long size;
2031                /* Push the object we're going to leave behind */
2032                if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
2033                        poi_stack_alloc = alloc_nr(poi_stack_nr);
2034                        ALLOC_ARRAY(poi_stack, poi_stack_alloc);
2035                        memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
2036                } else {
2037                        ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
2038                }
2039                poi_stack[poi_stack_nr++] = obj_offset;
2040                /* If parsing the base offset fails, just unwind */
2041                base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
2042                if (!base_offset)
2043                        goto unwind;
2044                curpos = obj_offset = base_offset;
2045                type = unpack_object_header(p, w_curs, &curpos, &size);
2046                if (type <= OBJ_NONE) {
2047                        /* If getting the base itself fails, we first
2048                         * retry the base, otherwise unwind */
2049                        type = retry_bad_packed_offset(p, base_offset);
2050                        if (type > OBJ_NONE)
2051                                goto out;
2052                        goto unwind;
2053                }
2054        }
2055
2056        switch (type) {
2057        case OBJ_BAD:
2058        case OBJ_COMMIT:
2059        case OBJ_TREE:
2060        case OBJ_BLOB:
2061        case OBJ_TAG:
2062                break;
2063        default:
2064                error("unknown object type %i at offset %"PRIuMAX" in %s",
2065                      type, (uintmax_t)obj_offset, p->pack_name);
2066                type = OBJ_BAD;
2067        }
2068
2069out:
2070        if (poi_stack != small_poi_stack)
2071                free(poi_stack);
2072        return type;
2073
2074unwind:
2075        while (poi_stack_nr) {
2076                obj_offset = poi_stack[--poi_stack_nr];
2077                type = retry_bad_packed_offset(p, obj_offset);
2078                if (type > OBJ_NONE)
2079                        goto out;
2080        }
2081        type = OBJ_BAD;
2082        goto out;
2083}
2084
2085static int packed_object_info(struct packed_git *p, off_t obj_offset,
2086                              struct object_info *oi)
2087{
2088        struct pack_window *w_curs = NULL;
2089        unsigned long size;
2090        off_t curpos = obj_offset;
2091        enum object_type type;
2092
2093        /*
2094         * We always get the representation type, but only convert it to
2095         * a "real" type later if the caller is interested.
2096         */
2097        type = unpack_object_header(p, &w_curs, &curpos, &size);
2098
2099        if (oi->sizep) {
2100                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2101                        off_t tmp_pos = curpos;
2102                        off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
2103                                                           type, obj_offset);
2104                        if (!base_offset) {
2105                                type = OBJ_BAD;
2106                                goto out;
2107                        }
2108                        *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
2109                        if (*oi->sizep == 0) {
2110                                type = OBJ_BAD;
2111                                goto out;
2112                        }
2113                } else {
2114                        *oi->sizep = size;
2115                }
2116        }
2117
2118        if (oi->disk_sizep) {
2119                struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2120                *oi->disk_sizep = revidx[1].offset - obj_offset;
2121        }
2122
2123        if (oi->typep) {
2124                *oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
2125                if (*oi->typep < 0) {
2126                        type = OBJ_BAD;
2127                        goto out;
2128                }
2129        }
2130
2131        if (oi->delta_base_sha1) {
2132                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2133                        const unsigned char *base;
2134
2135                        base = get_delta_base_sha1(p, &w_curs, curpos,
2136                                                   type, obj_offset);
2137                        if (!base) {
2138                                type = OBJ_BAD;
2139                                goto out;
2140                        }
2141
2142                        hashcpy(oi->delta_base_sha1, base);
2143                } else
2144                        hashclr(oi->delta_base_sha1);
2145        }
2146
2147out:
2148        unuse_pack(&w_curs);
2149        return type;
2150}
2151
2152static void *unpack_compressed_entry(struct packed_git *p,
2153                                    struct pack_window **w_curs,
2154                                    off_t curpos,
2155                                    unsigned long size)
2156{
2157        int st;
2158        git_zstream stream;
2159        unsigned char *buffer, *in;
2160
2161        buffer = xmallocz_gently(size);
2162        if (!buffer)
2163                return NULL;
2164        memset(&stream, 0, sizeof(stream));
2165        stream.next_out = buffer;
2166        stream.avail_out = size + 1;
2167
2168        git_inflate_init(&stream);
2169        do {
2170                in = use_pack(p, w_curs, curpos, &stream.avail_in);
2171                stream.next_in = in;
2172                st = git_inflate(&stream, Z_FINISH);
2173                if (!stream.avail_out)
2174                        break; /* the payload is larger than it should be */
2175                curpos += stream.next_in - in;
2176        } while (st == Z_OK || st == Z_BUF_ERROR);
2177        git_inflate_end(&stream);
2178        if ((st != Z_STREAM_END) || stream.total_out != size) {
2179                free(buffer);
2180                return NULL;
2181        }
2182
2183        return buffer;
2184}
2185
2186static struct hashmap delta_base_cache;
2187static size_t delta_base_cached;
2188
2189static LIST_HEAD(delta_base_cache_lru);
2190
2191struct delta_base_cache_key {
2192        struct packed_git *p;
2193        off_t base_offset;
2194};
2195
2196struct delta_base_cache_entry {
2197        struct hashmap hash;
2198        struct delta_base_cache_key key;
2199        struct list_head lru;
2200        void *data;
2201        unsigned long size;
2202        enum object_type type;
2203};
2204
/*
 * Hash a (pack, offset) pair for the delta base cache: the pack pointer
 * and the offset are added together and the higher bits of the sum are
 * folded into the result.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
        unsigned int mixed = (unsigned int)(intptr_t)p + (unsigned int)base_offset;

        return mixed + (mixed >> 8) + (mixed >> 16);
}
2213
2214static struct delta_base_cache_entry *
2215get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
2216{
2217        struct hashmap_entry entry;
2218        struct delta_base_cache_key key;
2219
2220        if (!delta_base_cache.cmpfn)
2221                return NULL;
2222
2223        hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
2224        key.p = p;
2225        key.base_offset = base_offset;
2226        return hashmap_get(&delta_base_cache, &entry, &key);
2227}
2228
2229static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
2230                                   const struct delta_base_cache_key *b)
2231{
2232        return a->p == b->p && a->base_offset == b->base_offset;
2233}
2234
2235static int delta_base_cache_hash_cmp(const void *va, const void *vb,
2236                                     const void *vkey)
2237{
2238        const struct delta_base_cache_entry *a = va, *b = vb;
2239        const struct delta_base_cache_key *key = vkey;
2240        if (key)
2241                return !delta_base_cache_key_eq(&a->key, key);
2242        else
2243                return !delta_base_cache_key_eq(&a->key, &b->key);
2244}
2245
/* Return 1 if the object at "base_offset" in pack "p" is cached, else 0. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
        return get_delta_base_cache_entry(p, base_offset) != NULL;
}
2250
2251/*
2252 * Remove the entry from the cache, but do _not_ free the associated
2253 * entry data. The caller takes ownership of the "data" buffer, and
2254 * should copy out any fields it wants before detaching.
2255 */
2256static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
2257{
2258        hashmap_remove(&delta_base_cache, ent, &ent->key);
2259        list_del(&ent->lru);
2260        delta_base_cached -= ent->size;
2261        free(ent);
2262}
2263
2264static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
2265        unsigned long *base_size, enum object_type *type)
2266{
2267        struct delta_base_cache_entry *ent;
2268
2269        ent = get_delta_base_cache_entry(p, base_offset);
2270        if (!ent)
2271                return unpack_entry(p, base_offset, type, base_size);
2272
2273        *type = ent->type;
2274        *base_size = ent->size;
2275        return xmemdupz(ent->data, ent->size);
2276}
2277
2278static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
2279{
2280        free(ent->data);
2281        detach_delta_base_cache_entry(ent);
2282}
2283
2284void clear_delta_base_cache(void)
2285{
2286        struct hashmap_iter iter;
2287        struct delta_base_cache_entry *entry;
2288        for (entry = hashmap_iter_first(&delta_base_cache, &iter);
2289             entry;
2290             entry = hashmap_iter_next(&iter)) {
2291                release_delta_base_cache(entry);
2292        }
2293}
2294
2295static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
2296        void *base, unsigned long base_size, enum object_type type)
2297{
2298        struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
2299        struct list_head *lru, *tmp;
2300
2301        delta_base_cached += base_size;
2302
2303        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
2304                struct delta_base_cache_entry *f =
2305                        list_entry(lru, struct delta_base_cache_entry, lru);
2306                if (delta_base_cached <= delta_base_cache_limit)
2307                        break;
2308                release_delta_base_cache(f);
2309        }
2310
2311        ent->key.p = p;
2312        ent->key.base_offset = base_offset;
2313        ent->type = type;
2314        ent->data = base;
2315        ent->size = base_size;
2316        list_add_tail(&ent->lru, &delta_base_cache_lru);
2317
2318        if (!delta_base_cache.cmpfn)
2319                hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, 0);
2320        hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
2321        hashmap_add(&delta_base_cache, ent);
2322}
2323
2324static void *read_object(const unsigned char *sha1, enum object_type *type,
2325                         unsigned long *size);
2326
2327static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
2328{
2329        static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
2330        trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
2331                         p->pack_name, (uintmax_t)obj_offset);
2332}
2333
2334int do_check_packed_object_crc;
2335
2336#define UNPACK_ENTRY_STACK_PREALLOC 64
2337struct unpack_entry_stack_ent {
2338        off_t obj_offset;
2339        off_t curpos;
2340        unsigned long size;
2341};
2342
2343void *unpack_entry(struct packed_git *p, off_t obj_offset,
2344                   enum object_type *final_type, unsigned long *final_size)
2345{
2346        struct pack_window *w_curs = NULL;
2347        off_t curpos = obj_offset;
2348        void *data = NULL;
2349        unsigned long size;
2350        enum object_type type;
2351        struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
2352        struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
2353        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
2354        int base_from_cache = 0;
2355
2356        write_pack_access_log(p, obj_offset);
2357
2358        /* PHASE 1: drill down to the innermost base object */
2359        for (;;) {
2360                off_t base_offset;
2361                int i;
2362                struct delta_base_cache_entry *ent;
2363
2364                ent = get_delta_base_cache_entry(p, curpos);
2365                if (ent) {
2366                        type = ent->type;
2367                        data = ent->data;
2368                        size = ent->size;
2369                        detach_delta_base_cache_entry(ent);
2370                        base_from_cache = 1;
2371                        break;
2372                }
2373
2374                if (do_check_packed_object_crc && p->index_version > 1) {
2375                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2376                        off_t len = revidx[1].offset - obj_offset;
2377                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
2378                                const unsigned char *sha1 =
2379                                        nth_packed_object_sha1(p, revidx->nr);
2380                                error("bad packed object CRC for %s",
2381                                      sha1_to_hex(sha1));
2382                                mark_bad_packed_object(p, sha1);
2383                                unuse_pack(&w_curs);
2384                                return NULL;
2385                        }
2386                }
2387
2388                type = unpack_object_header(p, &w_curs, &curpos, &size);
2389                if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
2390                        break;
2391
2392                base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
2393                if (!base_offset) {
2394                        error("failed to validate delta base reference "
2395                              "at offset %"PRIuMAX" from %s",
2396                              (uintmax_t)curpos, p->pack_name);
2397                        /* bail to phase 2, in hopes of recovery */
2398                        data = NULL;
2399                        break;
2400                }
2401
2402                /* push object, proceed to base */
2403                if (delta_stack_nr >= delta_stack_alloc
2404                    && delta_stack == small_delta_stack) {
2405                        delta_stack_alloc = alloc_nr(delta_stack_nr);
2406                        ALLOC_ARRAY(delta_stack, delta_stack_alloc);
2407                        memcpy(delta_stack, small_delta_stack,
2408                               sizeof(*delta_stack)*delta_stack_nr);
2409                } else {
2410                        ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
2411                }
2412                i = delta_stack_nr++;
2413                delta_stack[i].obj_offset = obj_offset;
2414                delta_stack[i].curpos = curpos;
2415                delta_stack[i].size = size;
2416
2417                curpos = obj_offset = base_offset;
2418        }
2419
2420        /* PHASE 2: handle the base */
2421        switch (type) {
2422        case OBJ_OFS_DELTA:
2423        case OBJ_REF_DELTA:
2424                if (data)
2425                        die("BUG: unpack_entry: left loop at a valid delta");
2426                break;
2427        case OBJ_COMMIT:
2428        case OBJ_TREE:
2429        case OBJ_BLOB:
2430        case OBJ_TAG:
2431                if (!base_from_cache)
2432                        data = unpack_compressed_entry(p, &w_curs, curpos, size);
2433                break;
2434        default:
2435                data = NULL;
2436                error("unknown object type %i at offset %"PRIuMAX" in %s",
2437                      type, (uintmax_t)obj_offset, p->pack_name);
2438        }
2439
2440        /* PHASE 3: apply deltas in order */
2441
2442        /* invariants:
2443         *   'data' holds the base data, or NULL if there was corruption
2444         */
2445        while (delta_stack_nr) {
2446                void *delta_data;
2447                void *base = data;
2448                unsigned long delta_size, base_size = size;
2449                int i;
2450
2451                data = NULL;
2452
2453                if (base)
2454                        add_delta_base_cache(p, obj_offset, base, base_size, type);
2455
2456                if (!base) {
2457                        /*
2458                         * We're probably in deep shit, but let's try to fetch
2459                         * the required base anyway from another pack or loose.
2460                         * This is costly but should happen only in the presence
2461                         * of a corrupted pack, and is better than failing outright.
2462                         */
2463                        struct revindex_entry *revidx;
2464                        const unsigned char *base_sha1;
2465                        revidx = find_pack_revindex(p, obj_offset);
2466                        if (revidx) {
2467                                base_sha1 = nth_packed_object_sha1(p, revidx->nr);
2468                                error("failed to read delta base object %s"
2469                                      " at offset %"PRIuMAX" from %s",
2470                                      sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
2471                                      p->pack_name);
2472                                mark_bad_packed_object(p, base_sha1);
2473                                base = read_object(base_sha1, &type, &base_size);
2474                        }
2475                }
2476
2477                i = --delta_stack_nr;
2478                obj_offset = delta_stack[i].obj_offset;
2479                curpos = delta_stack[i].curpos;
2480                delta_size = delta_stack[i].size;
2481
2482                if (!base)
2483                        continue;
2484
2485                delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
2486
2487                if (!delta_data) {
2488                        error("failed to unpack compressed delta "
2489                              "at offset %"PRIuMAX" from %s",
2490                              (uintmax_t)curpos, p->pack_name);
2491                        data = NULL;
2492                        continue;
2493                }
2494
2495                data = patch_delta(base, base_size,
2496                                   delta_data, delta_size,
2497                                   &size);
2498
2499                /*
2500                 * We could not apply the delta; warn the user, but keep going.
2501                 * Our failure will be noticed either in the next iteration of
2502                 * the loop, or if this is the final delta, in the caller when
2503                 * we return NULL. Those code paths will take care of making
2504                 * a more explicit warning and retrying with another copy of
2505                 * the object.
2506                 */
2507                if (!data)
2508                        error("failed to apply delta");
2509
2510                free(delta_data);
2511        }
2512
2513        *final_type = type;
2514        *final_size = size;
2515
2516        unuse_pack(&w_curs);
2517
2518        if (delta_stack != small_delta_stack)
2519                free(delta_stack);
2520
2521        return data;
2522}
2523
2524const unsigned char *nth_packed_object_sha1(struct packed_git *p,
2525                                            uint32_t n)
2526{
2527        const unsigned char *index = p->index_data;
2528        if (!index) {
2529                if (open_pack_index(p))
2530                        return NULL;
2531                index = p->index_data;
2532        }
2533        if (n >= p->num_objects)
2534                return NULL;
2535        index += 4 * 256;
2536        if (p->index_version == 1) {
2537                return index + 24 * n + 4;
2538        } else {
2539                index += 8;
2540                return index + 20 * n;
2541        }
2542}
2543
2544void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
2545{
2546        const unsigned char *ptr = vptr;
2547        const unsigned char *start = p->index_data;
2548        const unsigned char *end = start + p->index_size;
2549        if (ptr < start)
2550                die(_("offset before start of pack index for %s (corrupt index?)"),
2551                    p->pack_name);
2552        /* No need to check for underflow; .idx files must be at least 8 bytes */
2553        if (ptr >= end - 8)
2554                die(_("offset beyond end of pack index for %s (truncated index?)"),
2555                    p->pack_name);
2556}
2557
2558off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
2559{
2560        const unsigned char *index = p->index_data;
2561        index += 4 * 256;
2562        if (p->index_version == 1) {
2563                return ntohl(*((uint32_t *)(index + 24 * n)));
2564        } else {
2565                uint32_t off;
2566                index += 8 + p->num_objects * (20 + 4);
2567                off = ntohl(*((uint32_t *)(index + 4 * n)));
2568                if (!(off & 0x80000000))
2569                        return off;
2570                index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
2571                check_pack_index_ptr(p, index);
2572                return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
2573                                   ntohl(*((uint32_t *)(index + 4)));
2574        }
2575}
2576
2577off_t find_pack_entry_one(const unsigned char *sha1,
2578                                  struct packed_git *p)
2579{
2580        const uint32_t *level1_ofs = p->index_data;
2581        const unsigned char *index = p->index_data;
2582        unsigned hi, lo, stride;
2583        static int use_lookup = -1;
2584        static int debug_lookup = -1;
2585
2586        if (debug_lookup < 0)
2587                debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
2588
2589        if (!index) {
2590                if (open_pack_index(p))
2591                        return 0;
2592                level1_ofs = p->index_data;
2593                index = p->index_data;
2594        }
2595        if (p->index_version > 1) {
2596                level1_ofs += 2;
2597                index += 8;
2598        }
2599        index += 4 * 256;
2600        hi = ntohl(level1_ofs[*sha1]);
2601        lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
2602        if (p->index_version > 1) {
2603                stride = 20;
2604        } else {
2605                stride = 24;
2606                index += 4;
2607        }
2608
2609        if (debug_lookup)
2610                printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
2611                       sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
2612
2613        if (use_lookup < 0)
2614                use_lookup = !!getenv("GIT_USE_LOOKUP");
2615        if (use_lookup) {
2616                int pos = sha1_entry_pos(index, stride, 0,
2617                                         lo, hi, p->num_objects, sha1);
2618                if (pos < 0)
2619                        return 0;
2620                return nth_packed_object_offset(p, pos);
2621        }
2622
2623        do {
2624                unsigned mi = (lo + hi) / 2;
2625                int cmp = hashcmp(index + mi * stride, sha1);
2626
2627                if (debug_lookup)
2628                        printf("lo %u hi %u rg %u mi %u\n",
2629                               lo, hi, hi - lo, mi);
2630                if (!cmp)
2631                        return nth_packed_object_offset(p, mi);
2632                if (cmp > 0)
2633                        hi = mi;
2634                else
2635                        lo = mi+1;
2636        } while (lo < hi);
2637        return 0;
2638}
2639
2640int is_pack_valid(struct packed_git *p)
2641{
2642        /* An already open pack is known to be valid. */
2643        if (p->pack_fd != -1)
2644                return 1;
2645
2646        /* If the pack has one window completely covering the
2647         * file size, the pack is known to be valid even if
2648         * the descriptor is not currently open.
2649         */
2650        if (p->windows) {
2651                struct pack_window *w = p->windows;
2652
2653                if (!w->offset && w->len == p->pack_size)
2654                        return 1;
2655        }
2656
2657        /* Force the pack to open to prove its valid. */
2658        return !open_packed_git(p);
2659}
2660
2661static int fill_pack_entry(const unsigned char *sha1,
2662                           struct pack_entry *e,
2663                           struct packed_git *p)
2664{
2665        off_t offset;
2666
2667        if (p->num_bad_objects) {
2668                unsigned i;
2669                for (i = 0; i < p->num_bad_objects; i++)
2670                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
2671                                return 0;
2672        }
2673
2674        offset = find_pack_entry_one(sha1, p);
2675        if (!offset)
2676                return 0;
2677
2678        /*
2679         * We are about to tell the caller where they can locate the
2680         * requested object.  We better make sure the packfile is
2681         * still here and can be accessed before supplying that
2682         * answer, as it may have been deleted since the index was
2683         * loaded!
2684         */
2685        if (!is_pack_valid(p))
2686                return 0;
2687        e->offset = offset;
2688        e->p = p;
2689        hashcpy(e->sha1, sha1);
2690        return 1;
2691}
2692
2693/*
2694 * Iff a pack file contains the object named by sha1, return true and
2695 * store its location to e.
2696 */
2697static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
2698{
2699        struct mru_entry *p;
2700
2701        prepare_packed_git();
2702        if (!packed_git)
2703                return 0;
2704
2705        for (p = packed_git_mru->head; p; p = p->next) {
2706                if (fill_pack_entry(sha1, e, p->item)) {
2707                        mru_mark(packed_git_mru, p);
2708                        return 1;
2709                }
2710        }
2711        return 0;
2712}
2713
2714struct packed_git *find_sha1_pack(const unsigned char *sha1,
2715                                  struct packed_git *packs)
2716{
2717        struct packed_git *p;
2718
2719        for (p = packs; p; p = p->next) {
2720                if (find_pack_entry_one(sha1, p))
2721                        return p;
2722        }
2723        return NULL;
2724
2725}
2726
2727static int sha1_loose_object_info(const unsigned char *sha1,
2728                                  struct object_info *oi,
2729                                  int flags)
2730{
2731        int status = 0;
2732        unsigned long mapsize;
2733        void *map;
2734        git_zstream stream;
2735        char hdr[32];
2736        struct strbuf hdrbuf = STRBUF_INIT;
2737
2738        if (oi->delta_base_sha1)
2739                hashclr(oi->delta_base_sha1);
2740
2741        /*
2742         * If we don't care about type or size, then we don't
2743         * need to look inside the object at all. Note that we
2744         * do not optimize out the stat call, even if the
2745         * caller doesn't care about the disk-size, since our
2746         * return value implicitly indicates whether the
2747         * object even exists.
2748         */
2749        if (!oi->typep && !oi->typename && !oi->sizep) {
2750                struct stat st;
2751                if (stat_sha1_file(sha1, &st) < 0)
2752                        return -1;
2753                if (oi->disk_sizep)
2754                        *oi->disk_sizep = st.st_size;
2755                return 0;
2756        }
2757
2758        map = map_sha1_file(sha1, &mapsize);
2759        if (!map)
2760                return -1;
2761        if (oi->disk_sizep)
2762                *oi->disk_sizep = mapsize;
2763        if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
2764                if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
2765                        status = error("unable to unpack %s header with --allow-unknown-type",
2766                                       sha1_to_hex(sha1));
2767        } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2768                status = error("unable to unpack %s header",
2769                               sha1_to_hex(sha1));
2770        if (status < 0)
2771                ; /* Do nothing */
2772        else if (hdrbuf.len) {
2773                if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
2774                        status = error("unable to parse %s header with --allow-unknown-type",
2775                                       sha1_to_hex(sha1));
2776        } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
2777                status = error("unable to parse %s header", sha1_to_hex(sha1));
2778        git_inflate_end(&stream);
2779        munmap(map, mapsize);
2780        if (status && oi->typep)
2781                *oi->typep = status;
2782        strbuf_release(&hdrbuf);
2783        return 0;
2784}
2785
2786int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
2787{
2788        struct cached_object *co;
2789        struct pack_entry e;
2790        int rtype;
2791        enum object_type real_type;
2792        const unsigned char *real = lookup_replace_object_extended(sha1, flags);
2793
2794        co = find_cached_object(real);
2795        if (co) {
2796                if (oi->typep)
2797                        *(oi->typep) = co->type;
2798                if (oi->sizep)
2799                        *(oi->sizep) = co->size;
2800                if (oi->disk_sizep)
2801                        *(oi->disk_sizep) = 0;
2802                if (oi->delta_base_sha1)
2803                        hashclr(oi->delta_base_sha1);
2804                if (oi->typename)
2805                        strbuf_addstr(oi->typename, typename(co->type));
2806                oi->whence = OI_CACHED;
2807                return 0;
2808        }
2809
2810        if (!find_pack_entry(real, &e)) {
2811                /* Most likely it's a loose object. */
2812                if (!sha1_loose_object_info(real, oi, flags)) {
2813                        oi->whence = OI_LOOSE;
2814                        return 0;
2815                }
2816
2817                /* Not a loose object; someone else may have just packed it. */
2818                reprepare_packed_git();
2819                if (!find_pack_entry(real, &e))
2820                        return -1;
2821        }
2822
2823        /*
2824         * packed_object_info() does not follow the delta chain to
2825         * find out the real type, unless it is given oi->typep.
2826         */
2827        if (oi->typename && !oi->typep)
2828                oi->typep = &real_type;
2829
2830        rtype = packed_object_info(e.p, e.offset, oi);
2831        if (rtype < 0) {
2832                mark_bad_packed_object(e.p, real);
2833                if (oi->typep == &real_type)
2834                        oi->typep = NULL;
2835                return sha1_object_info_extended(real, oi, 0);
2836        } else if (in_delta_base_cache(e.p, e.offset)) {
2837                oi->whence = OI_DBCACHED;
2838        } else {
2839                oi->whence = OI_PACKED;
2840                oi->u.packed.offset = e.offset;
2841                oi->u.packed.pack = e.p;
2842                oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
2843                                         rtype == OBJ_OFS_DELTA);
2844        }
2845        if (oi->typename)
2846                strbuf_addstr(oi->typename, typename(*oi->typep));
2847        if (oi->typep == &real_type)
2848                oi->typep = NULL;
2849
2850        return 0;
2851}
2852
2853/* returns enum object_type or negative */
2854int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
2855{
2856        enum object_type type;
2857        struct object_info oi = {NULL};
2858
2859        oi.typep = &type;
2860        oi.sizep = sizep;
2861        if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0)
2862                return -1;
2863        return type;
2864}
2865
2866static void *read_packed_sha1(const unsigned char *sha1,
2867                              enum object_type *type, unsigned long *size)
2868{
2869        struct pack_entry e;
2870        void *data;
2871
2872        if (!find_pack_entry(sha1, &e))
2873                return NULL;
2874        data = cache_or_unpack_entry(e.p, e.offset, size, type);
2875        if (!data) {
2876                /*
2877                 * We're probably in deep shit, but let's try to fetch
2878                 * the required object anyway from another pack or loose.
2879                 * This should happen only in the presence of a corrupted
2880                 * pack, and is better than failing outright.
2881                 */
2882                error("failed to read object %s at offset %"PRIuMAX" from %s",
2883                      sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
2884                mark_bad_packed_object(e.p, sha1);
2885                data = read_object(sha1, type, size);
2886        }
2887        return data;
2888}
2889
2890int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
2891                      unsigned char *sha1)
2892{
2893        struct cached_object *co;
2894
2895        hash_sha1_file(buf, len, typename(type), sha1);
2896        if (has_sha1_file(sha1) || find_cached_object(sha1))
2897                return 0;
2898        ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc);
2899        co = &cached_objects[cached_object_nr++];
2900        co->size = len;
2901        co->type = type;
2902        co->buf = xmalloc(len);
2903        memcpy(co->buf, buf, len);
2904        hashcpy(co->sha1, sha1);
2905        return 0;
2906}
2907
2908static void *read_object(const unsigned char *sha1, enum object_type *type,
2909                         unsigned long *size)
2910{
2911        unsigned long mapsize;
2912        void *map, *buf;
2913        struct cached_object *co;
2914
2915        co = find_cached_object(sha1);
2916        if (co) {
2917                *type = co->type;
2918                *size = co->size;
2919                return xmemdupz(co->buf, co->size);
2920        }
2921
2922        buf = read_packed_sha1(sha1, type, size);
2923        if (buf)
2924                return buf;
2925        map = map_sha1_file(sha1, &mapsize);
2926        if (map) {
2927                buf = unpack_sha1_file(map, mapsize, type, size, sha1);
2928                munmap(map, mapsize);
2929                return buf;
2930        }
2931        reprepare_packed_git();
2932        return read_packed_sha1(sha1, type, size);
2933}
2934
2935/*
2936 * This function dies on corrupt objects; the callers who want to
2937 * deal with them should arrange to call read_object() and give error
2938 * messages themselves.
2939 */
2940void *read_sha1_file_extended(const unsigned char *sha1,
2941                              enum object_type *type,
2942                              unsigned long *size,
2943                              unsigned flag)
2944{
2945        void *data;
2946        const struct packed_git *p;
2947        const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
2948
2949        errno = 0;
2950        data = read_object(repl, type, size);
2951        if (data)
2952                return data;
2953
2954        if (errno && errno != ENOENT)
2955                die_errno("failed to read object %s", sha1_to_hex(sha1));
2956
2957        /* die if we replaced an object with one that does not exist */
2958        if (repl != sha1)
2959                die("replacement %s not found for %s",
2960                    sha1_to_hex(repl), sha1_to_hex(sha1));
2961
2962        if (has_loose_object(repl)) {
2963                const char *path = sha1_file_name(sha1);
2964
2965                die("loose object %s (stored in %s) is corrupt",
2966                    sha1_to_hex(repl), path);
2967        }
2968
2969        if ((p = has_packed_and_bad(repl)) != NULL)
2970                die("packed object %s (stored in %s) is corrupt",
2971                    sha1_to_hex(repl), p->pack_name);
2972
2973        return NULL;
2974}
2975
2976void *read_object_with_reference(const unsigned char *sha1,
2977                                 const char *required_type_name,
2978                                 unsigned long *size,
2979                                 unsigned char *actual_sha1_return)
2980{
2981        enum object_type type, required_type;
2982        void *buffer;
2983        unsigned long isize;
2984        unsigned char actual_sha1[20];
2985
2986        required_type = type_from_string(required_type_name);
2987        hashcpy(actual_sha1, sha1);
2988        while (1) {
2989                int ref_length = -1;
2990                const char *ref_type = NULL;
2991
2992                buffer = read_sha1_file(actual_sha1, &type, &isize);
2993                if (!buffer)
2994                        return NULL;
2995                if (type == required_type) {
2996                        *size = isize;
2997                        if (actual_sha1_return)
2998                                hashcpy(actual_sha1_return, actual_sha1);
2999                        return buffer;
3000                }
3001                /* Handle references */
3002                else if (type == OBJ_COMMIT)
3003                        ref_type = "tree ";
3004                else if (type == OBJ_TAG)
3005                        ref_type = "object ";
3006                else {
3007                        free(buffer);
3008                        return NULL;
3009                }
3010                ref_length = strlen(ref_type);
3011
3012                if (ref_length + 40 > isize ||
3013                    memcmp(buffer, ref_type, ref_length) ||
3014                    get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
3015                        free(buffer);
3016                        return NULL;
3017                }
3018                free(buffer);
3019                /* Now we have the ID of the referred-to object in
3020                 * actual_sha1.  Check again. */
3021        }
3022}
3023
3024static void write_sha1_file_prepare(const void *buf, unsigned long len,
3025                                    const char *type, unsigned char *sha1,
3026                                    char *hdr, int *hdrlen)
3027{
3028        git_SHA_CTX c;
3029
3030        /* Generate the header */
3031        *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1;
3032
3033        /* Sha1.. */
3034        git_SHA1_Init(&c);
3035        git_SHA1_Update(&c, hdr, *hdrlen);
3036        git_SHA1_Update(&c, buf, len);
3037        git_SHA1_Final(sha1, &c);
3038}
3039
3040/*
3041 * Move the just written object into its final resting place.
3042 */
3043int finalize_object_file(const char *tmpfile, const char *filename)
3044{
3045        int ret = 0;
3046
3047        if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
3048                goto try_rename;
3049        else if (link(tmpfile, filename))
3050                ret = errno;
3051
3052        /*
3053         * Coda hack - coda doesn't like cross-directory links,
3054         * so we fall back to a rename, which will mean that it
3055         * won't be able to check collisions, but that's not a
3056         * big deal.
3057         *
3058         * The same holds for FAT formatted media.
3059         *
3060         * When this succeeds, we just return.  We have nothing
3061         * left to unlink.
3062         */
3063        if (ret && ret != EEXIST) {
3064        try_rename:
3065                if (!rename(tmpfile, filename))
3066                        goto out;
3067                ret = errno;
3068        }
3069        unlink_or_warn(tmpfile);
3070        if (ret) {
3071                if (ret != EEXIST) {
3072                        return error_errno("unable to write sha1 filename %s", filename);
3073                }
3074                /* FIXME!!! Collision check here ? */
3075        }
3076
3077out:
3078        if (adjust_shared_perm(filename))
3079                return error("unable to set permission to '%s'", filename);
3080        return 0;
3081}
3082
/*
 * Write len bytes from buf to fd with write_in_full().  Returns 0 on
 * success, or the result of error_errno() if the write fails.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
	if (write_in_full(fd, buf, len) >= 0)
		return 0;
	return error_errno("file write error");
}
3089
/*
 * Compute into sha1 the object name of the len bytes at buf when
 * stored as an object of the given type.  The header built along the
 * way is discarded.  Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
		   unsigned char *sha1)
{
	char scratch_hdr[32];
	int scratch_len = sizeof(scratch_hdr);

	write_sha1_file_prepare(buf, len, type, sha1, scratch_hdr, &scratch_len);
	return 0;
}
3098
3099/* Finalize a file on disk, and close it. */
3100static void close_sha1_file(int fd)
3101{
3102        if (fsync_object_files)
3103                fsync_or_die(fd, "sha1 file");
3104        if (close(fd) != 0)
3105                die_errno("error when closing sha1 file");
3106}
3107
/*
 * Length of the directory part of filename, counting the final '/'.
 * Returns 0 when filename contains no '/' at all.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? last_slash - filename + 1 : 0;
}
3116
3117/*
3118 * This creates a temporary file in the same directory as the final
3119 * 'filename'
3120 *
3121 * We want to avoid cross-directory filename renames, because those
3122 * can have problems on various filesystems (FAT, NFS, Coda).
3123 */
3124static int create_tmpfile(struct strbuf *tmp, const char *filename)
3125{
3126        int fd, dirlen = directory_size(filename);
3127
3128        strbuf_reset(tmp);
3129        strbuf_add(tmp, filename, dirlen);
3130        strbuf_addstr(tmp, "tmp_obj_XXXXXX");
3131        fd = git_mkstemp_mode(tmp->buf, 0444);
3132        if (fd < 0 && dirlen && errno == ENOENT) {
3133                /*
3134                 * Make sure the directory exists; note that the contents
3135                 * of the buffer are undefined after mkstemp returns an
3136                 * error, so we have to rewrite the whole buffer from
3137                 * scratch.
3138                 */
3139                strbuf_reset(tmp);
3140                strbuf_add(tmp, filename, dirlen - 1);
3141                if (mkdir(tmp->buf, 0777) && errno != EEXIST)
3142                        return -1;
3143                if (adjust_shared_perm(tmp->buf))
3144                        return -1;
3145
3146                /* Try again */
3147                strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
3148                fd = git_mkstemp_mode(tmp->buf, 0444);
3149        }
3150        return fd;
3151}
3152
3153static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
3154                              const void *buf, unsigned long len, time_t mtime)
3155{
3156        int fd, ret;
3157        unsigned char compressed[4096];
3158        git_zstream stream;
3159        git_SHA_CTX c;
3160        unsigned char parano_sha1[20];
3161        static struct strbuf tmp_file = STRBUF_INIT;
3162        const char *filename = sha1_file_name(sha1);
3163
3164        fd = create_tmpfile(&tmp_file, filename);
3165        if (fd < 0) {
3166                if (errno == EACCES)
3167                        return error("insufficient permission for adding an object to repository database %s", get_object_directory());
3168                else
3169                        return error_errno("unable to create temporary file");
3170        }
3171
3172        /* Set it up */
3173        git_deflate_init(&stream, zlib_compression_level);
3174        stream.next_out = compressed;
3175        stream.avail_out = sizeof(compressed);
3176        git_SHA1_Init(&c);
3177
3178        /* First header.. */
3179        stream.next_in = (unsigned char *)hdr;
3180        stream.avail_in = hdrlen;
3181        while (git_deflate(&stream, 0) == Z_OK)
3182                ; /* nothing */
3183        git_SHA1_Update(&c, hdr, hdrlen);
3184
3185        /* Then the data itself.. */
3186        stream.next_in = (void *)buf;
3187        stream.avail_in = len;
3188        do {
3189                unsigned char *in0 = stream.next_in;
3190                ret = git_deflate(&stream, Z_FINISH);
3191                git_SHA1_Update(&c, in0, stream.next_in - in0);
3192                if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
3193                        die("unable to write sha1 file");
3194                stream.next_out = compressed;
3195                stream.avail_out = sizeof(compressed);
3196        } while (ret == Z_OK);
3197
3198        if (ret != Z_STREAM_END)
3199                die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
3200        ret = git_deflate_end_gently(&stream);
3201        if (ret != Z_OK)
3202                die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
3203        git_SHA1_Final(parano_sha1, &c);
3204        if (hashcmp(sha1, parano_sha1) != 0)
3205                die("confused by unstable object source data for %s", sha1_to_hex(sha1));
3206
3207        close_sha1_file(fd);
3208
3209        if (mtime) {
3210                struct utimbuf utb;
3211                utb.actime = mtime;
3212                utb.modtime = mtime;
3213                if (utime(tmp_file.buf, &utb) < 0)
3214                        warning_errno("failed utime() on %s", tmp_file.buf);
3215        }
3216
3217        return finalize_object_file(tmp_file.buf, filename);
3218}
3219
/*
 * Thin wrapper: returns check_and_freshen(sha1, 1).
 */
static int freshen_loose_object(const unsigned char *sha1)
{
	int refreshed = check_and_freshen(sha1, 1);

	return refreshed;
}
3224
3225static int freshen_packed_object(const unsigned char *sha1)
3226{
3227        struct pack_entry e;
3228        if (!find_pack_entry(sha1, &e))
3229                return 0;
3230        if (e.p->freshened)
3231                return 1;
3232        if (!freshen_file(e.p->pack_name))
3233                return 0;
3234        e.p->freshened = 1;
3235        return 1;
3236}
3237
/*
 * Compute the name of the object (buf, len, type) into sha1 and make
 * sure the object exists in the object store.  Returns 0 on success,
 * or the result of write_loose_object() when a new loose file had to
 * be written.
 */
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
{
	char hdr[32];
	int hdrlen = sizeof(hdr);
	int already_there;

	write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);

	/*
	 * Normally if we already have the object, packed or loose, we do
	 * not bother writing it out into a .git/objects/??/?{38} file;
	 * freshening the existing copy is enough.
	 */
	already_there = freshen_packed_object(sha1) || freshen_loose_object(sha1);
	if (already_there)
		return 0;

	return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
}
3251
3252int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
3253                             unsigned char *sha1, unsigned flags)
3254{
3255        char *header;
3256        int hdrlen, status = 0;
3257
3258        /* type string, SP, %lu of the length plus NUL must fit this */
3259        hdrlen = strlen(type) + 32;
3260        header = xmalloc(hdrlen);
3261        write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);
3262
3263        if (!(flags & HASH_WRITE_OBJECT))
3264                goto cleanup;
3265        if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
3266                goto cleanup;
3267        status = write_loose_object(sha1, header, hdrlen, buf, len, 0);
3268
3269cleanup:
3270        free(header);
3271        return status;
3272}
3273
3274int force_object_loose(const unsigned char *sha1, time_t mtime)
3275{
3276        void *buf;
3277        unsigned long len;
3278        enum object_type type;
3279        char hdr[32];
3280        int hdrlen;
3281        int ret;
3282
3283        if (has_loose_object(sha1))
3284                return 0;
3285        buf = read_packed_sha1(sha1, &type, &len);
3286        if (!buf)
3287                return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
3288        hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1;
3289        ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
3290        free(buf);
3291
3292        return ret;
3293}
3294
/*
 * Return 1 if stat() succeeds on the path given by
 * sha1_pack_index_name(sha1), 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return !stat(sha1_pack_index_name(sha1), &st);
}
3302
3303int has_sha1_pack(const unsigned char *sha1)
3304{
3305        struct pack_entry e;
3306        return find_pack_entry(sha1, &e);
3307}
3308
3309int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
3310{
3311        struct pack_entry e;
3312
3313        if (find_pack_entry(sha1, &e))
3314                return 1;
3315        if (has_loose_object(sha1))
3316                return 1;
3317        if (flags & HAS_SHA1_QUICK)
3318                return 0;
3319        reprepare_packed_git();
3320        return find_pack_entry(sha1, &e);
3321}
3322
3323int has_object_file(const struct object_id *oid)
3324{
3325        return has_sha1_file(oid->hash);
3326}
3327
3328static void check_tree(const void *buf, size_t size)
3329{
3330        struct tree_desc desc;
3331        struct name_entry entry;
3332
3333        init_tree_desc(&desc, buf, size);
3334        while (tree_entry(&desc, &entry))
3335                /* do nothing
3336                 * tree_entry() will die() on malformed entries */
3337                ;
3338}
3339
3340static void check_commit(const void *buf, size_t size)
3341{
3342        struct commit c;
3343        memset(&c, 0, sizeof(c));
3344        if (parse_commit_buffer(&c, buf, size))
3345                die("corrupt commit");
3346}
3347
3348static void check_tag(const void *buf, size_t size)
3349{
3350        struct tag t;
3351        memset(&t, 0, sizeof(t));
3352        if (parse_tag_buffer(&t, buf, size))
3353                die("corrupt tag");
3354}
3355
3356static int index_mem(unsigned char *sha1, void *buf, size_t size,
3357                     enum object_type type,
3358                     const char *path, unsigned flags)
3359{
3360        int ret, re_allocated = 0;
3361        int write_object = flags & HASH_WRITE_OBJECT;
3362
3363        if (!type)
3364                type = OBJ_BLOB;
3365
3366        /*
3367         * Convert blobs to git internal format
3368         */
3369        if ((type == OBJ_BLOB) && path) {
3370                struct strbuf nbuf = STRBUF_INIT;
3371                if (convert_to_git(path, buf, size, &nbuf,
3372                                   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
3373                        buf = strbuf_detach(&nbuf, &size);
3374                        re_allocated = 1;
3375                }
3376        }
3377        if (flags & HASH_FORMAT_CHECK) {
3378                if (type == OBJ_TREE)
3379                        check_tree(buf, size);
3380                if (type == OBJ_COMMIT)
3381                        check_commit(buf, size);
3382                if (type == OBJ_TAG)
3383                        check_tag(buf, size);
3384        }
3385
3386        if (write_object)
3387                ret = write_sha1_file(buf, size, typename(type), sha1);
3388        else
3389                ret = hash_sha1_file(buf, size, typename(type), sha1);
3390        if (re_allocated)
3391                free(buf);
3392        return ret;
3393}
3394
3395static int index_stream_convert_blob(unsigned char *sha1, int fd,
3396                                     const char *path, unsigned flags)
3397{
3398        int ret;
3399        const int write_object = flags & HASH_WRITE_OBJECT;
3400        struct strbuf sbuf = STRBUF_INIT;
3401
3402        assert(path);
3403        assert(would_convert_to_git_filter_fd(path));
3404
3405        convert_to_git_filter_fd(path, fd, &sbuf,
3406                                 write_object ? safe_crlf : SAFE_CRLF_FALSE);
3407
3408        if (write_object)
3409                ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
3410                                      sha1);
3411        else
3412                ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
3413                                     sha1);
3414        strbuf_release(&sbuf);
3415        return ret;
3416}
3417
3418static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
3419                      const char *path, unsigned flags)
3420{
3421        struct strbuf sbuf = STRBUF_INIT;
3422        int ret;
3423
3424        if (strbuf_read(&sbuf, fd, 4096) >= 0)
3425                ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);
3426        else
3427                ret = -1;
3428        strbuf_release(&sbuf);
3429        return ret;
3430}
3431
3432#define SMALL_FILE_SIZE (32*1024)
3433
3434static int index_core(unsigned char *sha1, int fd, size_t size,
3435                      enum object_type type, const char *path,
3436                      unsigned flags)
3437{
3438        int ret;
3439
3440        if (!size) {
3441                ret = index_mem(sha1, "", size, type, path, flags);
3442        } else if (size <= SMALL_FILE_SIZE) {
3443                char *buf = xmalloc(size);
3444                if (size == read_in_full(fd, buf, size))
3445                        ret = index_mem(sha1, buf, size, type, path, flags);
3446                else
3447                        ret = error_errno("short read");
3448                free(buf);
3449        } else {
3450                void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
3451                ret = index_mem(sha1, buf, size, type, path, flags);
3452                munmap(buf, size);
3453        }
3454        return ret;
3455}
3456
3457/*
3458 * This creates one packfile per large blob unless bulk-checkin
3459 * machinery is "plugged".
3460 *
3461 * This also bypasses the usual "convert-to-git" dance, and that is on
3462 * purpose. We could write a streaming version of the converting
3463 * functions and insert that before feeding the data to fast-import
3464 * (or equivalent in-core API described above). However, that is
3465 * somewhat complicated, as we do not know the size of the filter
3466 * result, which we need to know beforehand when writing a git object.
3467 * Since the primary motivation for trying to stream from the working
3468 * tree file and to avoid mmaping it in core is to deal with large
3469 * binary blobs, they generally do not want to get any conversion, and
3470 * callers should avoid this code path when filters are requested.
3471 */
3472static int index_stream(unsigned char *sha1, int fd, size_t size,
3473                        enum object_type type, const char *path,
3474                        unsigned flags)
3475{
3476        return index_bulk_checkin(sha1, fd, size, type, path, flags);
3477}
3478
3479int index_fd(unsigned char *sha1, int fd, struct stat *st,
3480             enum object_type type, const char *path, unsigned flags)
3481{
3482        int ret;
3483
3484        /*
3485         * Call xsize_t() only when needed to avoid potentially unnecessary
3486         * die() for large files.
3487         */
3488        if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
3489                ret = index_stream_convert_blob(sha1, fd, path, flags);
3490        else if (!S_ISREG(st->st_mode))
3491                ret = index_pipe(sha1, fd, type, path, flags);
3492        else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
3493                 (path && would_convert_to_git(path)))
3494                ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
3495                                 flags);
3496        else
3497                ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
3498                                   flags);
3499        close(fd);
3500        return ret;
3501}
3502
3503int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
3504{
3505        int fd;
3506        struct strbuf sb = STRBUF_INIT;
3507
3508        switch (st->st_mode & S_IFMT) {
3509        case S_IFREG:
3510                fd = open(path, O_RDONLY);
3511                if (fd < 0)
3512                        return error_errno("open(\"%s\")", path);
3513                if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
3514                        return error("%s: failed to insert into database",
3515                                     path);
3516                break;
3517        case S_IFLNK:
3518                if (strbuf_readlink(&sb, path, st->st_size))
3519                        return error_errno("readlink(\"%s\")", path);
3520                if (!(flags & HASH_WRITE_OBJECT))
3521                        hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
3522                else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
3523                        return error("%s: failed to insert into database",
3524                                     path);
3525                strbuf_release(&sb);
3526                break;
3527        case S_IFDIR:
3528                return resolve_gitlink_ref(path, "HEAD", sha1);
3529        default:
3530                return error("%s: unsupported file type", path);
3531        }
3532        return 0;
3533}
3534
3535int read_pack_header(int fd, struct pack_header *header)
3536{
3537        if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
3538                /* "eof before pack header was fully read" */
3539                return PH_ERROR_EOF;
3540
3541        if (header->hdr_signature != htonl(PACK_SIGNATURE))
3542                /* "protocol error (pack signature mismatch detected)" */
3543                return PH_ERROR_PACK_SIGNATURE;
3544        if (!pack_version_ok(header->hdr_version))
3545                /* "protocol error (pack version unsupported)" */
3546                return PH_ERROR_PROTOCOL;
3547        return 0;
3548}
3549
3550void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
3551{
3552        enum object_type type = sha1_object_info(sha1, NULL);
3553        if (type < 0)
3554                die("%s is not a valid object", sha1_to_hex(sha1));
3555        if (type != expect)
3556                die("%s is not a valid '%s' object", sha1_to_hex(sha1),
3557                    typename(expect));
3558}
3559
3560static int for_each_file_in_obj_subdir(int subdir_nr,
3561                                       struct strbuf *path,
3562                                       each_loose_object_fn obj_cb,
3563                                       each_loose_cruft_fn cruft_cb,
3564                                       each_loose_subdir_fn subdir_cb,
3565                                       void *data)
3566{
3567        size_t baselen = path->len;
3568        DIR *dir = opendir(path->buf);
3569        struct dirent *de;
3570        int r = 0;
3571
3572        if (!dir) {
3573                if (errno == ENOENT)
3574                        return 0;
3575                return error_errno("unable to open %s", path->buf);
3576        }
3577
3578        while ((de = readdir(dir))) {
3579                if (is_dot_or_dotdot(de->d_name))
3580                        continue;
3581
3582                strbuf_setlen(path, baselen);
3583                strbuf_addf(path, "/%s", de->d_name);
3584
3585                if (strlen(de->d_name) == 38)  {
3586                        char hex[41];
3587                        unsigned char sha1[20];
3588
3589                        snprintf(hex, sizeof(hex), "%02x%s",
3590                                 subdir_nr, de->d_name);
3591                        if (!get_sha1_hex(hex, sha1)) {
3592                                if (obj_cb) {
3593                                        r = obj_cb(sha1, path->buf, data);
3594                                        if (r)
3595                                                break;
3596                                }
3597                                continue;
3598                        }
3599                }
3600
3601                if (cruft_cb) {
3602                        r = cruft_cb(de->d_name, path->buf, data);
3603                        if (r)
3604                                break;
3605                }
3606        }
3607        closedir(dir);
3608
3609        strbuf_setlen(path, baselen);
3610        if (!r && subdir_cb)
3611                r = subdir_cb(subdir_nr, path->buf, data);
3612
3613        return r;
3614}
3615
3616int for_each_loose_file_in_objdir_buf(struct strbuf *path,
3617                            each_loose_object_fn obj_cb,
3618                            each_loose_cruft_fn cruft_cb,
3619                            each_loose_subdir_fn subdir_cb,
3620                            void *data)
3621{
3622        size_t baselen = path->len;
3623        int r = 0;
3624        int i;
3625
3626        for (i = 0; i < 256; i++) {
3627                strbuf_addf(path, "/%02x", i);
3628                r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
3629                                                subdir_cb, data);
3630                strbuf_setlen(path, baselen);
3631                if (r)
3632                        break;
3633        }
3634
3635        return r;
3636}
3637
3638int for_each_loose_file_in_objdir(const char *path,
3639                                  each_loose_object_fn obj_cb,
3640                                  each_loose_cruft_fn cruft_cb,
3641                                  each_loose_subdir_fn subdir_cb,
3642                                  void *data)
3643{
3644        struct strbuf buf = STRBUF_INIT;
3645        int r;
3646
3647        strbuf_addstr(&buf, path);
3648        r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
3649                                              subdir_cb, data);
3650        strbuf_release(&buf);
3651
3652        return r;
3653}
3654
/* Context handed to loose_from_alt_odb() through its void * argument. */
struct loose_alt_odb_data {
        /* per-object callback passed on to for_each_loose_file_in_objdir_buf() */
        each_loose_object_fn *cb;
        /* opaque pointer passed on alongside cb */
        void *data;
};
3659
3660static int loose_from_alt_odb(struct alternate_object_database *alt,
3661                              void *vdata)
3662{
3663        struct loose_alt_odb_data *data = vdata;
3664        struct strbuf buf = STRBUF_INIT;
3665        int r;
3666
3667        /* copy base not including trailing '/' */
3668        strbuf_add(&buf, alt->base, alt->name - alt->base - 1);
3669        r = for_each_loose_file_in_objdir_buf(&buf,
3670                                              data->cb, NULL, NULL,
3671                                              data->data);
3672        strbuf_release(&buf);
3673        return r;
3674}
3675
3676int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
3677{
3678        struct loose_alt_odb_data alt;
3679        int r;
3680
3681        r = for_each_loose_file_in_objdir(get_object_directory(),
3682                                          cb, NULL, NULL, data);
3683        if (r)
3684                return r;
3685
3686        if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
3687                return 0;
3688
3689        alt.cb = cb;
3690        alt.data = data;
3691        return foreach_alt_odb(loose_from_alt_odb, &alt);
3692}
3693
3694static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
3695{
3696        uint32_t i;
3697        int r = 0;
3698
3699        for (i = 0; i < p->num_objects; i++) {
3700                const unsigned char *sha1 = nth_packed_object_sha1(p, i);
3701
3702                if (!sha1)
3703                        return error("unable to get sha1 of object %u in %s",
3704                                     i, p->pack_name);
3705
3706                r = cb(sha1, p, i, data);
3707                if (r)
3708                        break;
3709        }
3710        return r;
3711}
3712
3713int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
3714{
3715        struct packed_git *p;
3716        int r = 0;
3717        int pack_errors = 0;
3718
3719        prepare_packed_git();
3720        for (p = packed_git; p; p = p->next) {
3721                if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
3722                        continue;
3723                if (open_pack_index(p)) {
3724                        pack_errors = 1;
3725                        continue;
3726                }
3727                r = for_each_object_in_pack(p, cb, data);
3728                if (r)
3729                        break;
3730        }
3731        return r ? r : pack_errors;
3732}