13479a060c9c1bfd72df8c683e7fcff2ed92bd28
   1/*
   2 * GIT - The information manager from hell
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This handles basic git sha1 object files - packing, unpacking,
   7 * creation etc.
   8 */
   9#include "cache.h"
  10#include "config.h"
  11#include "string-list.h"
  12#include "lockfile.h"
  13#include "delta.h"
  14#include "pack.h"
  15#include "blob.h"
  16#include "commit.h"
  17#include "run-command.h"
  18#include "tag.h"
  19#include "tree.h"
  20#include "tree-walk.h"
  21#include "refs.h"
  22#include "pack-revindex.h"
  23#include "sha1-lookup.h"
  24#include "bulk-checkin.h"
  25#include "streaming.h"
  26#include "dir.h"
  27#include "mru.h"
  28#include "list.h"
  29#include "mergesort.h"
  30#include "quote.h"
  31#include "packfile.h"
  32
  33const unsigned char null_sha1[20];
  34const struct object_id null_oid;
  35const struct object_id empty_tree_oid = {
  36        EMPTY_TREE_SHA1_BIN_LITERAL
  37};
  38const struct object_id empty_blob_oid = {
  39        EMPTY_BLOB_SHA1_BIN_LITERAL
  40};
  41
  42/*
  43 * This is meant to hold a *small* number of objects that you would
  44 * want read_sha1_file() to be able to return, but yet you do not want
  45 * to write them into the object store (e.g. a browse-only
  46 * application).
  47 */
  48static struct cached_object {
  49        unsigned char sha1[20];
  50        enum object_type type;
  51        void *buf;
  52        unsigned long size;
  53} *cached_objects;
  54static int cached_object_nr, cached_object_alloc;
  55
  56static struct cached_object empty_tree = {
  57        EMPTY_TREE_SHA1_BIN_LITERAL,
  58        OBJ_TREE,
  59        "",
  60        0
  61};
  62
  63static struct cached_object *find_cached_object(const unsigned char *sha1)
  64{
  65        int i;
  66        struct cached_object *co = cached_objects;
  67
  68        for (i = 0; i < cached_object_nr; i++, co++) {
  69                if (!hashcmp(co->sha1, sha1))
  70                        return co;
  71        }
  72        if (!hashcmp(sha1, empty_tree.sha1))
  73                return &empty_tree;
  74        return NULL;
  75}
  76
  77int mkdir_in_gitdir(const char *path)
  78{
  79        if (mkdir(path, 0777)) {
  80                int saved_errno = errno;
  81                struct stat st;
  82                struct strbuf sb = STRBUF_INIT;
  83
  84                if (errno != EEXIST)
  85                        return -1;
  86                /*
  87                 * Are we looking at a path in a symlinked worktree
  88                 * whose original repository does not yet have it?
  89                 * e.g. .git/rr-cache pointing at its original
  90                 * repository in which the user hasn't performed any
  91                 * conflict resolution yet?
  92                 */
  93                if (lstat(path, &st) || !S_ISLNK(st.st_mode) ||
  94                    strbuf_readlink(&sb, path, st.st_size) ||
  95                    !is_absolute_path(sb.buf) ||
  96                    mkdir(sb.buf, 0777)) {
  97                        strbuf_release(&sb);
  98                        errno = saved_errno;
  99                        return -1;
 100                }
 101                strbuf_release(&sb);
 102        }
 103        return adjust_shared_perm(path);
 104}
 105
 106enum scld_error safe_create_leading_directories(char *path)
 107{
 108        char *next_component = path + offset_1st_component(path);
 109        enum scld_error ret = SCLD_OK;
 110
 111        while (ret == SCLD_OK && next_component) {
 112                struct stat st;
 113                char *slash = next_component, slash_character;
 114
 115                while (*slash && !is_dir_sep(*slash))
 116                        slash++;
 117
 118                if (!*slash)
 119                        break;
 120
 121                next_component = slash + 1;
 122                while (is_dir_sep(*next_component))
 123                        next_component++;
 124                if (!*next_component)
 125                        break;
 126
 127                slash_character = *slash;
 128                *slash = '\0';
 129                if (!stat(path, &st)) {
 130                        /* path exists */
 131                        if (!S_ISDIR(st.st_mode)) {
 132                                errno = ENOTDIR;
 133                                ret = SCLD_EXISTS;
 134                        }
 135                } else if (mkdir(path, 0777)) {
 136                        if (errno == EEXIST &&
 137                            !stat(path, &st) && S_ISDIR(st.st_mode))
 138                                ; /* somebody created it since we checked */
 139                        else if (errno == ENOENT)
 140                                /*
 141                                 * Either mkdir() failed because
 142                                 * somebody just pruned the containing
 143                                 * directory, or stat() failed because
 144                                 * the file that was in our way was
 145                                 * just removed.  Either way, inform
 146                                 * the caller that it might be worth
 147                                 * trying again:
 148                                 */
 149                                ret = SCLD_VANISHED;
 150                        else
 151                                ret = SCLD_FAILED;
 152                } else if (adjust_shared_perm(path)) {
 153                        ret = SCLD_PERMS;
 154                }
 155                *slash = slash_character;
 156        }
 157        return ret;
 158}
 159
 160enum scld_error safe_create_leading_directories_const(const char *path)
 161{
 162        int save_errno;
 163        /* path points to cache entries, so xstrdup before messing with it */
 164        char *buf = xstrdup(path);
 165        enum scld_error result = safe_create_leading_directories(buf);
 166
 167        save_errno = errno;
 168        free(buf);
 169        errno = save_errno;
 170        return result;
 171}
 172
 173int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
 174{
 175        /*
 176         * The number of times we will try to remove empty directories
 177         * in the way of path. This is only 1 because if another
 178         * process is racily creating directories that conflict with
 179         * us, we don't want to fight against them.
 180         */
 181        int remove_directories_remaining = 1;
 182
 183        /*
 184         * The number of times that we will try to create the
 185         * directories containing path. We are willing to attempt this
 186         * more than once, because another process could be trying to
 187         * clean up empty directories at the same time as we are
 188         * trying to create them.
 189         */
 190        int create_directories_remaining = 3;
 191
 192        /* A scratch copy of path, filled lazily if we need it: */
 193        struct strbuf path_copy = STRBUF_INIT;
 194
 195        int ret, save_errno;
 196
 197        /* Sanity check: */
 198        assert(*path);
 199
 200retry_fn:
 201        ret = fn(path, cb);
 202        save_errno = errno;
 203        if (!ret)
 204                goto out;
 205
 206        if (errno == EISDIR && remove_directories_remaining-- > 0) {
 207                /*
 208                 * A directory is in the way. Maybe it is empty; try
 209                 * to remove it:
 210                 */
 211                if (!path_copy.len)
 212                        strbuf_addstr(&path_copy, path);
 213
 214                if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
 215                        goto retry_fn;
 216        } else if (errno == ENOENT && create_directories_remaining-- > 0) {
 217                /*
 218                 * Maybe the containing directory didn't exist, or
 219                 * maybe it was just deleted by a process that is
 220                 * racing with us to clean up empty directories. Try
 221                 * to create it:
 222                 */
 223                enum scld_error scld_result;
 224
 225                if (!path_copy.len)
 226                        strbuf_addstr(&path_copy, path);
 227
 228                do {
 229                        scld_result = safe_create_leading_directories(path_copy.buf);
 230                        if (scld_result == SCLD_OK)
 231                                goto retry_fn;
 232                } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
 233        }
 234
 235out:
 236        strbuf_release(&path_copy);
 237        errno = save_errno;
 238        return ret;
 239}
 240
 241static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
 242{
 243        int i;
 244        for (i = 0; i < 20; i++) {
 245                static char hex[] = "0123456789abcdef";
 246                unsigned int val = sha1[i];
 247                strbuf_addch(buf, hex[val >> 4]);
 248                strbuf_addch(buf, hex[val & 0xf]);
 249                if (!i)
 250                        strbuf_addch(buf, '/');
 251        }
 252}
 253
 254const char *sha1_file_name(const unsigned char *sha1)
 255{
 256        static struct strbuf buf = STRBUF_INIT;
 257
 258        strbuf_reset(&buf);
 259        strbuf_addf(&buf, "%s/", get_object_directory());
 260
 261        fill_sha1_path(&buf, sha1);
 262        return buf.buf;
 263}
 264
 265struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
 266{
 267        strbuf_setlen(&alt->scratch, alt->base_len);
 268        return &alt->scratch;
 269}
 270
 271static const char *alt_sha1_path(struct alternate_object_database *alt,
 272                                 const unsigned char *sha1)
 273{
 274        struct strbuf *buf = alt_scratch_buf(alt);
 275        fill_sha1_path(buf, sha1);
 276        return buf->buf;
 277}
 278
 279struct alternate_object_database *alt_odb_list;
 280static struct alternate_object_database **alt_odb_tail;
 281
 282/*
 283 * Return non-zero iff the path is usable as an alternate object database.
 284 */
 285static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
 286{
 287        struct alternate_object_database *alt;
 288
 289        /* Detect cases where alternate disappeared */
 290        if (!is_directory(path->buf)) {
 291                error("object directory %s does not exist; "
 292                      "check .git/objects/info/alternates.",
 293                      path->buf);
 294                return 0;
 295        }
 296
 297        /*
 298         * Prevent the common mistake of listing the same
 299         * thing twice, or object directory itself.
 300         */
 301        for (alt = alt_odb_list; alt; alt = alt->next) {
 302                if (!fspathcmp(path->buf, alt->path))
 303                        return 0;
 304        }
 305        if (!fspathcmp(path->buf, normalized_objdir))
 306                return 0;
 307
 308        return 1;
 309}
 310
 311/*
 312 * Prepare alternate object database registry.
 313 *
 314 * The variable alt_odb_list points at the list of struct
 315 * alternate_object_database.  The elements on this list come from
 316 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
 317 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
 318 * whose contents is similar to that environment variable but can be
 319 * LF separated.  Its base points at a statically allocated buffer that
 320 * contains "/the/directory/corresponding/to/.git/objects/...", while
 321 * its name points just after the slash at the end of ".git/objects/"
 322 * in the example above, and has enough space to hold 40-byte hex
 323 * SHA1, an extra slash for the first level indirection, and the
 324 * terminating NUL.
 325 */
 326static void read_info_alternates(const char * relative_base, int depth);
 327static int link_alt_odb_entry(const char *entry, const char *relative_base,
 328        int depth, const char *normalized_objdir)
 329{
 330        struct alternate_object_database *ent;
 331        struct strbuf pathbuf = STRBUF_INIT;
 332
 333        if (!is_absolute_path(entry) && relative_base) {
 334                strbuf_realpath(&pathbuf, relative_base, 1);
 335                strbuf_addch(&pathbuf, '/');
 336        }
 337        strbuf_addstr(&pathbuf, entry);
 338
 339        if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
 340                error("unable to normalize alternate object path: %s",
 341                      pathbuf.buf);
 342                strbuf_release(&pathbuf);
 343                return -1;
 344        }
 345
 346        /*
 347         * The trailing slash after the directory name is given by
 348         * this function at the end. Remove duplicates.
 349         */
 350        while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
 351                strbuf_setlen(&pathbuf, pathbuf.len - 1);
 352
 353        if (!alt_odb_usable(&pathbuf, normalized_objdir)) {
 354                strbuf_release(&pathbuf);
 355                return -1;
 356        }
 357
 358        ent = alloc_alt_odb(pathbuf.buf);
 359
 360        /* add the alternate entry */
 361        *alt_odb_tail = ent;
 362        alt_odb_tail = &(ent->next);
 363        ent->next = NULL;
 364
 365        /* recursively add alternates */
 366        read_info_alternates(pathbuf.buf, depth + 1);
 367
 368        strbuf_release(&pathbuf);
 369        return 0;
 370}
 371
 372static const char *parse_alt_odb_entry(const char *string,
 373                                       int sep,
 374                                       struct strbuf *out)
 375{
 376        const char *end;
 377
 378        strbuf_reset(out);
 379
 380        if (*string == '#') {
 381                /* comment; consume up to next separator */
 382                end = strchrnul(string, sep);
 383        } else if (*string == '"' && !unquote_c_style(out, string, &end)) {
 384                /*
 385                 * quoted path; unquote_c_style has copied the
 386                 * data for us and set "end". Broken quoting (e.g.,
 387                 * an entry that doesn't end with a quote) falls
 388                 * back to the unquoted case below.
 389                 */
 390        } else {
 391                /* normal, unquoted path */
 392                end = strchrnul(string, sep);
 393                strbuf_add(out, string, end - string);
 394        }
 395
 396        if (*end)
 397                end++;
 398        return end;
 399}
 400
 401static void link_alt_odb_entries(const char *alt, int len, int sep,
 402                                 const char *relative_base, int depth)
 403{
 404        struct strbuf objdirbuf = STRBUF_INIT;
 405        struct strbuf entry = STRBUF_INIT;
 406
 407        if (depth > 5) {
 408                error("%s: ignoring alternate object stores, nesting too deep.",
 409                                relative_base);
 410                return;
 411        }
 412
 413        strbuf_add_absolute_path(&objdirbuf, get_object_directory());
 414        if (strbuf_normalize_path(&objdirbuf) < 0)
 415                die("unable to normalize object directory: %s",
 416                    objdirbuf.buf);
 417
 418        while (*alt) {
 419                alt = parse_alt_odb_entry(alt, sep, &entry);
 420                if (!entry.len)
 421                        continue;
 422                link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf);
 423        }
 424        strbuf_release(&entry);
 425        strbuf_release(&objdirbuf);
 426}
 427
 428static void read_info_alternates(const char * relative_base, int depth)
 429{
 430        char *map;
 431        size_t mapsz;
 432        struct stat st;
 433        char *path;
 434        int fd;
 435
 436        path = xstrfmt("%s/info/alternates", relative_base);
 437        fd = git_open(path);
 438        free(path);
 439        if (fd < 0)
 440                return;
 441        if (fstat(fd, &st) || (st.st_size == 0)) {
 442                close(fd);
 443                return;
 444        }
 445        mapsz = xsize_t(st.st_size);
 446        map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
 447        close(fd);
 448
 449        link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);
 450
 451        munmap(map, mapsz);
 452}
 453
 454struct alternate_object_database *alloc_alt_odb(const char *dir)
 455{
 456        struct alternate_object_database *ent;
 457
 458        FLEX_ALLOC_STR(ent, path, dir);
 459        strbuf_init(&ent->scratch, 0);
 460        strbuf_addf(&ent->scratch, "%s/", dir);
 461        ent->base_len = ent->scratch.len;
 462
 463        return ent;
 464}
 465
 466void add_to_alternates_file(const char *reference)
 467{
 468        struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
 469        char *alts = git_pathdup("objects/info/alternates");
 470        FILE *in, *out;
 471
 472        hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
 473        out = fdopen_lock_file(lock, "w");
 474        if (!out)
 475                die_errno("unable to fdopen alternates lockfile");
 476
 477        in = fopen(alts, "r");
 478        if (in) {
 479                struct strbuf line = STRBUF_INIT;
 480                int found = 0;
 481
 482                while (strbuf_getline(&line, in) != EOF) {
 483                        if (!strcmp(reference, line.buf)) {
 484                                found = 1;
 485                                break;
 486                        }
 487                        fprintf_or_die(out, "%s\n", line.buf);
 488                }
 489
 490                strbuf_release(&line);
 491                fclose(in);
 492
 493                if (found) {
 494                        rollback_lock_file(lock);
 495                        lock = NULL;
 496                }
 497        }
 498        else if (errno != ENOENT)
 499                die_errno("unable to read alternates file");
 500
 501        if (lock) {
 502                fprintf_or_die(out, "%s\n", reference);
 503                if (commit_lock_file(lock))
 504                        die_errno("unable to move new alternates file into place");
 505                if (alt_odb_tail)
 506                        link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
 507        }
 508        free(alts);
 509}
 510
 511void add_to_alternates_memory(const char *reference)
 512{
 513        /*
 514         * Make sure alternates are initialized, or else our entry may be
 515         * overwritten when they are.
 516         */
 517        prepare_alt_odb();
 518
 519        link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
 520}
 521
 522/*
 523 * Compute the exact path an alternate is at and returns it. In case of
 524 * error NULL is returned and the human readable error is added to `err`
 525 * `path` may be relative and should point to $GITDIR.
 526 * `err` must not be null.
 527 */
 528char *compute_alternate_path(const char *path, struct strbuf *err)
 529{
 530        char *ref_git = NULL;
 531        const char *repo, *ref_git_s;
 532        int seen_error = 0;
 533
 534        ref_git_s = real_path_if_valid(path);
 535        if (!ref_git_s) {
 536                seen_error = 1;
 537                strbuf_addf(err, _("path '%s' does not exist"), path);
 538                goto out;
 539        } else
 540                /*
 541                 * Beware: read_gitfile(), real_path() and mkpath()
 542                 * return static buffer
 543                 */
 544                ref_git = xstrdup(ref_git_s);
 545
 546        repo = read_gitfile(ref_git);
 547        if (!repo)
 548                repo = read_gitfile(mkpath("%s/.git", ref_git));
 549        if (repo) {
 550                free(ref_git);
 551                ref_git = xstrdup(repo);
 552        }
 553
 554        if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
 555                char *ref_git_git = mkpathdup("%s/.git", ref_git);
 556                free(ref_git);
 557                ref_git = ref_git_git;
 558        } else if (!is_directory(mkpath("%s/objects", ref_git))) {
 559                struct strbuf sb = STRBUF_INIT;
 560                seen_error = 1;
 561                if (get_common_dir(&sb, ref_git)) {
 562                        strbuf_addf(err,
 563                                    _("reference repository '%s' as a linked "
 564                                      "checkout is not supported yet."),
 565                                    path);
 566                        goto out;
 567                }
 568
 569                strbuf_addf(err, _("reference repository '%s' is not a "
 570                                        "local repository."), path);
 571                goto out;
 572        }
 573
 574        if (!access(mkpath("%s/shallow", ref_git), F_OK)) {
 575                strbuf_addf(err, _("reference repository '%s' is shallow"),
 576                            path);
 577                seen_error = 1;
 578                goto out;
 579        }
 580
 581        if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) {
 582                strbuf_addf(err,
 583                            _("reference repository '%s' is grafted"),
 584                            path);
 585                seen_error = 1;
 586                goto out;
 587        }
 588
 589out:
 590        if (seen_error) {
 591                FREE_AND_NULL(ref_git);
 592        }
 593
 594        return ref_git;
 595}
 596
 597int foreach_alt_odb(alt_odb_fn fn, void *cb)
 598{
 599        struct alternate_object_database *ent;
 600        int r = 0;
 601
 602        prepare_alt_odb();
 603        for (ent = alt_odb_list; ent; ent = ent->next) {
 604                r = fn(ent, cb);
 605                if (r)
 606                        break;
 607        }
 608        return r;
 609}
 610
 611void prepare_alt_odb(void)
 612{
 613        const char *alt;
 614
 615        if (alt_odb_tail)
 616                return;
 617
 618        alt = getenv(ALTERNATE_DB_ENVIRONMENT);
 619        if (!alt) alt = "";
 620
 621        alt_odb_tail = &alt_odb_list;
 622        link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
 623
 624        read_info_alternates(get_object_directory(), 0);
 625}
 626
 627/* Returns 1 if we have successfully freshened the file, 0 otherwise. */
 628static int freshen_file(const char *fn)
 629{
 630        struct utimbuf t;
 631        t.actime = t.modtime = time(NULL);
 632        return !utime(fn, &t);
 633}
 634
 635/*
 636 * All of the check_and_freshen functions return 1 if the file exists and was
 637 * freshened (if freshening was requested), 0 otherwise. If they return
 638 * 0, you should not assume that it is safe to skip a write of the object (it
 639 * either does not exist on disk, or has a stale mtime and may be subject to
 640 * pruning).
 641 */
 642int check_and_freshen_file(const char *fn, int freshen)
 643{
 644        if (access(fn, F_OK))
 645                return 0;
 646        if (freshen && !freshen_file(fn))
 647                return 0;
 648        return 1;
 649}
 650
 651static int check_and_freshen_local(const unsigned char *sha1, int freshen)
 652{
 653        return check_and_freshen_file(sha1_file_name(sha1), freshen);
 654}
 655
 656static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
 657{
 658        struct alternate_object_database *alt;
 659        prepare_alt_odb();
 660        for (alt = alt_odb_list; alt; alt = alt->next) {
 661                const char *path = alt_sha1_path(alt, sha1);
 662                if (check_and_freshen_file(path, freshen))
 663                        return 1;
 664        }
 665        return 0;
 666}
 667
 668static int check_and_freshen(const unsigned char *sha1, int freshen)
 669{
 670        return check_and_freshen_local(sha1, freshen) ||
 671               check_and_freshen_nonlocal(sha1, freshen);
 672}
 673
 674int has_loose_object_nonlocal(const unsigned char *sha1)
 675{
 676        return check_and_freshen_nonlocal(sha1, 0);
 677}
 678
 679static int has_loose_object(const unsigned char *sha1)
 680{
 681        return check_and_freshen(sha1, 0);
 682}
 683
 684static void mmap_limit_check(size_t length)
 685{
 686        static size_t limit = 0;
 687        if (!limit) {
 688                limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
 689                if (!limit)
 690                        limit = SIZE_MAX;
 691        }
 692        if (length > limit)
 693                die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
 694                    (uintmax_t)length, (uintmax_t)limit);
 695}
 696
 697void *xmmap_gently(void *start, size_t length,
 698                  int prot, int flags, int fd, off_t offset)
 699{
 700        void *ret;
 701
 702        mmap_limit_check(length);
 703        ret = mmap(start, length, prot, flags, fd, offset);
 704        if (ret == MAP_FAILED) {
 705                if (!length)
 706                        return NULL;
 707                release_pack_memory(length);
 708                ret = mmap(start, length, prot, flags, fd, offset);
 709        }
 710        return ret;
 711}
 712
 713void *xmmap(void *start, size_t length,
 714        int prot, int flags, int fd, off_t offset)
 715{
 716        void *ret = xmmap_gently(start, length, prot, flags, fd, offset);
 717        if (ret == MAP_FAILED)
 718                die_errno("mmap failed");
 719        return ret;
 720}
 721
 722void (*report_garbage)(unsigned seen_bits, const char *path);
 723
 724static void report_helper(const struct string_list *list,
 725                          int seen_bits, int first, int last)
 726{
 727        if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
 728                return;
 729
 730        for (; first < last; first++)
 731                report_garbage(seen_bits, list->items[first].string);
 732}
 733
 734static void report_pack_garbage(struct string_list *list)
 735{
 736        int i, baselen = -1, first = 0, seen_bits = 0;
 737
 738        if (!report_garbage)
 739                return;
 740
 741        string_list_sort(list);
 742
 743        for (i = 0; i < list->nr; i++) {
 744                const char *path = list->items[i].string;
 745                if (baselen != -1 &&
 746                    strncmp(path, list->items[first].string, baselen)) {
 747                        report_helper(list, seen_bits, first, i);
 748                        baselen = -1;
 749                        seen_bits = 0;
 750                }
 751                if (baselen == -1) {
 752                        const char *dot = strrchr(path, '.');
 753                        if (!dot) {
 754                                report_garbage(PACKDIR_FILE_GARBAGE, path);
 755                                continue;
 756                        }
 757                        baselen = dot - path + 1;
 758                        first = i;
 759                }
 760                if (!strcmp(path + baselen, "pack"))
 761                        seen_bits |= 1;
 762                else if (!strcmp(path + baselen, "idx"))
 763                        seen_bits |= 2;
 764        }
 765        report_helper(list, seen_bits, first, list->nr);
 766}
 767
 768static void prepare_packed_git_one(char *objdir, int local)
 769{
 770        struct strbuf path = STRBUF_INIT;
 771        size_t dirnamelen;
 772        DIR *dir;
 773        struct dirent *de;
 774        struct string_list garbage = STRING_LIST_INIT_DUP;
 775
 776        strbuf_addstr(&path, objdir);
 777        strbuf_addstr(&path, "/pack");
 778        dir = opendir(path.buf);
 779        if (!dir) {
 780                if (errno != ENOENT)
 781                        error_errno("unable to open object pack directory: %s",
 782                                    path.buf);
 783                strbuf_release(&path);
 784                return;
 785        }
 786        strbuf_addch(&path, '/');
 787        dirnamelen = path.len;
 788        while ((de = readdir(dir)) != NULL) {
 789                struct packed_git *p;
 790                size_t base_len;
 791
 792                if (is_dot_or_dotdot(de->d_name))
 793                        continue;
 794
 795                strbuf_setlen(&path, dirnamelen);
 796                strbuf_addstr(&path, de->d_name);
 797
 798                base_len = path.len;
 799                if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
 800                        /* Don't reopen a pack we already have. */
 801                        for (p = packed_git; p; p = p->next) {
 802                                size_t len;
 803                                if (strip_suffix(p->pack_name, ".pack", &len) &&
 804                                    len == base_len &&
 805                                    !memcmp(p->pack_name, path.buf, len))
 806                                        break;
 807                        }
 808                        if (p == NULL &&
 809                            /*
 810                             * See if it really is a valid .idx file with
 811                             * corresponding .pack file that we can map.
 812                             */
 813                            (p = add_packed_git(path.buf, path.len, local)) != NULL)
 814                                install_packed_git(p);
 815                }
 816
 817                if (!report_garbage)
 818                        continue;
 819
 820                if (ends_with(de->d_name, ".idx") ||
 821                    ends_with(de->d_name, ".pack") ||
 822                    ends_with(de->d_name, ".bitmap") ||
 823                    ends_with(de->d_name, ".keep"))
 824                        string_list_append(&garbage, path.buf);
 825                else
 826                        report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
 827        }
 828        closedir(dir);
 829        report_pack_garbage(&garbage);
 830        string_list_clear(&garbage, 0);
 831        strbuf_release(&path);
 832}
 833
 834static int approximate_object_count_valid;
 835
 836/*
 837 * Give a fast, rough count of the number of objects in the repository. This
 838 * ignores loose objects completely. If you have a lot of them, then either
 839 * you should repack because your performance will be awful, or they are
 840 * all unreachable objects about to be pruned, in which case they're not really
 841 * interesting as a measure of repo size in the first place.
 842 */
 843unsigned long approximate_object_count(void)
 844{
 845        static unsigned long count;
 846        if (!approximate_object_count_valid) {
 847                struct packed_git *p;
 848
 849                prepare_packed_git();
 850                count = 0;
 851                for (p = packed_git; p; p = p->next) {
 852                        if (open_pack_index(p))
 853                                continue;
 854                        count += p->num_objects;
 855                }
 856        }
 857        return count;
 858}
 859
 860static void *get_next_packed_git(const void *p)
 861{
 862        return ((const struct packed_git *)p)->next;
 863}
 864
 865static void set_next_packed_git(void *p, void *next)
 866{
 867        ((struct packed_git *)p)->next = next;
 868}
 869
 870static int sort_pack(const void *a_, const void *b_)
 871{
 872        const struct packed_git *a = a_;
 873        const struct packed_git *b = b_;
 874        int st;
 875
 876        /*
 877         * Local packs tend to contain objects specific to our
 878         * variant of the project than remote ones.  In addition,
 879         * remote ones could be on a network mounted filesystem.
 880         * Favor local ones for these reasons.
 881         */
 882        st = a->pack_local - b->pack_local;
 883        if (st)
 884                return -st;
 885
 886        /*
 887         * Younger packs tend to contain more recent objects,
 888         * and more recent objects tend to get accessed more
 889         * often.
 890         */
 891        if (a->mtime < b->mtime)
 892                return 1;
 893        else if (a->mtime == b->mtime)
 894                return 0;
 895        return -1;
 896}
 897
 898static void rearrange_packed_git(void)
 899{
 900        packed_git = llist_mergesort(packed_git, get_next_packed_git,
 901                                     set_next_packed_git, sort_pack);
 902}
 903
 904static void prepare_packed_git_mru(void)
 905{
 906        struct packed_git *p;
 907
 908        mru_clear(packed_git_mru);
 909        for (p = packed_git; p; p = p->next)
 910                mru_append(packed_git_mru, p);
 911}
 912
 913static int prepare_packed_git_run_once = 0;
 914void prepare_packed_git(void)
 915{
 916        struct alternate_object_database *alt;
 917
 918        if (prepare_packed_git_run_once)
 919                return;
 920        prepare_packed_git_one(get_object_directory(), 1);
 921        prepare_alt_odb();
 922        for (alt = alt_odb_list; alt; alt = alt->next)
 923                prepare_packed_git_one(alt->path, 0);
 924        rearrange_packed_git();
 925        prepare_packed_git_mru();
 926        prepare_packed_git_run_once = 1;
 927}
 928
 929void reprepare_packed_git(void)
 930{
 931        approximate_object_count_valid = 0;
 932        prepare_packed_git_run_once = 0;
 933        prepare_packed_git();
 934}
 935
 936static void mark_bad_packed_object(struct packed_git *p,
 937                                   const unsigned char *sha1)
 938{
 939        unsigned i;
 940        for (i = 0; i < p->num_bad_objects; i++)
 941                if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
 942                        return;
 943        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
 944                                      st_mult(GIT_MAX_RAWSZ,
 945                                              st_add(p->num_bad_objects, 1)));
 946        hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
 947        p->num_bad_objects++;
 948}
 949
 950static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
 951{
 952        struct packed_git *p;
 953        unsigned i;
 954
 955        for (p = packed_git; p; p = p->next)
 956                for (i = 0; i < p->num_bad_objects; i++)
 957                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
 958                                return p;
 959        return NULL;
 960}
 961
 962/*
 963 * With an in-core object data in "map", rehash it to make sure the
 964 * object name actually matches "sha1" to detect object corruption.
 965 * With "map" == NULL, try reading the object named with "sha1" using
 966 * the streaming interface and rehash it to do the same.
 967 */
 968int check_sha1_signature(const unsigned char *sha1, void *map,
 969                         unsigned long size, const char *type)
 970{
 971        unsigned char real_sha1[20];
 972        enum object_type obj_type;
 973        struct git_istream *st;
 974        git_SHA_CTX c;
 975        char hdr[32];
 976        int hdrlen;
 977
 978        if (map) {
 979                hash_sha1_file(map, size, type, real_sha1);
 980                return hashcmp(sha1, real_sha1) ? -1 : 0;
 981        }
 982
 983        st = open_istream(sha1, &obj_type, &size, NULL);
 984        if (!st)
 985                return -1;
 986
 987        /* Generate the header */
 988        hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1;
 989
 990        /* Sha1.. */
 991        git_SHA1_Init(&c);
 992        git_SHA1_Update(&c, hdr, hdrlen);
 993        for (;;) {
 994                char buf[1024 * 16];
 995                ssize_t readlen = read_istream(st, buf, sizeof(buf));
 996
 997                if (readlen < 0) {
 998                        close_istream(st);
 999                        return -1;
1000                }
1001                if (!readlen)
1002                        break;
1003                git_SHA1_Update(&c, buf, readlen);
1004        }
1005        git_SHA1_Final(real_sha1, &c);
1006        close_istream(st);
1007        return hashcmp(sha1, real_sha1) ? -1 : 0;
1008}
1009
1010int git_open_cloexec(const char *name, int flags)
1011{
1012        int fd;
1013        static int o_cloexec = O_CLOEXEC;
1014
1015        fd = open(name, flags | o_cloexec);
1016        if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) {
1017                /* Try again w/o O_CLOEXEC: the kernel might not support it */
1018                o_cloexec &= ~O_CLOEXEC;
1019                fd = open(name, flags | o_cloexec);
1020        }
1021
1022#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
1023        {
1024                static int fd_cloexec = FD_CLOEXEC;
1025
1026                if (!o_cloexec && 0 <= fd && fd_cloexec) {
1027                        /* Opened w/o O_CLOEXEC?  try with fcntl(2) to add it */
1028                        int flags = fcntl(fd, F_GETFD);
1029                        if (fcntl(fd, F_SETFD, flags | fd_cloexec))
1030                                fd_cloexec = 0;
1031                }
1032        }
1033#endif
1034        return fd;
1035}
1036
1037/*
1038 * Find "sha1" as a loose object in the local repository or in an alternate.
1039 * Returns 0 on success, negative on failure.
1040 *
1041 * The "path" out-parameter will give the path of the object we found (if any).
1042 * Note that it may point to static storage and is only valid until another
1043 * call to sha1_file_name(), etc.
1044 */
1045static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
1046                          const char **path)
1047{
1048        struct alternate_object_database *alt;
1049
1050        *path = sha1_file_name(sha1);
1051        if (!lstat(*path, st))
1052                return 0;
1053
1054        prepare_alt_odb();
1055        errno = ENOENT;
1056        for (alt = alt_odb_list; alt; alt = alt->next) {
1057                *path = alt_sha1_path(alt, sha1);
1058                if (!lstat(*path, st))
1059                        return 0;
1060        }
1061
1062        return -1;
1063}
1064
1065/*
1066 * Like stat_sha1_file(), but actually open the object and return the
1067 * descriptor. See the caveats on the "path" parameter above.
1068 */
1069static int open_sha1_file(const unsigned char *sha1, const char **path)
1070{
1071        int fd;
1072        struct alternate_object_database *alt;
1073        int most_interesting_errno;
1074
1075        *path = sha1_file_name(sha1);
1076        fd = git_open(*path);
1077        if (fd >= 0)
1078                return fd;
1079        most_interesting_errno = errno;
1080
1081        prepare_alt_odb();
1082        for (alt = alt_odb_list; alt; alt = alt->next) {
1083                *path = alt_sha1_path(alt, sha1);
1084                fd = git_open(*path);
1085                if (fd >= 0)
1086                        return fd;
1087                if (most_interesting_errno == ENOENT)
1088                        most_interesting_errno = errno;
1089        }
1090        errno = most_interesting_errno;
1091        return -1;
1092}
1093
/*
 * Map the loose object at "path" if it is not NULL, or the path found by
 * searching for a loose object named "sha1".
 *
 * On success returns the read-only private mapping and stores the file
 * size in "*size".  Returns NULL when the file cannot be opened or
 * fstat'ed, or when it is empty (an error is reported in that case;
 * "*size" has then been set to 0).  The descriptor is always closed
 * before returning, including on the empty-file path.
 */
static void *map_sha1_file_1(const char *path,
                             const unsigned char *sha1,
                             unsigned long *size)
{
        void *map = NULL;
        struct stat st;
        int fd;

        if (path)
                fd = git_open(path);
        else
                fd = open_sha1_file(sha1, &path);
        if (fd < 0)
                return NULL;

        if (!fstat(fd, &st)) {
                *size = xsize_t(st.st_size);
                if (!*size) {
                        /* mmap() is forbidden on empty files */
                        error("object file %s is empty", path);
                } else {
                        map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
                }
        }

        /* the mapping (if any) stays valid after the descriptor is closed */
        close(fd);
        return map;
}
1126
/*
 * Map the loose object named "sha1", locating its file by name.
 * Thin wrapper around map_sha1_file_1() with no explicit path.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
        const char *no_explicit_path = NULL;

        return map_sha1_file_1(no_explicit_path, sha1, size);
}
1131
1132unsigned long unpack_object_header_buffer(const unsigned char *buf,
1133                unsigned long len, enum object_type *type, unsigned long *sizep)
1134{
1135        unsigned shift;
1136        unsigned long size, c;
1137        unsigned long used = 0;
1138
1139        c = buf[used++];
1140        *type = (c >> 4) & 7;
1141        size = c & 15;
1142        shift = 4;
1143        while (c & 0x80) {
1144                if (len <= used || bitsizeof(long) <= shift) {
1145                        error("bad object header");
1146                        size = used = 0;
1147                        break;
1148                }
1149                c = buf[used++];
1150                size += (c & 0x7f) << shift;
1151                shift += 7;
1152        }
1153        *sizep = size;
1154        return used;
1155}
1156
1157static int unpack_sha1_short_header(git_zstream *stream,
1158                                    unsigned char *map, unsigned long mapsize,
1159                                    void *buffer, unsigned long bufsiz)
1160{
1161        /* Get the data stream */
1162        memset(stream, 0, sizeof(*stream));
1163        stream->next_in = map;
1164        stream->avail_in = mapsize;
1165        stream->next_out = buffer;
1166        stream->avail_out = bufsiz;
1167
1168        git_inflate_init(stream);
1169        return git_inflate(stream, 0);
1170}
1171
1172int unpack_sha1_header(git_zstream *stream,
1173                       unsigned char *map, unsigned long mapsize,
1174                       void *buffer, unsigned long bufsiz)
1175{
1176        int status = unpack_sha1_short_header(stream, map, mapsize,
1177                                              buffer, bufsiz);
1178
1179        if (status < Z_OK)
1180                return status;
1181
1182        /* Make sure we have the terminating NUL */
1183        if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1184                return -1;
1185        return 0;
1186}
1187
1188static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
1189                                        unsigned long mapsize, void *buffer,
1190                                        unsigned long bufsiz, struct strbuf *header)
1191{
1192        int status;
1193
1194        status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);
1195        if (status < Z_OK)
1196                return -1;
1197
1198        /*
1199         * Check if entire header is unpacked in the first iteration.
1200         */
1201        if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1202                return 0;
1203
1204        /*
1205         * buffer[0..bufsiz] was not large enough.  Copy the partial
1206         * result out to header, and then append the result of further
1207         * reading the stream.
1208         */
1209        strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1210        stream->next_out = buffer;
1211        stream->avail_out = bufsiz;
1212
1213        do {
1214                status = git_inflate(stream, 0);
1215                strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1216                if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1217                        return 0;
1218                stream->next_out = buffer;
1219                stream->avail_out = bufsiz;
1220        } while (status != Z_STREAM_END);
1221        return -1;
1222}
1223
1224static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
1225{
1226        int bytes = strlen(buffer) + 1;
1227        unsigned char *buf = xmallocz(size);
1228        unsigned long n;
1229        int status = Z_OK;
1230
1231        n = stream->total_out - bytes;
1232        if (n > size)
1233                n = size;
1234        memcpy(buf, (char *) buffer + bytes, n);
1235        bytes = n;
1236        if (bytes <= size) {
1237                /*
1238                 * The above condition must be (bytes <= size), not
1239                 * (bytes < size).  In other words, even though we
1240                 * expect no more output and set avail_out to zero,
1241                 * the input zlib stream may have bytes that express
1242                 * "this concludes the stream", and we *do* want to
1243                 * eat that input.
1244                 *
1245                 * Otherwise we would not be able to test that we
1246                 * consumed all the input to reach the expected size;
1247                 * we also want to check that zlib tells us that all
1248                 * went well with status == Z_STREAM_END at the end.
1249                 */
1250                stream->next_out = buf + bytes;
1251                stream->avail_out = size - bytes;
1252                while (status == Z_OK)
1253                        status = git_inflate(stream, Z_FINISH);
1254        }
1255        if (status == Z_STREAM_END && !stream->avail_in) {
1256                git_inflate_end(stream);
1257                return buf;
1258        }
1259
1260        if (status < 0)
1261                error("corrupt loose object '%s'", sha1_to_hex(sha1));
1262        else if (stream->avail_in)
1263                error("garbage at end of loose object '%s'",
1264                      sha1_to_hex(sha1));
1265        free(buf);
1266        return NULL;
1267}
1268
1269/*
1270 * We used to just use "sscanf()", but that's actually way
1271 * too permissive for what we want to check. So do an anal
1272 * object header parse by hand.
1273 */
1274static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
1275                               unsigned int flags)
1276{
1277        const char *type_buf = hdr;
1278        unsigned long size;
1279        int type, type_len = 0;
1280
1281        /*
1282         * The type can be of any size but is followed by
1283         * a space.
1284         */
1285        for (;;) {
1286                char c = *hdr++;
1287                if (!c)
1288                        return -1;
1289                if (c == ' ')
1290                        break;
1291                type_len++;
1292        }
1293
1294        type = type_from_string_gently(type_buf, type_len, 1);
1295        if (oi->typename)
1296                strbuf_add(oi->typename, type_buf, type_len);
1297        /*
1298         * Set type to 0 if its an unknown object and
1299         * we're obtaining the type using '--allow-unknown-type'
1300         * option.
1301         */
1302        if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0))
1303                type = 0;
1304        else if (type < 0)
1305                die("invalid object type");
1306        if (oi->typep)
1307                *oi->typep = type;
1308
1309        /*
1310         * The length must follow immediately, and be in canonical
1311         * decimal format (ie "010" is not valid).
1312         */
1313        size = *hdr++ - '0';
1314        if (size > 9)
1315                return -1;
1316        if (size) {
1317                for (;;) {
1318                        unsigned long c = *hdr - '0';
1319                        if (c > 9)
1320                                break;
1321                        hdr++;
1322                        size = size * 10 + c;
1323                }
1324        }
1325
1326        if (oi->sizep)
1327                *oi->sizep = size;
1328
1329        /*
1330         * The length must be followed by a zero byte
1331         */
1332        return *hdr ? -1 : type;
1333}
1334
1335int parse_sha1_header(const char *hdr, unsigned long *sizep)
1336{
1337        struct object_info oi = OBJECT_INFO_INIT;
1338
1339        oi.sizep = sizep;
1340        return parse_sha1_header_extended(hdr, &oi, 0);
1341}
1342
1343unsigned long get_size_from_delta(struct packed_git *p,
1344                                  struct pack_window **w_curs,
1345                                  off_t curpos)
1346{
1347        const unsigned char *data;
1348        unsigned char delta_head[20], *in;
1349        git_zstream stream;
1350        int st;
1351
1352        memset(&stream, 0, sizeof(stream));
1353        stream.next_out = delta_head;
1354        stream.avail_out = sizeof(delta_head);
1355
1356        git_inflate_init(&stream);
1357        do {
1358                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1359                stream.next_in = in;
1360                st = git_inflate(&stream, Z_FINISH);
1361                curpos += stream.next_in - in;
1362        } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1363                 stream.total_out < sizeof(delta_head));
1364        git_inflate_end(&stream);
1365        if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1366                error("delta data unpack-initial failed");
1367                return 0;
1368        }
1369
1370        /* Examine the initial part of the delta to figure out
1371         * the result size.
1372         */
1373        data = delta_head;
1374
1375        /* ignore base size */
1376        get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1377
1378        /* Read the result size */
1379        return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1380}
1381
1382static off_t get_delta_base(struct packed_git *p,
1383                                    struct pack_window **w_curs,
1384                                    off_t *curpos,
1385                                    enum object_type type,
1386                                    off_t delta_obj_offset)
1387{
1388        unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1389        off_t base_offset;
1390
1391        /* use_pack() assured us we have [base_info, base_info + 20)
1392         * as a range that we can look at without walking off the
1393         * end of the mapped window.  Its actually the hash size
1394         * that is assured.  An OFS_DELTA longer than the hash size
1395         * is stupid, as then a REF_DELTA would be smaller to store.
1396         */
1397        if (type == OBJ_OFS_DELTA) {
1398                unsigned used = 0;
1399                unsigned char c = base_info[used++];
1400                base_offset = c & 127;
1401                while (c & 128) {
1402                        base_offset += 1;
1403                        if (!base_offset || MSB(base_offset, 7))
1404                                return 0;  /* overflow */
1405                        c = base_info[used++];
1406                        base_offset = (base_offset << 7) + (c & 127);
1407                }
1408                base_offset = delta_obj_offset - base_offset;
1409                if (base_offset <= 0 || base_offset >= delta_obj_offset)
1410                        return 0;  /* out of bound */
1411                *curpos += used;
1412        } else if (type == OBJ_REF_DELTA) {
1413                /* The base entry _must_ be in the same pack */
1414                base_offset = find_pack_entry_one(base_info, p);
1415                *curpos += 20;
1416        } else
1417                die("I am totally screwed");
1418        return base_offset;
1419}
1420
1421/*
1422 * Like get_delta_base above, but we return the sha1 instead of the pack
1423 * offset. This means it is cheaper for REF deltas (we do not have to do
1424 * the final object lookup), but more expensive for OFS deltas (we
1425 * have to load the revidx to convert the offset back into a sha1).
1426 */
1427static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1428                                                struct pack_window **w_curs,
1429                                                off_t curpos,
1430                                                enum object_type type,
1431                                                off_t delta_obj_offset)
1432{
1433        if (type == OBJ_REF_DELTA) {
1434                unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1435                return base;
1436        } else if (type == OBJ_OFS_DELTA) {
1437                struct revindex_entry *revidx;
1438                off_t base_offset = get_delta_base(p, w_curs, &curpos,
1439                                                   type, delta_obj_offset);
1440
1441                if (!base_offset)
1442                        return NULL;
1443
1444                revidx = find_pack_revindex(p, base_offset);
1445                if (!revidx)
1446                        return NULL;
1447
1448                return nth_packed_object_sha1(p, revidx->nr);
1449        } else
1450                return NULL;
1451}
1452
1453int unpack_object_header(struct packed_git *p,
1454                         struct pack_window **w_curs,
1455                         off_t *curpos,
1456                         unsigned long *sizep)
1457{
1458        unsigned char *base;
1459        unsigned long left;
1460        unsigned long used;
1461        enum object_type type;
1462
1463        /* use_pack() assures us we have [base, base + 20) available
1464         * as a range that we can look at.  (Its actually the hash
1465         * size that is assured.)  With our object header encoding
1466         * the maximum deflated object size is 2^137, which is just
1467         * insane, so we know won't exceed what we have been given.
1468         */
1469        base = use_pack(p, w_curs, *curpos, &left);
1470        used = unpack_object_header_buffer(base, left, &type, sizep);
1471        if (!used) {
1472                type = OBJ_BAD;
1473        } else
1474                *curpos += used;
1475
1476        return type;
1477}
1478
1479static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1480{
1481        int type;
1482        struct revindex_entry *revidx;
1483        const unsigned char *sha1;
1484        revidx = find_pack_revindex(p, obj_offset);
1485        if (!revidx)
1486                return OBJ_BAD;
1487        sha1 = nth_packed_object_sha1(p, revidx->nr);
1488        mark_bad_packed_object(p, sha1);
1489        type = sha1_object_info(sha1, NULL);
1490        if (type <= OBJ_NONE)
1491                return OBJ_BAD;
1492        return type;
1493}
1494
1495#define POI_STACK_PREALLOC 64
1496
1497static enum object_type packed_to_object_type(struct packed_git *p,
1498                                              off_t obj_offset,
1499                                              enum object_type type,
1500                                              struct pack_window **w_curs,
1501                                              off_t curpos)
1502{
1503        off_t small_poi_stack[POI_STACK_PREALLOC];
1504        off_t *poi_stack = small_poi_stack;
1505        int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1506
1507        while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1508                off_t base_offset;
1509                unsigned long size;
1510                /* Push the object we're going to leave behind */
1511                if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1512                        poi_stack_alloc = alloc_nr(poi_stack_nr);
1513                        ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1514                        memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1515                } else {
1516                        ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1517                }
1518                poi_stack[poi_stack_nr++] = obj_offset;
1519                /* If parsing the base offset fails, just unwind */
1520                base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1521                if (!base_offset)
1522                        goto unwind;
1523                curpos = obj_offset = base_offset;
1524                type = unpack_object_header(p, w_curs, &curpos, &size);
1525                if (type <= OBJ_NONE) {
1526                        /* If getting the base itself fails, we first
1527                         * retry the base, otherwise unwind */
1528                        type = retry_bad_packed_offset(p, base_offset);
1529                        if (type > OBJ_NONE)
1530                                goto out;
1531                        goto unwind;
1532                }
1533        }
1534
1535        switch (type) {
1536        case OBJ_BAD:
1537        case OBJ_COMMIT:
1538        case OBJ_TREE:
1539        case OBJ_BLOB:
1540        case OBJ_TAG:
1541                break;
1542        default:
1543                error("unknown object type %i at offset %"PRIuMAX" in %s",
1544                      type, (uintmax_t)obj_offset, p->pack_name);
1545                type = OBJ_BAD;
1546        }
1547
1548out:
1549        if (poi_stack != small_poi_stack)
1550                free(poi_stack);
1551        return type;
1552
1553unwind:
1554        while (poi_stack_nr) {
1555                obj_offset = poi_stack[--poi_stack_nr];
1556                type = retry_bad_packed_offset(p, obj_offset);
1557                if (type > OBJ_NONE)
1558                        goto out;
1559        }
1560        type = OBJ_BAD;
1561        goto out;
1562}
1563
1564static struct hashmap delta_base_cache;
1565static size_t delta_base_cached;
1566
1567static LIST_HEAD(delta_base_cache_lru);
1568
1569struct delta_base_cache_key {
1570        struct packed_git *p;
1571        off_t base_offset;
1572};
1573
1574struct delta_base_cache_entry {
1575        struct hashmap hash;
1576        struct delta_base_cache_key key;
1577        struct list_head lru;
1578        void *data;
1579        unsigned long size;
1580        enum object_type type;
1581};
1582
/*
 * Hash a (pack, offset) pair for the delta base cache: add the pack
 * pointer and the offset (both truncated to unsigned int), then fold
 * in the sum shifted right by 8 and by 16 bits.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
        unsigned int seed = (unsigned int)(intptr_t)p + (unsigned int)base_offset;

        return seed + (seed >> 8) + (seed >> 16);
}
1591
1592static struct delta_base_cache_entry *
1593get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1594{
1595        struct hashmap_entry entry;
1596        struct delta_base_cache_key key;
1597
1598        if (!delta_base_cache.cmpfn)
1599                return NULL;
1600
1601        hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1602        key.p = p;
1603        key.base_offset = base_offset;
1604        return hashmap_get(&delta_base_cache, &entry, &key);
1605}
1606
1607static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1608                                   const struct delta_base_cache_key *b)
1609{
1610        return a->p == b->p && a->base_offset == b->base_offset;
1611}
1612
1613static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1614                                     const void *va, const void *vb,
1615                                     const void *vkey)
1616{
1617        const struct delta_base_cache_entry *a = va, *b = vb;
1618        const struct delta_base_cache_key *key = vkey;
1619        if (key)
1620                return !delta_base_cache_key_eq(&a->key, key);
1621        else
1622                return !delta_base_cache_key_eq(&a->key, &b->key);
1623}
1624
/* Return 1 if the object at "base_offset" in pack "p" is cached, 0 otherwise. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
        return get_delta_base_cache_entry(p, base_offset) != NULL;
}
1629
1630/*
1631 * Remove the entry from the cache, but do _not_ free the associated
1632 * entry data. The caller takes ownership of the "data" buffer, and
1633 * should copy out any fields it wants before detaching.
1634 */
1635static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1636{
1637        hashmap_remove(&delta_base_cache, ent, &ent->key);
1638        list_del(&ent->lru);
1639        delta_base_cached -= ent->size;
1640        free(ent);
1641}
1642
1643static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1644        unsigned long *base_size, enum object_type *type)
1645{
1646        struct delta_base_cache_entry *ent;
1647
1648        ent = get_delta_base_cache_entry(p, base_offset);
1649        if (!ent)
1650                return unpack_entry(p, base_offset, type, base_size);
1651
1652        if (type)
1653                *type = ent->type;
1654        if (base_size)
1655                *base_size = ent->size;
1656        return xmemdupz(ent->data, ent->size);
1657}
1658
1659static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1660{
1661        free(ent->data);
1662        detach_delta_base_cache_entry(ent);
1663}
1664
1665void clear_delta_base_cache(void)
1666{
1667        struct list_head *lru, *tmp;
1668        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1669                struct delta_base_cache_entry *entry =
1670                        list_entry(lru, struct delta_base_cache_entry, lru);
1671                release_delta_base_cache(entry);
1672        }
1673}
1674
1675static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1676        void *base, unsigned long base_size, enum object_type type)
1677{
1678        struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1679        struct list_head *lru, *tmp;
1680
1681        delta_base_cached += base_size;
1682
1683        list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1684                struct delta_base_cache_entry *f =
1685                        list_entry(lru, struct delta_base_cache_entry, lru);
1686                if (delta_base_cached <= delta_base_cache_limit)
1687                        break;
1688                release_delta_base_cache(f);
1689        }
1690
1691        ent->key.p = p;
1692        ent->key.base_offset = base_offset;
1693        ent->type = type;
1694        ent->data = base;
1695        ent->size = base_size;
1696        list_add_tail(&ent->lru, &delta_base_cache_lru);
1697
1698        if (!delta_base_cache.cmpfn)
1699                hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1700        hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1701        hashmap_add(&delta_base_cache, ent);
1702}
1703
1704int packed_object_info(struct packed_git *p, off_t obj_offset,
1705                       struct object_info *oi)
1706{
1707        struct pack_window *w_curs = NULL;
1708        unsigned long size;
1709        off_t curpos = obj_offset;
1710        enum object_type type;
1711
1712        /*
1713         * We always get the representation type, but only convert it to
1714         * a "real" type later if the caller is interested.
1715         */
1716        if (oi->contentp) {
1717                *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1718                                                      &type);
1719                if (!*oi->contentp)
1720                        type = OBJ_BAD;
1721        } else {
1722                type = unpack_object_header(p, &w_curs, &curpos, &size);
1723        }
1724
1725        if (!oi->contentp && oi->sizep) {
1726                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1727                        off_t tmp_pos = curpos;
1728                        off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1729                                                           type, obj_offset);
1730                        if (!base_offset) {
1731                                type = OBJ_BAD;
1732                                goto out;
1733                        }
1734                        *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1735                        if (*oi->sizep == 0) {
1736                                type = OBJ_BAD;
1737                                goto out;
1738                        }
1739                } else {
1740                        *oi->sizep = size;
1741                }
1742        }
1743
1744        if (oi->disk_sizep) {
1745                struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1746                *oi->disk_sizep = revidx[1].offset - obj_offset;
1747        }
1748
1749        if (oi->typep || oi->typename) {
1750                enum object_type ptot;
1751                ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1752                                             curpos);
1753                if (oi->typep)
1754                        *oi->typep = ptot;
1755                if (oi->typename) {
1756                        const char *tn = typename(ptot);
1757                        if (tn)
1758                                strbuf_addstr(oi->typename, tn);
1759                }
1760                if (ptot < 0) {
1761                        type = OBJ_BAD;
1762                        goto out;
1763                }
1764        }
1765
1766        if (oi->delta_base_sha1) {
1767                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1768                        const unsigned char *base;
1769
1770                        base = get_delta_base_sha1(p, &w_curs, curpos,
1771                                                   type, obj_offset);
1772                        if (!base) {
1773                                type = OBJ_BAD;
1774                                goto out;
1775                        }
1776
1777                        hashcpy(oi->delta_base_sha1, base);
1778                } else
1779                        hashclr(oi->delta_base_sha1);
1780        }
1781
1782        oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1783                                                          OI_PACKED;
1784
1785out:
1786        unuse_pack(&w_curs);
1787        return type;
1788}
1789
1790static void *unpack_compressed_entry(struct packed_git *p,
1791                                    struct pack_window **w_curs,
1792                                    off_t curpos,
1793                                    unsigned long size)
1794{
1795        int st;
1796        git_zstream stream;
1797        unsigned char *buffer, *in;
1798
1799        buffer = xmallocz_gently(size);
1800        if (!buffer)
1801                return NULL;
1802        memset(&stream, 0, sizeof(stream));
1803        stream.next_out = buffer;
1804        stream.avail_out = size + 1;
1805
1806        git_inflate_init(&stream);
1807        do {
1808                in = use_pack(p, w_curs, curpos, &stream.avail_in);
1809                stream.next_in = in;
1810                st = git_inflate(&stream, Z_FINISH);
1811                if (!stream.avail_out)
1812                        break; /* the payload is larger than it should be */
1813                curpos += stream.next_in - in;
1814        } while (st == Z_OK || st == Z_BUF_ERROR);
1815        git_inflate_end(&stream);
1816        if ((st != Z_STREAM_END) || stream.total_out != size) {
1817                free(buffer);
1818                return NULL;
1819        }
1820
1821        return buffer;
1822}
1823
/*
 * Forward declaration: unpack_entry() below calls read_object() to
 * fetch a delta base it could not read from the pack, but the function
 * itself is defined further down in this file.
 */
static void *read_object(const unsigned char *sha1, enum object_type *type,
                         unsigned long *size);
1826
1827static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1828{
1829        static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1830        trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1831                         p->pack_name, (uintmax_t)obj_offset);
1832}
1833
/*
 * When non-zero, unpack_entry() runs check_pack_crc() on every entry it
 * visits, provided the pack's index version is greater than 1.
 */
int do_check_packed_object_crc;
1835
/* Number of pending deltas unpack_entry() can track in its on-stack array. */
#define UNPACK_ENTRY_STACK_PREALLOC 64
/* One pending delta remembered by unpack_entry() while it walks to the base. */
struct unpack_entry_stack_ent {
        off_t obj_offset;       /* offset of the delta entry in the pack */
        off_t curpos;           /* position reached after the header and base reference */
        unsigned long size;     /* size read from the entry header */
};
1842
1843void *unpack_entry(struct packed_git *p, off_t obj_offset,
1844                   enum object_type *final_type, unsigned long *final_size)
1845{
1846        struct pack_window *w_curs = NULL;
1847        off_t curpos = obj_offset;
1848        void *data = NULL;
1849        unsigned long size;
1850        enum object_type type;
1851        struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1852        struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1853        int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1854        int base_from_cache = 0;
1855
1856        write_pack_access_log(p, obj_offset);
1857
1858        /* PHASE 1: drill down to the innermost base object */
1859        for (;;) {
1860                off_t base_offset;
1861                int i;
1862                struct delta_base_cache_entry *ent;
1863
1864                ent = get_delta_base_cache_entry(p, curpos);
1865                if (ent) {
1866                        type = ent->type;
1867                        data = ent->data;
1868                        size = ent->size;
1869                        detach_delta_base_cache_entry(ent);
1870                        base_from_cache = 1;
1871                        break;
1872                }
1873
1874                if (do_check_packed_object_crc && p->index_version > 1) {
1875                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1876                        off_t len = revidx[1].offset - obj_offset;
1877                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1878                                const unsigned char *sha1 =
1879                                        nth_packed_object_sha1(p, revidx->nr);
1880                                error("bad packed object CRC for %s",
1881                                      sha1_to_hex(sha1));
1882                                mark_bad_packed_object(p, sha1);
1883                                data = NULL;
1884                                goto out;
1885                        }
1886                }
1887
1888                type = unpack_object_header(p, &w_curs, &curpos, &size);
1889                if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1890                        break;
1891
1892                base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1893                if (!base_offset) {
1894                        error("failed to validate delta base reference "
1895                              "at offset %"PRIuMAX" from %s",
1896                              (uintmax_t)curpos, p->pack_name);
1897                        /* bail to phase 2, in hopes of recovery */
1898                        data = NULL;
1899                        break;
1900                }
1901
1902                /* push object, proceed to base */
1903                if (delta_stack_nr >= delta_stack_alloc
1904                    && delta_stack == small_delta_stack) {
1905                        delta_stack_alloc = alloc_nr(delta_stack_nr);
1906                        ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1907                        memcpy(delta_stack, small_delta_stack,
1908                               sizeof(*delta_stack)*delta_stack_nr);
1909                } else {
1910                        ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1911                }
1912                i = delta_stack_nr++;
1913                delta_stack[i].obj_offset = obj_offset;
1914                delta_stack[i].curpos = curpos;
1915                delta_stack[i].size = size;
1916
1917                curpos = obj_offset = base_offset;
1918        }
1919
1920        /* PHASE 2: handle the base */
1921        switch (type) {
1922        case OBJ_OFS_DELTA:
1923        case OBJ_REF_DELTA:
1924                if (data)
1925                        die("BUG: unpack_entry: left loop at a valid delta");
1926                break;
1927        case OBJ_COMMIT:
1928        case OBJ_TREE:
1929        case OBJ_BLOB:
1930        case OBJ_TAG:
1931                if (!base_from_cache)
1932                        data = unpack_compressed_entry(p, &w_curs, curpos, size);
1933                break;
1934        default:
1935                data = NULL;
1936                error("unknown object type %i at offset %"PRIuMAX" in %s",
1937                      type, (uintmax_t)obj_offset, p->pack_name);
1938        }
1939
1940        /* PHASE 3: apply deltas in order */
1941
1942        /* invariants:
1943         *   'data' holds the base data, or NULL if there was corruption
1944         */
1945        while (delta_stack_nr) {
1946                void *delta_data;
1947                void *base = data;
1948                void *external_base = NULL;
1949                unsigned long delta_size, base_size = size;
1950                int i;
1951
1952                data = NULL;
1953
1954                if (base)
1955                        add_delta_base_cache(p, obj_offset, base, base_size, type);
1956
1957                if (!base) {
1958                        /*
1959                         * We're probably in deep shit, but let's try to fetch
1960                         * the required base anyway from another pack or loose.
1961                         * This is costly but should happen only in the presence
1962                         * of a corrupted pack, and is better than failing outright.
1963                         */
1964                        struct revindex_entry *revidx;
1965                        const unsigned char *base_sha1;
1966                        revidx = find_pack_revindex(p, obj_offset);
1967                        if (revidx) {
1968                                base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1969                                error("failed to read delta base object %s"
1970                                      " at offset %"PRIuMAX" from %s",
1971                                      sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1972                                      p->pack_name);
1973                                mark_bad_packed_object(p, base_sha1);
1974                                base = read_object(base_sha1, &type, &base_size);
1975                                external_base = base;
1976                        }
1977                }
1978
1979                i = --delta_stack_nr;
1980                obj_offset = delta_stack[i].obj_offset;
1981                curpos = delta_stack[i].curpos;
1982                delta_size = delta_stack[i].size;
1983
1984                if (!base)
1985                        continue;
1986
1987                delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1988
1989                if (!delta_data) {
1990                        error("failed to unpack compressed delta "
1991                              "at offset %"PRIuMAX" from %s",
1992                              (uintmax_t)curpos, p->pack_name);
1993                        data = NULL;
1994                        free(external_base);
1995                        continue;
1996                }
1997
1998                data = patch_delta(base, base_size,
1999                                   delta_data, delta_size,
2000                                   &size);
2001
2002                /*
2003                 * We could not apply the delta; warn the user, but keep going.
2004                 * Our failure will be noticed either in the next iteration of
2005                 * the loop, or if this is the final delta, in the caller when
2006                 * we return NULL. Those code paths will take care of making
2007                 * a more explicit warning and retrying with another copy of
2008                 * the object.
2009                 */
2010                if (!data)
2011                        error("failed to apply delta");
2012
2013                free(delta_data);
2014                free(external_base);
2015        }
2016
2017        if (final_type)
2018                *final_type = type;
2019        if (final_size)
2020                *final_size = size;
2021
2022out:
2023        unuse_pack(&w_curs);
2024
2025        if (delta_stack != small_delta_stack)
2026                free(delta_stack);
2027
2028        return data;
2029}
2030
2031const unsigned char *nth_packed_object_sha1(struct packed_git *p,
2032                                            uint32_t n)
2033{
2034        const unsigned char *index = p->index_data;
2035        if (!index) {
2036                if (open_pack_index(p))
2037                        return NULL;
2038                index = p->index_data;
2039        }
2040        if (n >= p->num_objects)
2041                return NULL;
2042        index += 4 * 256;
2043        if (p->index_version == 1) {
2044                return index + 24 * n + 4;
2045        } else {
2046                index += 8;
2047                return index + 20 * n;
2048        }
2049}
2050
2051const struct object_id *nth_packed_object_oid(struct object_id *oid,
2052                                              struct packed_git *p,
2053                                              uint32_t n)
2054{
2055        const unsigned char *hash = nth_packed_object_sha1(p, n);
2056        if (!hash)
2057                return NULL;
2058        hashcpy(oid->hash, hash);
2059        return oid;
2060}
2061
2062void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
2063{
2064        const unsigned char *ptr = vptr;
2065        const unsigned char *start = p->index_data;
2066        const unsigned char *end = start + p->index_size;
2067        if (ptr < start)
2068                die(_("offset before start of pack index for %s (corrupt index?)"),
2069                    p->pack_name);
2070        /* No need to check for underflow; .idx files must be at least 8 bytes */
2071        if (ptr >= end - 8)
2072                die(_("offset beyond end of pack index for %s (truncated index?)"),
2073                    p->pack_name);
2074}
2075
2076off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
2077{
2078        const unsigned char *index = p->index_data;
2079        index += 4 * 256;
2080        if (p->index_version == 1) {
2081                return ntohl(*((uint32_t *)(index + 24 * n)));
2082        } else {
2083                uint32_t off;
2084                index += 8 + p->num_objects * (20 + 4);
2085                off = ntohl(*((uint32_t *)(index + 4 * n)));
2086                if (!(off & 0x80000000))
2087                        return off;
2088                index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
2089                check_pack_index_ptr(p, index);
2090                return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
2091                                   ntohl(*((uint32_t *)(index + 4)));
2092        }
2093}
2094
2095off_t find_pack_entry_one(const unsigned char *sha1,
2096                                  struct packed_git *p)
2097{
2098        const uint32_t *level1_ofs = p->index_data;
2099        const unsigned char *index = p->index_data;
2100        unsigned hi, lo, stride;
2101        static int debug_lookup = -1;
2102
2103        if (debug_lookup < 0)
2104                debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
2105
2106        if (!index) {
2107                if (open_pack_index(p))
2108                        return 0;
2109                level1_ofs = p->index_data;
2110                index = p->index_data;
2111        }
2112        if (p->index_version > 1) {
2113                level1_ofs += 2;
2114                index += 8;
2115        }
2116        index += 4 * 256;
2117        hi = ntohl(level1_ofs[*sha1]);
2118        lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
2119        if (p->index_version > 1) {
2120                stride = 20;
2121        } else {
2122                stride = 24;
2123                index += 4;
2124        }
2125
2126        if (debug_lookup)
2127                printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
2128                       sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
2129
2130        while (lo < hi) {
2131                unsigned mi = (lo + hi) / 2;
2132                int cmp = hashcmp(index + mi * stride, sha1);
2133
2134                if (debug_lookup)
2135                        printf("lo %u hi %u rg %u mi %u\n",
2136                               lo, hi, hi - lo, mi);
2137                if (!cmp)
2138                        return nth_packed_object_offset(p, mi);
2139                if (cmp > 0)
2140                        hi = mi;
2141                else
2142                        lo = mi+1;
2143        }
2144        return 0;
2145}
2146
2147int is_pack_valid(struct packed_git *p)
2148{
2149        /* An already open pack is known to be valid. */
2150        if (p->pack_fd != -1)
2151                return 1;
2152
2153        /* If the pack has one window completely covering the
2154         * file size, the pack is known to be valid even if
2155         * the descriptor is not currently open.
2156         */
2157        if (p->windows) {
2158                struct pack_window *w = p->windows;
2159
2160                if (!w->offset && w->len == p->pack_size)
2161                        return 1;
2162        }
2163
2164        /* Force the pack to open to prove its valid. */
2165        return !open_packed_git(p);
2166}
2167
2168static int fill_pack_entry(const unsigned char *sha1,
2169                           struct pack_entry *e,
2170                           struct packed_git *p)
2171{
2172        off_t offset;
2173
2174        if (p->num_bad_objects) {
2175                unsigned i;
2176                for (i = 0; i < p->num_bad_objects; i++)
2177                        if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
2178                                return 0;
2179        }
2180
2181        offset = find_pack_entry_one(sha1, p);
2182        if (!offset)
2183                return 0;
2184
2185        /*
2186         * We are about to tell the caller where they can locate the
2187         * requested object.  We better make sure the packfile is
2188         * still here and can be accessed before supplying that
2189         * answer, as it may have been deleted since the index was
2190         * loaded!
2191         */
2192        if (!is_pack_valid(p))
2193                return 0;
2194        e->offset = offset;
2195        e->p = p;
2196        hashcpy(e->sha1, sha1);
2197        return 1;
2198}
2199
2200/*
2201 * Iff a pack file contains the object named by sha1, return true and
2202 * store its location to e.
2203 */
2204static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
2205{
2206        struct mru_entry *p;
2207
2208        prepare_packed_git();
2209        if (!packed_git)
2210                return 0;
2211
2212        for (p = packed_git_mru->head; p; p = p->next) {
2213                if (fill_pack_entry(sha1, e, p->item)) {
2214                        mru_mark(packed_git_mru, p);
2215                        return 1;
2216                }
2217        }
2218        return 0;
2219}
2220
2221struct packed_git *find_sha1_pack(const unsigned char *sha1,
2222                                  struct packed_git *packs)
2223{
2224        struct packed_git *p;
2225
2226        for (p = packs; p; p = p->next) {
2227                if (find_pack_entry_one(sha1, p))
2228                        return p;
2229        }
2230        return NULL;
2231
2232}
2233
2234static int sha1_loose_object_info(const unsigned char *sha1,
2235                                  struct object_info *oi,
2236                                  int flags)
2237{
2238        int status = 0;
2239        unsigned long mapsize;
2240        void *map;
2241        git_zstream stream;
2242        char hdr[32];
2243        struct strbuf hdrbuf = STRBUF_INIT;
2244        unsigned long size_scratch;
2245
2246        if (oi->delta_base_sha1)
2247                hashclr(oi->delta_base_sha1);
2248
2249        /*
2250         * If we don't care about type or size, then we don't
2251         * need to look inside the object at all. Note that we
2252         * do not optimize out the stat call, even if the
2253         * caller doesn't care about the disk-size, since our
2254         * return value implicitly indicates whether the
2255         * object even exists.
2256         */
2257        if (!oi->typep && !oi->typename && !oi->sizep && !oi->contentp) {
2258                const char *path;
2259                struct stat st;
2260                if (stat_sha1_file(sha1, &st, &path) < 0)
2261                        return -1;
2262                if (oi->disk_sizep)
2263                        *oi->disk_sizep = st.st_size;
2264                return 0;
2265        }
2266
2267        map = map_sha1_file(sha1, &mapsize);
2268        if (!map)
2269                return -1;
2270
2271        if (!oi->sizep)
2272                oi->sizep = &size_scratch;
2273
2274        if (oi->disk_sizep)
2275                *oi->disk_sizep = mapsize;
2276        if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) {
2277                if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
2278                        status = error("unable to unpack %s header with --allow-unknown-type",
2279                                       sha1_to_hex(sha1));
2280        } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2281                status = error("unable to unpack %s header",
2282                               sha1_to_hex(sha1));
2283        if (status < 0)
2284                ; /* Do nothing */
2285        else if (hdrbuf.len) {
2286                if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
2287                        status = error("unable to parse %s header with --allow-unknown-type",
2288                                       sha1_to_hex(sha1));
2289        } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
2290                status = error("unable to parse %s header", sha1_to_hex(sha1));
2291
2292        if (status >= 0 && oi->contentp)
2293                *oi->contentp = unpack_sha1_rest(&stream, hdr,
2294                                                 *oi->sizep, sha1);
2295        else
2296                git_inflate_end(&stream);
2297
2298        munmap(map, mapsize);
2299        if (status && oi->typep)
2300                *oi->typep = status;
2301        if (oi->sizep == &size_scratch)
2302                oi->sizep = NULL;
2303        strbuf_release(&hdrbuf);
2304        oi->whence = OI_LOOSE;
2305        return (status < 0) ? status : 0;
2306}
2307
2308int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
2309{
2310        static struct object_info blank_oi = OBJECT_INFO_INIT;
2311        struct pack_entry e;
2312        int rtype;
2313        const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ?
2314                                    lookup_replace_object(sha1) :
2315                                    sha1;
2316
2317        if (!oi)
2318                oi = &blank_oi;
2319
2320        if (!(flags & OBJECT_INFO_SKIP_CACHED)) {
2321                struct cached_object *co = find_cached_object(real);
2322                if (co) {
2323                        if (oi->typep)
2324                                *(oi->typep) = co->type;
2325                        if (oi->sizep)
2326                                *(oi->sizep) = co->size;
2327                        if (oi->disk_sizep)
2328                                *(oi->disk_sizep) = 0;
2329                        if (oi->delta_base_sha1)
2330                                hashclr(oi->delta_base_sha1);
2331                        if (oi->typename)
2332                                strbuf_addstr(oi->typename, typename(co->type));
2333                        if (oi->contentp)
2334                                *oi->contentp = xmemdupz(co->buf, co->size);
2335                        oi->whence = OI_CACHED;
2336                        return 0;
2337                }
2338        }
2339
2340        if (!find_pack_entry(real, &e)) {
2341                /* Most likely it's a loose object. */
2342                if (!sha1_loose_object_info(real, oi, flags))
2343                        return 0;
2344
2345                /* Not a loose object; someone else may have just packed it. */
2346                if (flags & OBJECT_INFO_QUICK) {
2347                        return -1;
2348                } else {
2349                        reprepare_packed_git();
2350                        if (!find_pack_entry(real, &e))
2351                                return -1;
2352                }
2353        }
2354
2355        if (oi == &blank_oi)
2356                /*
2357                 * We know that the caller doesn't actually need the
2358                 * information below, so return early.
2359                 */
2360                return 0;
2361
2362        rtype = packed_object_info(e.p, e.offset, oi);
2363        if (rtype < 0) {
2364                mark_bad_packed_object(e.p, real);
2365                return sha1_object_info_extended(real, oi, 0);
2366        } else if (oi->whence == OI_PACKED) {
2367                oi->u.packed.offset = e.offset;
2368                oi->u.packed.pack = e.p;
2369                oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
2370                                         rtype == OBJ_OFS_DELTA);
2371        }
2372
2373        return 0;
2374}
2375
2376/* returns enum object_type or negative */
2377int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
2378{
2379        enum object_type type;
2380        struct object_info oi = OBJECT_INFO_INIT;
2381
2382        oi.typep = &type;
2383        oi.sizep = sizep;
2384        if (sha1_object_info_extended(sha1, &oi,
2385                                      OBJECT_INFO_LOOKUP_REPLACE) < 0)
2386                return -1;
2387        return type;
2388}
2389
2390int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
2391                      unsigned char *sha1)
2392{
2393        struct cached_object *co;
2394
2395        hash_sha1_file(buf, len, typename(type), sha1);
2396        if (has_sha1_file(sha1) || find_cached_object(sha1))
2397                return 0;
2398        ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc);
2399        co = &cached_objects[cached_object_nr++];
2400        co->size = len;
2401        co->type = type;
2402        co->buf = xmalloc(len);
2403        memcpy(co->buf, buf, len);
2404        hashcpy(co->sha1, sha1);
2405        return 0;
2406}
2407
2408static void *read_object(const unsigned char *sha1, enum object_type *type,
2409                         unsigned long *size)
2410{
2411        struct object_info oi = OBJECT_INFO_INIT;
2412        void *content;
2413        oi.typep = type;
2414        oi.sizep = size;
2415        oi.contentp = &content;
2416
2417        if (sha1_object_info_extended(sha1, &oi, 0) < 0)
2418                return NULL;
2419        return content;
2420}
2421
2422/*
2423 * This function dies on corrupt objects; the callers who want to
2424 * deal with them should arrange to call read_object() and give error
2425 * messages themselves.
2426 */
2427void *read_sha1_file_extended(const unsigned char *sha1,
2428                              enum object_type *type,
2429                              unsigned long *size,
2430                              int lookup_replace)
2431{
2432        void *data;
2433        const struct packed_git *p;
2434        const char *path;
2435        struct stat st;
2436        const unsigned char *repl = lookup_replace ? lookup_replace_object(sha1)
2437                                                   : sha1;
2438
2439        errno = 0;
2440        data = read_object(repl, type, size);
2441        if (data)
2442                return data;
2443
2444        if (errno && errno != ENOENT)
2445                die_errno("failed to read object %s", sha1_to_hex(sha1));
2446
2447        /* die if we replaced an object with one that does not exist */
2448        if (repl != sha1)
2449                die("replacement %s not found for %s",
2450                    sha1_to_hex(repl), sha1_to_hex(sha1));
2451
2452        if (!stat_sha1_file(repl, &st, &path))
2453                die("loose object %s (stored in %s) is corrupt",
2454                    sha1_to_hex(repl), path);
2455
2456        if ((p = has_packed_and_bad(repl)) != NULL)
2457                die("packed object %s (stored in %s) is corrupt",
2458                    sha1_to_hex(repl), p->pack_name);
2459
2460        return NULL;
2461}
2462
2463void *read_object_with_reference(const unsigned char *sha1,
2464                                 const char *required_type_name,
2465                                 unsigned long *size,
2466                                 unsigned char *actual_sha1_return)
2467{
2468        enum object_type type, required_type;
2469        void *buffer;
2470        unsigned long isize;
2471        unsigned char actual_sha1[20];
2472
2473        required_type = type_from_string(required_type_name);
2474        hashcpy(actual_sha1, sha1);
2475        while (1) {
2476                int ref_length = -1;
2477                const char *ref_type = NULL;
2478
2479                buffer = read_sha1_file(actual_sha1, &type, &isize);
2480                if (!buffer)
2481                        return NULL;
2482                if (type == required_type) {
2483                        *size = isize;
2484                        if (actual_sha1_return)
2485                                hashcpy(actual_sha1_return, actual_sha1);
2486                        return buffer;
2487                }
2488                /* Handle references */
2489                else if (type == OBJ_COMMIT)
2490                        ref_type = "tree ";
2491                else if (type == OBJ_TAG)
2492                        ref_type = "object ";
2493                else {
2494                        free(buffer);
2495                        return NULL;
2496                }
2497                ref_length = strlen(ref_type);
2498
2499                if (ref_length + 40 > isize ||
2500                    memcmp(buffer, ref_type, ref_length) ||
2501                    get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
2502                        free(buffer);
2503                        return NULL;
2504                }
2505                free(buffer);
2506                /* Now we have the ID of the referred-to object in
2507                 * actual_sha1.  Check again. */
2508        }
2509}
2510
2511static void write_sha1_file_prepare(const void *buf, unsigned long len,
2512                                    const char *type, unsigned char *sha1,
2513                                    char *hdr, int *hdrlen)
2514{
2515        git_SHA_CTX c;
2516
2517        /* Generate the header */
2518        *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1;
2519
2520        /* Sha1.. */
2521        git_SHA1_Init(&c);
2522        git_SHA1_Update(&c, hdr, *hdrlen);
2523        git_SHA1_Update(&c, buf, len);
2524        git_SHA1_Final(sha1, &c);
2525}
2526
2527/*
2528 * Move the just written object into its final resting place.
2529 */
2530int finalize_object_file(const char *tmpfile, const char *filename)
2531{
2532        int ret = 0;
2533
2534        if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
2535                goto try_rename;
2536        else if (link(tmpfile, filename))
2537                ret = errno;
2538
2539        /*
2540         * Coda hack - coda doesn't like cross-directory links,
2541         * so we fall back to a rename, which will mean that it
2542         * won't be able to check collisions, but that's not a
2543         * big deal.
2544         *
2545         * The same holds for FAT formatted media.
2546         *
2547         * When this succeeds, we just return.  We have nothing
2548         * left to unlink.
2549         */
2550        if (ret && ret != EEXIST) {
2551        try_rename:
2552                if (!rename(tmpfile, filename))
2553                        goto out;
2554                ret = errno;
2555        }
2556        unlink_or_warn(tmpfile);
2557        if (ret) {
2558                if (ret != EEXIST) {
2559                        return error_errno("unable to write sha1 filename %s", filename);
2560                }
2561                /* FIXME!!! Collision check here ? */
2562        }
2563
2564out:
2565        if (adjust_shared_perm(filename))
2566                return error("unable to set permission to '%s'", filename);
2567        return 0;
2568}
2569
/*
 * Write all "len" bytes of "buf" to "fd".  Returns 0 on success, or the
 * result of error_errno() when write_in_full() reports a failure.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
        int failed = write_in_full(fd, buf, len) < 0;

        return failed ? error_errno("file write error") : 0;
}
2576
/*
 * Compute the object name for "len" bytes at "buf" interpreted as an
 * object of the given "type", storing it in "sha1".  Nothing is
 * written by this function itself.  Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
                   unsigned char *sha1)
{
        char header[32];
        int header_len = sizeof(header);

        write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);
        return 0;
}
2585
2586/* Finalize a file on disk, and close it. */
2587static void close_sha1_file(int fd)
2588{
2589        if (fsync_object_files)
2590                fsync_or_die(fd, "sha1 file");
2591        if (close(fd) != 0)
2592                die_errno("error when closing sha1 file");
2593}
2594
/*
 * Length of the directory part of "filename", counting the final '/'.
 * Returns 0 when "filename" contains no '/'.
 */
static inline int directory_size(const char *filename)
{
        const char *last_slash = strrchr(filename, '/');

        return last_slash ? last_slash - filename + 1 : 0;
}
2603
2604/*
2605 * This creates a temporary file in the same directory as the final
2606 * 'filename'
2607 *
2608 * We want to avoid cross-directory filename renames, because those
2609 * can have problems on various filesystems (FAT, NFS, Coda).
2610 */
2611static int create_tmpfile(struct strbuf *tmp, const char *filename)
2612{
2613        int fd, dirlen = directory_size(filename);
2614
2615        strbuf_reset(tmp);
2616        strbuf_add(tmp, filename, dirlen);
2617        strbuf_addstr(tmp, "tmp_obj_XXXXXX");
2618        fd = git_mkstemp_mode(tmp->buf, 0444);
2619        if (fd < 0 && dirlen && errno == ENOENT) {
2620                /*
2621                 * Make sure the directory exists; note that the contents
2622                 * of the buffer are undefined after mkstemp returns an
2623                 * error, so we have to rewrite the whole buffer from
2624                 * scratch.
2625                 */
2626                strbuf_reset(tmp);
2627                strbuf_add(tmp, filename, dirlen - 1);
2628                if (mkdir(tmp->buf, 0777) && errno != EEXIST)
2629                        return -1;
2630                if (adjust_shared_perm(tmp->buf))
2631                        return -1;
2632
2633                /* Try again */
2634                strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
2635                fd = git_mkstemp_mode(tmp->buf, 0444);
2636        }
2637        return fd;
2638}
2639
/*
 * Deflate "hdr" followed by "buf" into a temporary file created next
 * to the object's final location, then hand the temporary file to
 * finalize_object_file().
 *
 * The bytes fed to the compressor are hashed as they are consumed and
 * the digest is compared with "sha1"; a mismatch is fatal.  A non-zero
 * "mtime" is applied to the temporary file with utime() before it is
 * finalized.
 *
 * Returns the result of finalize_object_file(), or of
 * error()/error_errno() when the temporary file cannot be created.
 * Compression, write and hash-mismatch failures die().
 */
static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
                              const void *buf, unsigned long len, time_t mtime)
{
        int fd, ret;
        unsigned char compressed[4096];
        git_zstream stream;
        git_SHA_CTX c;
        unsigned char parano_sha1[20];
        /* static: the same strbuf is reused by every call */
        static struct strbuf tmp_file = STRBUF_INIT;
        const char *filename = sha1_file_name(sha1);

        fd = create_tmpfile(&tmp_file, filename);
        if (fd < 0) {
                if (errno == EACCES)
                        return error("insufficient permission for adding an object to repository database %s", get_object_directory());
                else
                        return error_errno("unable to create temporary file");
        }

        /* Set it up */
        git_deflate_init(&stream, zlib_compression_level);
        stream.next_out = compressed;
        stream.avail_out = sizeof(compressed);
        git_SHA1_Init(&c);

        /* First header.. */
        stream.next_in = (unsigned char *)hdr;
        stream.avail_in = hdrlen;
        while (git_deflate(&stream, 0) == Z_OK)
                ; /* nothing */
        git_SHA1_Update(&c, hdr, hdrlen);

        /* Then the data itself.. */
        stream.next_in = (void *)buf;
        stream.avail_in = len;
        do {
                /* remember where this round started so only consumed input is hashed */
                unsigned char *in0 = stream.next_in;
                ret = git_deflate(&stream, Z_FINISH);
                git_SHA1_Update(&c, in0, stream.next_in - in0);
                /* flush whatever output this round produced, then reuse the buffer */
                if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
                        die("unable to write sha1 file");
                stream.next_out = compressed;
                stream.avail_out = sizeof(compressed);
        } while (ret == Z_OK);

        if (ret != Z_STREAM_END)
                die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
        ret = git_deflate_end_gently(&stream);
        if (ret != Z_OK)
                die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
        /* the hash of what was actually compressed must match the requested name */
        git_SHA1_Final(parano_sha1, &c);
        if (hashcmp(sha1, parano_sha1) != 0)
                die("confused by unstable object source data for %s", sha1_to_hex(sha1));

        close_sha1_file(fd);

        /* a zero mtime means "leave the timestamps alone" */
        if (mtime) {
                struct utimbuf utb;
                utb.actime = mtime;
                utb.modtime = mtime;
                if (utime(tmp_file.buf, &utb) < 0)
                        warning_errno("failed utime() on %s", tmp_file.buf);
        }

        return finalize_object_file(tmp_file.buf, filename);
}
2706
/*
 * Thin wrapper: forwards "sha1" to check_and_freshen() with 1 as the
 * second argument and returns its result.
 */
static int freshen_loose_object(const unsigned char *sha1)
{
        return check_and_freshen(sha1, 1);
}
2711
2712static int freshen_packed_object(const unsigned char *sha1)
2713{
2714        struct pack_entry e;
2715        if (!find_pack_entry(sha1, &e))
2716                return 0;
2717        if (e.p->freshened)
2718                return 1;
2719        if (!freshen_file(e.p->pack_name))
2720                return 0;
2721        e.p->freshened = 1;
2722        return 1;
2723}
2724
/*
 * Compute the name of the object made of "len" bytes at "buf" with the
 * given "type" into "sha1", and write it as a loose object unless it
 * could be freshened in a pack or as an existing loose object.
 *
 * Returns 0 when the object was freshened, otherwise the result of
 * write_loose_object().
 */
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
{
        char header[32];
        int header_len = sizeof(header);

        write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);

        /*
         * Normally if we have it in the pack then we do not bother
         * writing it out into .git/objects/??/?{38} file.
         */
        if (freshen_packed_object(sha1))
                return 0;
        if (freshen_loose_object(sha1))
                return 0;

        return write_loose_object(sha1, header, header_len, buf, len, 0);
}
2738
2739int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
2740                             unsigned char *sha1, unsigned flags)
2741{
2742        char *header;
2743        int hdrlen, status = 0;
2744
2745        /* type string, SP, %lu of the length plus NUL must fit this */
2746        hdrlen = strlen(type) + 32;
2747        header = xmalloc(hdrlen);
2748        write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);
2749
2750        if (!(flags & HASH_WRITE_OBJECT))
2751                goto cleanup;
2752        if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
2753                goto cleanup;
2754        status = write_loose_object(sha1, header, hdrlen, buf, len, 0);
2755
2756cleanup:
2757        free(header);
2758        return status;
2759}
2760
2761int force_object_loose(const unsigned char *sha1, time_t mtime)
2762{
2763        void *buf;
2764        unsigned long len;
2765        enum object_type type;
2766        char hdr[32];
2767        int hdrlen;
2768        int ret;
2769
2770        if (has_loose_object(sha1))
2771                return 0;
2772        buf = read_object(sha1, &type, &len);
2773        if (!buf)
2774                return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
2775        hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1;
2776        ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
2777        free(buf);
2778
2779        return ret;
2780}
2781
/*
 * Returns 1 when stat() succeeds on the path given by
 * sha1_pack_index_name(sha1), 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
        struct stat st;

        return !stat(sha1_pack_index_name(sha1), &st);
}
2789
/*
 * Returns the result of find_pack_entry() for "sha1"; the entry it
 * fills in is discarded.
 */
int has_sha1_pack(const unsigned char *sha1)
{
        struct pack_entry e;
        return find_pack_entry(sha1, &e);
}
2795
2796int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
2797{
2798        if (!startup_info->have_repository)
2799                return 0;
2800        return sha1_object_info_extended(sha1, NULL,
2801                                         flags | OBJECT_INFO_SKIP_CACHED) >= 0;
2802}
2803
/* struct object_id variant of has_sha1_file(): passes oid->hash through. */
int has_object_file(const struct object_id *oid)
{
        return has_sha1_file(oid->hash);
}
2808
/*
 * struct object_id variant of has_sha1_file_with_flags(): passes
 * oid->hash and "flags" through unchanged.
 */
int has_object_file_with_flags(const struct object_id *oid, int flags)
{
        return has_sha1_file_with_flags(oid->hash, flags);
}
2813
2814static void check_tree(const void *buf, size_t size)
2815{
2816        struct tree_desc desc;
2817        struct name_entry entry;
2818
2819        init_tree_desc(&desc, buf, size);
2820        while (tree_entry(&desc, &entry))
2821                /* do nothing
2822                 * tree_entry() will die() on malformed entries */
2823                ;
2824}
2825
2826static void check_commit(const void *buf, size_t size)
2827{
2828        struct commit c;
2829        memset(&c, 0, sizeof(c));
2830        if (parse_commit_buffer(&c, buf, size))
2831                die("corrupt commit");
2832}
2833
2834static void check_tag(const void *buf, size_t size)
2835{
2836        struct tag t;
2837        memset(&t, 0, sizeof(t));
2838        if (parse_tag_buffer(&t, buf, size))
2839                die("corrupt tag");
2840}
2841
2842static int index_mem(unsigned char *sha1, void *buf, size_t size,
2843                     enum object_type type,
2844                     const char *path, unsigned flags)
2845{
2846        int ret, re_allocated = 0;
2847        int write_object = flags & HASH_WRITE_OBJECT;
2848
2849        if (!type)
2850                type = OBJ_BLOB;
2851
2852        /*
2853         * Convert blobs to git internal format
2854         */
2855        if ((type == OBJ_BLOB) && path) {
2856                struct strbuf nbuf = STRBUF_INIT;
2857                if (convert_to_git(&the_index, path, buf, size, &nbuf,
2858                                   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
2859                        buf = strbuf_detach(&nbuf, &size);
2860                        re_allocated = 1;
2861                }
2862        }
2863        if (flags & HASH_FORMAT_CHECK) {
2864                if (type == OBJ_TREE)
2865                        check_tree(buf, size);
2866                if (type == OBJ_COMMIT)
2867                        check_commit(buf, size);
2868                if (type == OBJ_TAG)
2869                        check_tag(buf, size);
2870        }
2871
2872        if (write_object)
2873                ret = write_sha1_file(buf, size, typename(type), sha1);
2874        else
2875                ret = hash_sha1_file(buf, size, typename(type), sha1);
2876        if (re_allocated)
2877                free(buf);
2878        return ret;
2879}
2880
2881static int index_stream_convert_blob(unsigned char *sha1, int fd,
2882                                     const char *path, unsigned flags)
2883{
2884        int ret;
2885        const int write_object = flags & HASH_WRITE_OBJECT;
2886        struct strbuf sbuf = STRBUF_INIT;
2887
2888        assert(path);
2889        assert(would_convert_to_git_filter_fd(path));
2890
2891        convert_to_git_filter_fd(&the_index, path, fd, &sbuf,
2892                                 write_object ? safe_crlf : SAFE_CRLF_FALSE);
2893
2894        if (write_object)
2895                ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
2896                                      sha1);
2897        else
2898                ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
2899                                     sha1);
2900        strbuf_release(&sbuf);
2901        return ret;
2902}
2903
2904static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
2905                      const char *path, unsigned flags)
2906{
2907        struct strbuf sbuf = STRBUF_INIT;
2908        int ret;
2909
2910        if (strbuf_read(&sbuf, fd, 4096) >= 0)
2911                ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);
2912        else
2913                ret = -1;
2914        strbuf_release(&sbuf);
2915        return ret;
2916}
2917
2918#define SMALL_FILE_SIZE (32*1024)
2919
2920static int index_core(unsigned char *sha1, int fd, size_t size,
2921                      enum object_type type, const char *path,
2922                      unsigned flags)
2923{
2924        int ret;
2925
2926        if (!size) {
2927                ret = index_mem(sha1, "", size, type, path, flags);
2928        } else if (size <= SMALL_FILE_SIZE) {
2929                char *buf = xmalloc(size);
2930                if (size == read_in_full(fd, buf, size))
2931                        ret = index_mem(sha1, buf, size, type, path, flags);
2932                else
2933                        ret = error_errno("short read");
2934                free(buf);
2935        } else {
2936                void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
2937                ret = index_mem(sha1, buf, size, type, path, flags);
2938                munmap(buf, size);
2939        }
2940        return ret;
2941}
2942
/*
 * This creates one packfile per large blob unless bulk-checkin
 * machinery is "plugged".
 *
 * This also bypasses the usual "convert-to-git" dance, and that is on
 * purpose. We could write a streaming version of the converting
 * functions and insert that before feeding the data to fast-import
 * (or equivalent in-core API described above). However, that is
 * somewhat complicated, as we do not know the size of the filter
 * result, which we need to know beforehand when writing a git object.
 * Since the primary motivation for trying to stream from the working
 * tree file and to avoid mmaping it in core is to deal with large
 * binary blobs, they generally do not want to get any conversion, and
 * callers should avoid this code path when filters are requested.
 *
 * The function itself only forwards all of its arguments to
 * index_bulk_checkin() and returns that function's result.
 */
static int index_stream(unsigned char *sha1, int fd, size_t size,
                        enum object_type type, const char *path,
                        unsigned flags)
{
        return index_bulk_checkin(sha1, fd, size, type, path, flags);
}
2964
2965int index_fd(unsigned char *sha1, int fd, struct stat *st,
2966             enum object_type type, const char *path, unsigned flags)
2967{
2968        int ret;
2969
2970        /*
2971         * Call xsize_t() only when needed to avoid potentially unnecessary
2972         * die() for large files.
2973         */
2974        if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
2975                ret = index_stream_convert_blob(sha1, fd, path, flags);
2976        else if (!S_ISREG(st->st_mode))
2977                ret = index_pipe(sha1, fd, type, path, flags);
2978        else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
2979                 (path && would_convert_to_git(&the_index, path)))
2980                ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
2981                                 flags);
2982        else
2983                ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
2984                                   flags);
2985        close(fd);
2986        return ret;
2987}
2988
2989int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
2990{
2991        int fd;
2992        struct strbuf sb = STRBUF_INIT;
2993
2994        switch (st->st_mode & S_IFMT) {
2995        case S_IFREG:
2996                fd = open(path, O_RDONLY);
2997                if (fd < 0)
2998                        return error_errno("open(\"%s\")", path);
2999                if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
3000                        return error("%s: failed to insert into database",
3001                                     path);
3002                break;
3003        case S_IFLNK:
3004                if (strbuf_readlink(&sb, path, st->st_size))
3005                        return error_errno("readlink(\"%s\")", path);
3006                if (!(flags & HASH_WRITE_OBJECT))
3007                        hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
3008                else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
3009                        return error("%s: failed to insert into database",
3010                                     path);
3011                strbuf_release(&sb);
3012                break;
3013        case S_IFDIR:
3014                return resolve_gitlink_ref(path, "HEAD", sha1);
3015        default:
3016                return error("%s: unsupported file type", path);
3017        }
3018        return 0;
3019}
3020
3021int read_pack_header(int fd, struct pack_header *header)
3022{
3023        if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
3024                /* "eof before pack header was fully read" */
3025                return PH_ERROR_EOF;
3026
3027        if (header->hdr_signature != htonl(PACK_SIGNATURE))
3028                /* "protocol error (pack signature mismatch detected)" */
3029                return PH_ERROR_PACK_SIGNATURE;
3030        if (!pack_version_ok(header->hdr_version))
3031                /* "protocol error (pack version unsupported)" */
3032                return PH_ERROR_PROTOCOL;
3033        return 0;
3034}
3035
3036void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
3037{
3038        enum object_type type = sha1_object_info(sha1, NULL);
3039        if (type < 0)
3040                die("%s is not a valid object", sha1_to_hex(sha1));
3041        if (type != expect)
3042                die("%s is not a valid '%s' object", sha1_to_hex(sha1),
3043                    typename(expect));
3044}
3045
/*
 * Iterate over the entries of one loose-object fan-out directory,
 * "<path>/<subdir_nr as two hex digits>".
 *
 *  - obj_cb is called for entries whose name, prefixed with the two
 *    subdirectory digits, parses as a hex object id;
 *  - cruft_cb is called for every other entry (except "." and "..");
 *  - subdir_cb is called once after the scan, with the subdirectory
 *    path, provided no callback returned non-zero.
 *
 * Any callback may be NULL.  A non-zero callback result stops the scan
 * and is returned.  A missing subdirectory is not an error (returns
 * 0); other opendir() failures return the result of error_errno().
 * "path" is used as scratch space and restored to its original length
 * before returning.  subdir_nr above 0xff is a BUG().
 */
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
                                struct strbuf *path,
                                each_loose_object_fn obj_cb,
                                each_loose_cruft_fn cruft_cb,
                                each_loose_subdir_fn subdir_cb,
                                void *data)
{
        size_t origlen, baselen;
        DIR *dir;
        struct dirent *de;
        int r = 0;

        if (subdir_nr > 0xff)
                BUG("invalid loose object subdirectory: %x", subdir_nr);

        /* origlen: caller's path; baselen: path of the subdirectory itself */
        origlen = path->len;
        strbuf_complete(path, '/');
        strbuf_addf(path, "%02x", subdir_nr);
        baselen = path->len;

        dir = opendir(path->buf);
        if (!dir) {
                /* a fan-out directory that does not exist is simply empty */
                if (errno != ENOENT)
                        r = error_errno("unable to open %s", path->buf);
                strbuf_setlen(path, origlen);
                return r;
        }

        while ((de = readdir(dir))) {
                if (is_dot_or_dotdot(de->d_name))
                        continue;

                /* rebuild "<subdir>/<entry>" for this entry */
                strbuf_setlen(path, baselen);
                strbuf_addf(path, "/%s", de->d_name);

                if (strlen(de->d_name) == GIT_SHA1_HEXSZ - 2)  {
                        char hex[GIT_MAX_HEXSZ+1];
                        struct object_id oid;

                        xsnprintf(hex, sizeof(hex), "%02x%s",
                                  subdir_nr, de->d_name);
                        if (!get_oid_hex(hex, &oid)) {
                                if (obj_cb) {
                                        r = obj_cb(&oid, path->buf, data);
                                        if (r)
                                                break;
                                }
                                continue;
                        }
                }

                /* right length but not hex, or wrong length: treat as cruft */
                if (cruft_cb) {
                        r = cruft_cb(de->d_name, path->buf, data);
                        if (r)
                                break;
                }
        }
        closedir(dir);

        /* report the subdirectory itself only after a clean scan */
        strbuf_setlen(path, baselen);
        if (!r && subdir_cb)
                r = subdir_cb(subdir_nr, path->buf, data);

        strbuf_setlen(path, origlen);

        return r;
}
3113
3114int for_each_loose_file_in_objdir_buf(struct strbuf *path,
3115                            each_loose_object_fn obj_cb,
3116                            each_loose_cruft_fn cruft_cb,
3117                            each_loose_subdir_fn subdir_cb,
3118                            void *data)
3119{
3120        int r = 0;
3121        int i;
3122
3123        for (i = 0; i < 256; i++) {
3124                r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
3125                                                subdir_cb, data);
3126                if (r)
3127                        break;
3128        }
3129
3130        return r;
3131}
3132
3133int for_each_loose_file_in_objdir(const char *path,
3134                                  each_loose_object_fn obj_cb,
3135                                  each_loose_cruft_fn cruft_cb,
3136                                  each_loose_subdir_fn subdir_cb,
3137                                  void *data)
3138{
3139        struct strbuf buf = STRBUF_INIT;
3140        int r;
3141
3142        strbuf_addstr(&buf, path);
3143        r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
3144                                              subdir_cb, data);
3145        strbuf_release(&buf);
3146
3147        return r;
3148}
3149
/*
 * Callback state that for_each_loose_object() hands through
 * foreach_alt_odb() to loose_from_alt_odb().
 */
struct loose_alt_odb_data {
        /* per-object callback to run for each loose object found */
        each_loose_object_fn *cb;
        /* opaque pointer passed along to cb */
        void *data;
};
3154
3155static int loose_from_alt_odb(struct alternate_object_database *alt,
3156                              void *vdata)
3157{
3158        struct loose_alt_odb_data *data = vdata;
3159        struct strbuf buf = STRBUF_INIT;
3160        int r;
3161
3162        strbuf_addstr(&buf, alt->path);
3163        r = for_each_loose_file_in_objdir_buf(&buf,
3164                                              data->cb, NULL, NULL,
3165                                              data->data);
3166        strbuf_release(&buf);
3167        return r;
3168}
3169
3170int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
3171{
3172        struct loose_alt_odb_data alt;
3173        int r;
3174
3175        r = for_each_loose_file_in_objdir(get_object_directory(),
3176                                          cb, NULL, NULL, data);
3177        if (r)
3178                return r;
3179
3180        if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
3181                return 0;
3182
3183        alt.cb = cb;
3184        alt.data = data;
3185        return foreach_alt_odb(loose_from_alt_odb, &alt);
3186}
3187
3188static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
3189{
3190        uint32_t i;
3191        int r = 0;
3192
3193        for (i = 0; i < p->num_objects; i++) {
3194                struct object_id oid;
3195
3196                if (!nth_packed_object_oid(&oid, p, i))
3197                        return error("unable to get sha1 of object %u in %s",
3198                                     i, p->pack_name);
3199
3200                r = cb(&oid, p, i, data);
3201                if (r)
3202                        break;
3203        }
3204        return r;
3205}
3206
3207int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
3208{
3209        struct packed_git *p;
3210        int r = 0;
3211        int pack_errors = 0;
3212
3213        prepare_packed_git();
3214        for (p = packed_git; p; p = p->next) {
3215                if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
3216                        continue;
3217                if (open_pack_index(p)) {
3218                        pack_errors = 1;
3219                        continue;
3220                }
3221                r = for_each_object_in_pack(p, cb, data);
3222                if (r)
3223                        break;
3224        }
3225        return r ? r : pack_errors;
3226}
3227
/*
 * Verify a loose object by inflating the remainder of "stream" in
 * fixed-size pieces and hashing it, without holding the whole object
 * in memory.
 *
 * "hdr" holds what has already been inflated (stream->total_out
 * bytes: the object header, its NUL, and possibly some content);
 * "size" is the content size announced by the header.  "path" is only
 * used in the mismatch message.  The stream is ended with
 * git_inflate_end() on every path.
 *
 * Returns 0 if the stream ends cleanly with no trailing input and the
 * hash equals "expected_sha1"; otherwise reports with error() and
 * returns -1.
 */
static int check_stream_sha1(git_zstream *stream,
                             const char *hdr,
                             unsigned long size,
                             const char *path,
                             const unsigned char *expected_sha1)
{
        git_SHA_CTX c;
        unsigned char real_sha1[GIT_MAX_RAWSZ];
        unsigned char buf[4096];
        unsigned long total_read;
        int status = Z_OK;

        /* start the hash with everything already inflated into hdr */
        git_SHA1_Init(&c);
        git_SHA1_Update(&c, hdr, stream->total_out);

        /*
         * We already read some bytes into hdr, but the ones up to the NUL
         * do not count against the object's content size.
         */
        total_read = stream->total_out - strlen(hdr) - 1;

        /*
         * This size comparison must be "<=" to read the final zlib packets;
         * see the comment in unpack_sha1_rest for details.
         */
        while (total_read <= size &&
               (status == Z_OK || status == Z_BUF_ERROR)) {
                stream->next_out = buf;
                stream->avail_out = sizeof(buf);
                /* never ask for more than the header's announced size */
                if (size - total_read < stream->avail_out)
                        stream->avail_out = size - total_read;
                status = git_inflate(stream, Z_FINISH);
                git_SHA1_Update(&c, buf, stream->next_out - buf);
                total_read += stream->next_out - buf;
        }
        git_inflate_end(stream);

        if (status != Z_STREAM_END) {
                error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
                return -1;
        }
        /* input left over after the end of the zlib stream */
        if (stream->avail_in) {
                error("garbage at end of loose object '%s'",
                      sha1_to_hex(expected_sha1));
                return -1;
        }

        git_SHA1_Final(real_sha1, &c);
        if (hashcmp(expected_sha1, real_sha1)) {
                error("sha1 mismatch for %s (expected %s)", path,
                      sha1_to_hex(expected_sha1));
                return -1;
        }

        return 0;
}
3284
3285int read_loose_object(const char *path,
3286                      const unsigned char *expected_sha1,
3287                      enum object_type *type,
3288                      unsigned long *size,
3289                      void **contents)
3290{
3291        int ret = -1;
3292        void *map = NULL;
3293        unsigned long mapsize;
3294        git_zstream stream;
3295        char hdr[32];
3296
3297        *contents = NULL;
3298
3299        map = map_sha1_file_1(path, NULL, &mapsize);
3300        if (!map) {
3301                error_errno("unable to mmap %s", path);
3302                goto out;
3303        }
3304
3305        if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
3306                error("unable to unpack header of %s", path);
3307                goto out;
3308        }
3309
3310        *type = parse_sha1_header(hdr, size);
3311        if (*type < 0) {
3312                error("unable to parse header of %s", path);
3313                git_inflate_end(&stream);
3314                goto out;
3315        }
3316
3317        if (*type == OBJ_BLOB) {
3318                if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
3319                        goto out;
3320        } else {
3321                *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
3322                if (!*contents) {
3323                        error("unable to unpack contents of %s", path);
3324                        git_inflate_end(&stream);
3325                        goto out;
3326                }
3327                if (check_sha1_signature(expected_sha1, *contents,
3328                                         *size, typename(*type))) {
3329                        error("sha1 mismatch for %s (expected %s)", path,
3330                              sha1_to_hex(expected_sha1));
3331                        free(*contents);
3332                        goto out;
3333                }
3334        }
3335
3336        ret = 0; /* everything checks out */
3337
3338out:
3339        if (map)
3340                munmap(map, mapsize);
3341        return ret;
3342}