entry.c on commit "packfile: refactor hash search with fanout table" (b4e00f7)
   1#include "cache.h"
   2#include "blob.h"
   3#include "dir.h"
   4#include "streaming.h"
   5#include "submodule.h"
   6#include "progress.h"
   7
   8static void create_directories(const char *path, int path_len,
   9                               const struct checkout *state)
  10{
  11        char *buf = xmallocz(path_len);
  12        int len = 0;
  13
  14        while (len < path_len) {
  15                do {
  16                        buf[len] = path[len];
  17                        len++;
  18                } while (len < path_len && path[len] != '/');
  19                if (len >= path_len)
  20                        break;
  21                buf[len] = 0;
  22
  23                /*
  24                 * For 'checkout-index --prefix=<dir>', <dir> is
  25                 * allowed to be a symlink to an existing directory,
  26                 * and we set 'state->base_dir_len' below, such that
  27                 * we test the path components of the prefix with the
  28                 * stat() function instead of the lstat() function.
  29                 */
  30                if (has_dirs_only_path(buf, len, state->base_dir_len))
  31                        continue; /* ok, it is already a directory. */
  32
  33                /*
  34                 * If this mkdir() would fail, it could be that there
  35                 * is already a symlink or something else exists
  36                 * there, therefore we then try to unlink it and try
  37                 * one more time to create the directory.
  38                 */
  39                if (mkdir(buf, 0777)) {
  40                        if (errno == EEXIST && state->force &&
  41                            !unlink_or_warn(buf) && !mkdir(buf, 0777))
  42                                continue;
  43                        die_errno("cannot create directory at '%s'", buf);
  44                }
  45        }
  46        free(buf);
  47}
  48
  49static void remove_subtree(struct strbuf *path)
  50{
  51        DIR *dir = opendir(path->buf);
  52        struct dirent *de;
  53        int origlen = path->len;
  54
  55        if (!dir)
  56                die_errno("cannot opendir '%s'", path->buf);
  57        while ((de = readdir(dir)) != NULL) {
  58                struct stat st;
  59
  60                if (is_dot_or_dotdot(de->d_name))
  61                        continue;
  62
  63                strbuf_addch(path, '/');
  64                strbuf_addstr(path, de->d_name);
  65                if (lstat(path->buf, &st))
  66                        die_errno("cannot lstat '%s'", path->buf);
  67                if (S_ISDIR(st.st_mode))
  68                        remove_subtree(path);
  69                else if (unlink(path->buf))
  70                        die_errno("cannot unlink '%s'", path->buf);
  71                strbuf_setlen(path, origlen);
  72        }
  73        closedir(dir);
  74        if (rmdir(path->buf))
  75                die_errno("cannot rmdir '%s'", path->buf);
  76}
  77
  78static int create_file(const char *path, unsigned int mode)
  79{
  80        mode = (mode & 0100) ? 0777 : 0666;
  81        return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
  82}
  83
  84static void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
  85{
  86        enum object_type type;
  87        void *new = read_sha1_file(ce->oid.hash, &type, size);
  88
  89        if (new) {
  90                if (type == OBJ_BLOB)
  91                        return new;
  92                free(new);
  93        }
  94        return NULL;
  95}
  96
  97static int open_output_fd(char *path, const struct cache_entry *ce, int to_tempfile)
  98{
  99        int symlink = (ce->ce_mode & S_IFMT) != S_IFREG;
 100        if (to_tempfile) {
 101                xsnprintf(path, TEMPORARY_FILENAME_LENGTH, "%s",
 102                          symlink ? ".merge_link_XXXXXX" : ".merge_file_XXXXXX");
 103                return mkstemp(path);
 104        } else {
 105                return create_file(path, !symlink ? ce->ce_mode : 0666);
 106        }
 107}
 108
 109static int fstat_output(int fd, const struct checkout *state, struct stat *st)
 110{
 111        /* use fstat() only when path == ce->name */
 112        if (fstat_is_reliable() &&
 113            state->refresh_cache && !state->base_dir_len) {
 114                fstat(fd, st);
 115                return 1;
 116        }
 117        return 0;
 118}
 119
 120static int streaming_write_entry(const struct cache_entry *ce, char *path,
 121                                 struct stream_filter *filter,
 122                                 const struct checkout *state, int to_tempfile,
 123                                 int *fstat_done, struct stat *statbuf)
 124{
 125        int result = 0;
 126        int fd;
 127
 128        fd = open_output_fd(path, ce, to_tempfile);
 129        if (fd < 0)
 130                return -1;
 131
 132        result |= stream_blob_to_fd(fd, &ce->oid, filter, 1);
 133        *fstat_done = fstat_output(fd, state, statbuf);
 134        result |= close(fd);
 135
 136        if (result)
 137                unlink(path);
 138        return result;
 139}
 140
 141void enable_delayed_checkout(struct checkout *state)
 142{
 143        if (!state->delayed_checkout) {
 144                state->delayed_checkout = xmalloc(sizeof(*state->delayed_checkout));
 145                state->delayed_checkout->state = CE_CAN_DELAY;
 146                string_list_init(&state->delayed_checkout->filters, 0);
 147                string_list_init(&state->delayed_checkout->paths, 0);
 148        }
 149}
 150
 151static int remove_available_paths(struct string_list_item *item, void *cb_data)
 152{
 153        struct string_list *available_paths = cb_data;
 154        struct string_list_item *available;
 155
 156        available = string_list_lookup(available_paths, item->string);
 157        if (available)
 158                available->util = (void *)item->string;
 159        return !available;
 160}
 161
 162int finish_delayed_checkout(struct checkout *state)
 163{
 164        int errs = 0;
 165        unsigned delayed_object_count;
 166        off_t filtered_bytes = 0;
 167        struct string_list_item *filter, *path;
 168        struct progress *progress;
 169        struct delayed_checkout *dco = state->delayed_checkout;
 170
 171        if (!state->delayed_checkout)
 172                return errs;
 173
 174        dco->state = CE_RETRY;
 175        delayed_object_count = dco->paths.nr;
 176        progress = start_delayed_progress(_("Filtering content"), delayed_object_count);
 177        while (dco->filters.nr > 0) {
 178                for_each_string_list_item(filter, &dco->filters) {
 179                        struct string_list available_paths = STRING_LIST_INIT_NODUP;
 180                        display_progress(progress, delayed_object_count - dco->paths.nr);
 181
 182                        if (!async_query_available_blobs(filter->string, &available_paths)) {
 183                                /* Filter reported an error */
 184                                errs = 1;
 185                                filter->string = "";
 186                                continue;
 187                        }
 188                        if (available_paths.nr <= 0) {
 189                                /*
 190                                 * Filter responded with no entries. That means
 191                                 * the filter is done and we can remove the
 192                                 * filter from the list (see
 193                                 * "string_list_remove_empty_items" call below).
 194                                 */
 195                                filter->string = "";
 196                                continue;
 197                        }
 198
 199                        /*
 200                         * In dco->paths we store a list of all delayed paths.
 201                         * The filter just send us a list of available paths.
 202                         * Remove them from the list.
 203                         */
 204                        filter_string_list(&dco->paths, 0,
 205                                &remove_available_paths, &available_paths);
 206
 207                        for_each_string_list_item(path, &available_paths) {
 208                                struct cache_entry* ce;
 209
 210                                if (!path->util) {
 211                                        error("external filter '%s' signaled that '%s' "
 212                                              "is now available although it has not been "
 213                                              "delayed earlier",
 214                                              filter->string, path->string);
 215                                        errs |= 1;
 216
 217                                        /*
 218                                         * Do not ask the filter for available blobs,
 219                                         * again, as the filter is likely buggy.
 220                                         */
 221                                        filter->string = "";
 222                                        continue;
 223                                }
 224                                ce = index_file_exists(state->istate, path->string,
 225                                                       strlen(path->string), 0);
 226                                if (ce) {
 227                                        errs |= checkout_entry(ce, state, NULL);
 228                                        filtered_bytes += ce->ce_stat_data.sd_size;
 229                                        display_throughput(progress, filtered_bytes);
 230                                } else
 231                                        errs = 1;
 232                        }
 233                }
 234                string_list_remove_empty_items(&dco->filters, 0);
 235        }
 236        stop_progress(&progress);
 237        string_list_clear(&dco->filters, 0);
 238
 239        /* At this point we should not have any delayed paths anymore. */
 240        errs |= dco->paths.nr;
 241        for_each_string_list_item(path, &dco->paths) {
 242                error("'%s' was not filtered properly", path->string);
 243        }
 244        string_list_clear(&dco->paths, 0);
 245
 246        free(dco);
 247        state->delayed_checkout = NULL;
 248
 249        return errs;
 250}
 251
 252static int write_entry(struct cache_entry *ce,
 253                       char *path, const struct checkout *state, int to_tempfile)
 254{
 255        unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
 256        struct delayed_checkout *dco = state->delayed_checkout;
 257        int fd, ret, fstat_done = 0;
 258        char *new;
 259        struct strbuf buf = STRBUF_INIT;
 260        unsigned long size;
 261        ssize_t wrote;
 262        size_t newsize = 0;
 263        struct stat st;
 264        const struct submodule *sub;
 265
 266        if (ce_mode_s_ifmt == S_IFREG) {
 267                struct stream_filter *filter = get_stream_filter(ce->name,
 268                                                                 ce->oid.hash);
 269                if (filter &&
 270                    !streaming_write_entry(ce, path, filter,
 271                                           state, to_tempfile,
 272                                           &fstat_done, &st))
 273                        goto finish;
 274        }
 275
 276        switch (ce_mode_s_ifmt) {
 277        case S_IFLNK:
 278                new = read_blob_entry(ce, &size);
 279                if (!new)
 280                        return error("unable to read sha1 file of %s (%s)",
 281                                     path, oid_to_hex(&ce->oid));
 282
 283                /*
 284                 * We can't make a real symlink; write out a regular file entry
 285                 * with the symlink destination as its contents.
 286                 */
 287                if (!has_symlinks || to_tempfile)
 288                        goto write_file_entry;
 289
 290                ret = symlink(new, path);
 291                free(new);
 292                if (ret)
 293                        return error_errno("unable to create symlink %s", path);
 294                break;
 295
 296        case S_IFREG:
 297                /*
 298                 * We do not send the blob in case of a retry, so do not
 299                 * bother reading it at all.
 300                 */
 301                if (dco && dco->state == CE_RETRY) {
 302                        new = NULL;
 303                        size = 0;
 304                } else {
 305                        new = read_blob_entry(ce, &size);
 306                        if (!new)
 307                                return error("unable to read sha1 file of %s (%s)",
 308                                             path, oid_to_hex(&ce->oid));
 309                }
 310
 311                /*
 312                 * Convert from git internal format to working tree format
 313                 */
 314                if (dco && dco->state != CE_NO_DELAY) {
 315                        ret = async_convert_to_working_tree(ce->name, new,
 316                                                            size, &buf, dco);
 317                        if (ret && string_list_has_string(&dco->paths, ce->name)) {
 318                                free(new);
 319                                goto delayed;
 320                        }
 321                } else
 322                        ret = convert_to_working_tree(ce->name, new, size, &buf);
 323
 324                if (ret) {
 325                        free(new);
 326                        new = strbuf_detach(&buf, &newsize);
 327                        size = newsize;
 328                }
 329                /*
 330                 * No "else" here as errors from convert are OK at this
 331                 * point. If the error would have been fatal (e.g.
 332                 * filter is required), then we would have died already.
 333                 */
 334
 335        write_file_entry:
 336                fd = open_output_fd(path, ce, to_tempfile);
 337                if (fd < 0) {
 338                        free(new);
 339                        return error_errno("unable to create file %s", path);
 340                }
 341
 342                wrote = write_in_full(fd, new, size);
 343                if (!to_tempfile)
 344                        fstat_done = fstat_output(fd, state, &st);
 345                close(fd);
 346                free(new);
 347                if (wrote < 0)
 348                        return error("unable to write file %s", path);
 349                break;
 350
 351        case S_IFGITLINK:
 352                if (to_tempfile)
 353                        return error("cannot create temporary submodule %s", path);
 354                if (mkdir(path, 0777) < 0)
 355                        return error("cannot create submodule directory %s", path);
 356                sub = submodule_from_ce(ce);
 357                if (sub)
 358                        return submodule_move_head(ce->name,
 359                                NULL, oid_to_hex(&ce->oid),
 360                                state->force ? SUBMODULE_MOVE_HEAD_FORCE : 0);
 361                break;
 362
 363        default:
 364                return error("unknown file mode for %s in index", path);
 365        }
 366
 367finish:
 368        if (state->refresh_cache) {
 369                assert(state->istate);
 370                if (!fstat_done)
 371                        if (lstat(ce->name, &st) < 0)
 372                                return error_errno("unable to stat just-written file %s",
 373                                                   ce->name);
 374                fill_stat_cache_info(ce, &st);
 375                ce->ce_flags |= CE_UPDATE_IN_BASE;
 376                state->istate->cache_changed |= CE_ENTRY_CHANGED;
 377        }
 378delayed:
 379        return 0;
 380}
 381
 382/*
 383 * This is like 'lstat()', except it refuses to follow symlinks
 384 * in the path, after skipping "skiplen".
 385 */
 386static int check_path(const char *path, int len, struct stat *st, int skiplen)
 387{
 388        const char *slash = path + len;
 389
 390        while (path < slash && *slash != '/')
 391                slash--;
 392        if (!has_dirs_only_path(path, slash - path, skiplen)) {
 393                errno = ENOENT;
 394                return -1;
 395        }
 396        return lstat(path, st);
 397}
 398
 399/*
 400 * Write the contents from ce out to the working tree.
 401 *
 402 * When topath[] is not NULL, instead of writing to the working tree
 403 * file named by ce, a temporary file is created by this function and
 404 * its name is returned in topath[], which must be able to hold at
 405 * least TEMPORARY_FILENAME_LENGTH bytes long.
 406 */
 407int checkout_entry(struct cache_entry *ce,
 408                   const struct checkout *state, char *topath)
 409{
 410        static struct strbuf path = STRBUF_INIT;
 411        struct stat st;
 412
 413        if (topath)
 414                return write_entry(ce, topath, state, 1);
 415
 416        strbuf_reset(&path);
 417        strbuf_add(&path, state->base_dir, state->base_dir_len);
 418        strbuf_add(&path, ce->name, ce_namelen(ce));
 419
 420        if (!check_path(path.buf, path.len, &st, state->base_dir_len)) {
 421                const struct submodule *sub;
 422                unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
 423                /*
 424                 * Needs to be checked before !changed returns early,
 425                 * as the possibly empty directory was not changed
 426                 */
 427                sub = submodule_from_ce(ce);
 428                if (sub) {
 429                        int err;
 430                        if (!is_submodule_populated_gently(ce->name, &err)) {
 431                                struct stat sb;
 432                                if (lstat(ce->name, &sb))
 433                                        die(_("could not stat file '%s'"), ce->name);
 434                                if (!(st.st_mode & S_IFDIR))
 435                                        unlink_or_warn(ce->name);
 436
 437                                return submodule_move_head(ce->name,
 438                                        NULL, oid_to_hex(&ce->oid), 0);
 439                        } else
 440                                return submodule_move_head(ce->name,
 441                                        "HEAD", oid_to_hex(&ce->oid),
 442                                        state->force ? SUBMODULE_MOVE_HEAD_FORCE : 0);
 443                }
 444
 445                if (!changed)
 446                        return 0;
 447                if (!state->force) {
 448                        if (!state->quiet)
 449                                fprintf(stderr,
 450                                        "%s already exists, no checkout\n",
 451                                        path.buf);
 452                        return -1;
 453                }
 454
 455                /*
 456                 * We unlink the old file, to get the new one with the
 457                 * right permissions (including umask, which is nasty
 458                 * to emulate by hand - much easier to let the system
 459                 * just do the right thing)
 460                 */
 461                if (S_ISDIR(st.st_mode)) {
 462                        /* If it is a gitlink, leave it alone! */
 463                        if (S_ISGITLINK(ce->ce_mode))
 464                                return 0;
 465                        if (!state->force)
 466                                return error("%s is a directory", path.buf);
 467                        remove_subtree(&path);
 468                } else if (unlink(path.buf))
 469                        return error_errno("unable to unlink old '%s'", path.buf);
 470        } else if (state->not_new)
 471                return 0;
 472
 473        create_directories(path.buf, path.len, state);
 474        return write_entry(ce, path.buf, state, 0);
 475}