dir.c on commit preparing for 2.10.3 (c3808ca)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * See Documentation/technical/api-directory-listing.txt
   6 *
   7 * Copyright (C) Linus Torvalds, 2005-2006
   8 *               Junio Hamano, 2005-2006
   9 */
  10#include "cache.h"
  11#include "dir.h"
  12#include "refs.h"
  13#include "wildmatch.h"
  14#include "pathspec.h"
  15#include "utf8.h"
  16#include "varint.h"
  17#include "ewah/ewok.h"
  18
   19struct path_simplify {
   20        int len;                /* presumably the byte length of 'path' -- TODO confirm against the code that fills this in */
   21        const char *path;       /* path string this entry refers to; ownership is not visible from here */
   22};
  23
   24/*
   25 * Tells read_directory_recursive how a file or directory should be treated.
   26 * Values are ordered by significance, e.g. if a directory contains both
   27 * excluded and untracked files, it is listed as untracked because
   28 * path_untracked > path_excluded.
   29 */
   30enum path_treatment {
   31        path_none = 0,          /* lowest value in the significance ordering */
   32        path_recurse,
   33        path_excluded,          /* outranked by path_untracked, see above */
   34        path_untracked          /* highest value in the significance ordering */
   35};
  36
   37/*
   38 * Support data structure for our opendir/readdir/closedir wrappers
   39 */
   40struct cached_dir {
   41        DIR *fdir;                              /* directory stream handle; NOTE(review): presumably NULL when the untracked cache is used instead -- confirm */
   42        struct untracked_cache_dir *untracked;  /* untracked-cache node for this directory, if any -- TODO confirm */
   43        int nr_files;                           /* NOTE(review): looks like an iteration cursor over cached files -- confirm */
   44        int nr_dirs;                            /* NOTE(review): looks like an iteration cursor over cached subdirectories -- confirm */
   45
   46        struct dirent *de;                      /* presumably the most recently read directory entry -- verify in the wrappers */
   47        const char *file;                       /* presumably the current file name when served from the cache -- verify */
   48        struct untracked_cache_dir *ucd;        /* presumably the current subdirectory node when served from the cache -- verify */
   49};
  50
   /*
    * Forward declarations of static helpers that are referenced before
    * their definitions (presumably further down in this file).
    */
   51static enum path_treatment read_directory_recursive(struct dir_struct *dir,
   52        const char *path, int len, struct untracked_cache_dir *untracked,
   53        int check_only, const struct path_simplify *simplify);
   54static int get_dtype(struct dirent *de, const char *path, int len);
  55
  56int fspathcmp(const char *a, const char *b)
  57{
  58        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  59}
  60
  61int fspathncmp(const char *a, const char *b, size_t count)
  62{
  63        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  64}
  65
  66int git_fnmatch(const struct pathspec_item *item,
  67                const char *pattern, const char *string,
  68                int prefix)
  69{
  70        if (prefix > 0) {
  71                if (ps_strncmp(item, pattern, string, prefix))
  72                        return WM_NOMATCH;
  73                pattern += prefix;
  74                string += prefix;
  75        }
  76        if (item->flags & PATHSPEC_ONESTAR) {
  77                int pattern_len = strlen(++pattern);
  78                int string_len = strlen(string);
  79                return string_len < pattern_len ||
  80                        ps_strcmp(item, pattern,
  81                                  string + string_len - pattern_len);
  82        }
  83        if (item->magic & PATHSPEC_GLOB)
  84                return wildmatch(pattern, string,
  85                                 WM_PATHNAME |
  86                                 (item->magic & PATHSPEC_ICASE ? WM_CASEFOLD : 0),
  87                                 NULL);
  88        else
  89                /* wildmatch has not learned no FNM_PATHNAME mode yet */
  90                return wildmatch(pattern, string,
  91                                 item->magic & PATHSPEC_ICASE ? WM_CASEFOLD : 0,
  92                                 NULL);
  93}
  94
  95static int fnmatch_icase_mem(const char *pattern, int patternlen,
  96                             const char *string, int stringlen,
  97                             int flags)
  98{
  99        int match_status;
 100        struct strbuf pat_buf = STRBUF_INIT;
 101        struct strbuf str_buf = STRBUF_INIT;
 102        const char *use_pat = pattern;
 103        const char *use_str = string;
 104
 105        if (pattern[patternlen]) {
 106                strbuf_add(&pat_buf, pattern, patternlen);
 107                use_pat = pat_buf.buf;
 108        }
 109        if (string[stringlen]) {
 110                strbuf_add(&str_buf, string, stringlen);
 111                use_str = str_buf.buf;
 112        }
 113
 114        if (ignore_case)
 115                flags |= WM_CASEFOLD;
 116        match_status = wildmatch(use_pat, use_str, flags, NULL);
 117
 118        strbuf_release(&pat_buf);
 119        strbuf_release(&str_buf);
 120
 121        return match_status;
 122}
 123
 124static size_t common_prefix_len(const struct pathspec *pathspec)
 125{
 126        int n;
 127        size_t max = 0;
 128
 129        /*
 130         * ":(icase)path" is treated as a pathspec full of
 131         * wildcard. In other words, only prefix is considered common
 132         * prefix. If the pathspec is abc/foo abc/bar, running in
 133         * subdir xyz, the common prefix is still xyz, not xuz/abc as
 134         * in non-:(icase).
 135         */
 136        GUARD_PATHSPEC(pathspec,
 137                       PATHSPEC_FROMTOP |
 138                       PATHSPEC_MAXDEPTH |
 139                       PATHSPEC_LITERAL |
 140                       PATHSPEC_GLOB |
 141                       PATHSPEC_ICASE |
 142                       PATHSPEC_EXCLUDE);
 143
 144        for (n = 0; n < pathspec->nr; n++) {
 145                size_t i = 0, len = 0, item_len;
 146                if (pathspec->items[n].magic & PATHSPEC_EXCLUDE)
 147                        continue;
 148                if (pathspec->items[n].magic & PATHSPEC_ICASE)
 149                        item_len = pathspec->items[n].prefix;
 150                else
 151                        item_len = pathspec->items[n].nowildcard_len;
 152                while (i < item_len && (n == 0 || i < max)) {
 153                        char c = pathspec->items[n].match[i];
 154                        if (c != pathspec->items[0].match[i])
 155                                break;
 156                        if (c == '/')
 157                                len = i + 1;
 158                        i++;
 159                }
 160                if (n == 0 || len < max) {
 161                        max = len;
 162                        if (!max)
 163                                break;
 164                }
 165        }
 166        return max;
 167}
 168
 169/*
 170 * Returns a copy of the longest leading path common among all
 171 * pathspecs.
 172 */
 173char *common_prefix(const struct pathspec *pathspec)
 174{
 175        unsigned long len = common_prefix_len(pathspec);
 176
 177        return len ? xmemdupz(pathspec->items[0].match, len) : NULL;
 178}
 179
 180int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec)
 181{
 182        size_t len;
 183
 184        /*
 185         * Calculate common prefix for the pathspec, and
 186         * use that to optimize the directory walk
 187         */
 188        len = common_prefix_len(pathspec);
 189
 190        /* Read the directory and prune it */
 191        read_directory(dir, pathspec->nr ? pathspec->_raw[0] : "", len, pathspec);
 192        return len;
 193}
 194
 195int within_depth(const char *name, int namelen,
 196                        int depth, int max_depth)
 197{
 198        const char *cp = name, *cpe = name + namelen;
 199
 200        while (cp < cpe) {
 201                if (*cp++ != '/')
 202                        continue;
 203                depth++;
 204                if (depth > max_depth)
 205                        return 0;
 206        }
 207        return 1;
 208}
 209
  210#define DO_MATCH_EXCLUDE   1 /* do_match_pathspec(): consider only :(exclude) items instead of skipping them */
  211#define DO_MATCH_DIRECTORY 2 /* match_pathspec_item(): let a pathspec with a trailing '/' exactly match the name without it */
 212
 213/*
 214 * Does 'match' match the given name?
 215 * A match is found if
 216 *
 217 * (1) the 'match' string is leading directory of 'name', or
 218 * (2) the 'match' string is a wildcard and matches 'name', or
 219 * (3) the 'match' string is exactly the same as 'name'.
 220 *
 221 * and the return value tells which case it was.
 222 *
 223 * It returns 0 when there is no match.
 224 */
 225static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 226                               const char *name, int namelen, unsigned flags)
 227{
 228        /* name/namelen has prefix cut off by caller */
 229        const char *match = item->match + prefix;
 230        int matchlen = item->len - prefix;
 231
 232        /*
 233         * The normal call pattern is:
 234         * 1. prefix = common_prefix_len(ps);
 235         * 2. prune something, or fill_directory
 236         * 3. match_pathspec()
 237         *
 238         * 'prefix' at #1 may be shorter than the command's prefix and
 239         * it's ok for #2 to match extra files. Those extras will be
 240         * trimmed at #3.
 241         *
 242         * Suppose the pathspec is 'foo' and '../bar' running from
 243         * subdir 'xyz'. The common prefix at #1 will be empty, thanks
 244         * to "../". We may have xyz/foo _and_ XYZ/foo after #2. The
 245         * user does not want XYZ/foo, only the "foo" part should be
 246         * case-insensitive. We need to filter out XYZ/foo here. In
 247         * other words, we do not trust the caller on comparing the
 248         * prefix part when :(icase) is involved. We do exact
 249         * comparison ourselves.
 250         *
 251         * Normally the caller (common_prefix_len() in fact) does
 252         * _exact_ matching on name[-prefix+1..-1] and we do not need
 253         * to check that part. Be defensive and check it anyway, in
 254         * case common_prefix_len is changed, or a new caller is
 255         * introduced that does not use common_prefix_len.
 256         *
 257         * If the penalty turns out too high when prefix is really
 258         * long, maybe change it to
 259         * strncmp(match, name, item->prefix - prefix)
 260         */
 261        if (item->prefix && (item->magic & PATHSPEC_ICASE) &&
 262            strncmp(item->match, name - prefix, item->prefix))
 263                return 0;
 264
 265        /* If the match was just the prefix, we matched */
 266        if (!*match)
 267                return MATCHED_RECURSIVELY;
 268
 269        if (matchlen <= namelen && !ps_strncmp(item, match, name, matchlen)) {
 270                if (matchlen == namelen)
 271                        return MATCHED_EXACTLY;
 272
 273                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 274                        return MATCHED_RECURSIVELY;
 275        } else if ((flags & DO_MATCH_DIRECTORY) &&
 276                   match[matchlen - 1] == '/' &&
 277                   namelen == matchlen - 1 &&
 278                   !ps_strncmp(item, match, name, namelen))
 279                return MATCHED_EXACTLY;
 280
 281        if (item->nowildcard_len < item->len &&
 282            !git_fnmatch(item, match, name,
 283                         item->nowildcard_len - prefix))
 284                return MATCHED_FNMATCH;
 285
 286        return 0;
 287}
 288
 289/*
 290 * Given a name and a list of pathspecs, returns the nature of the
 291 * closest (i.e. most specific) match of the name to any of the
 292 * pathspecs.
 293 *
 294 * The caller typically calls this multiple times with the same
 295 * pathspec and seen[] array but with different name/namelen
 296 * (e.g. entries from the index) and is interested in seeing if and
 297 * how each pathspec matches all the names it calls this function
 298 * with.  A mark is left in the seen[] array for each pathspec element
 299 * indicating the closest type of match that element achieved, so if
 300 * seen[n] remains zero after multiple invocations, that means the nth
 301 * pathspec did not match any names, which could indicate that the
 302 * user mistyped the nth pathspec.
 303 */
 304static int do_match_pathspec(const struct pathspec *ps,
 305                             const char *name, int namelen,
 306                             int prefix, char *seen,
 307                             unsigned flags)
 308{
 309        int i, retval = 0, exclude = flags & DO_MATCH_EXCLUDE;
 310
 311        GUARD_PATHSPEC(ps,
 312                       PATHSPEC_FROMTOP |
 313                       PATHSPEC_MAXDEPTH |
 314                       PATHSPEC_LITERAL |
 315                       PATHSPEC_GLOB |
 316                       PATHSPEC_ICASE |
 317                       PATHSPEC_EXCLUDE);
 318
 319        if (!ps->nr) {
 320                if (!ps->recursive ||
 321                    !(ps->magic & PATHSPEC_MAXDEPTH) ||
 322                    ps->max_depth == -1)
 323                        return MATCHED_RECURSIVELY;
 324
 325                if (within_depth(name, namelen, 0, ps->max_depth))
 326                        return MATCHED_EXACTLY;
 327                else
 328                        return 0;
 329        }
 330
 331        name += prefix;
 332        namelen -= prefix;
 333
 334        for (i = ps->nr - 1; i >= 0; i--) {
 335                int how;
 336
 337                if ((!exclude &&   ps->items[i].magic & PATHSPEC_EXCLUDE) ||
 338                    ( exclude && !(ps->items[i].magic & PATHSPEC_EXCLUDE)))
 339                        continue;
 340
 341                if (seen && seen[i] == MATCHED_EXACTLY)
 342                        continue;
 343                /*
 344                 * Make exclude patterns optional and never report
 345                 * "pathspec ':(exclude)foo' matches no files"
 346                 */
 347                if (seen && ps->items[i].magic & PATHSPEC_EXCLUDE)
 348                        seen[i] = MATCHED_FNMATCH;
 349                how = match_pathspec_item(ps->items+i, prefix, name,
 350                                          namelen, flags);
 351                if (ps->recursive &&
 352                    (ps->magic & PATHSPEC_MAXDEPTH) &&
 353                    ps->max_depth != -1 &&
 354                    how && how != MATCHED_FNMATCH) {
 355                        int len = ps->items[i].len;
 356                        if (name[len] == '/')
 357                                len++;
 358                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 359                                how = MATCHED_EXACTLY;
 360                        else
 361                                how = 0;
 362                }
 363                if (how) {
 364                        if (retval < how)
 365                                retval = how;
 366                        if (seen && seen[i] < how)
 367                                seen[i] = how;
 368                }
 369        }
 370        return retval;
 371}
 372
 373int match_pathspec(const struct pathspec *ps,
 374                   const char *name, int namelen,
 375                   int prefix, char *seen, int is_dir)
 376{
 377        int positive, negative;
 378        unsigned flags = is_dir ? DO_MATCH_DIRECTORY : 0;
 379        positive = do_match_pathspec(ps, name, namelen,
 380                                     prefix, seen, flags);
 381        if (!(ps->magic & PATHSPEC_EXCLUDE) || !positive)
 382                return positive;
 383        negative = do_match_pathspec(ps, name, namelen,
 384                                     prefix, seen,
 385                                     flags | DO_MATCH_EXCLUDE);
 386        return negative ? 0 : positive;
 387}
 388
 389int report_path_error(const char *ps_matched,
 390                      const struct pathspec *pathspec,
 391                      const char *prefix)
 392{
 393        /*
 394         * Make sure all pathspec matched; otherwise it is an error.
 395         */
 396        int num, errors = 0;
 397        for (num = 0; num < pathspec->nr; num++) {
 398                int other, found_dup;
 399
 400                if (ps_matched[num])
 401                        continue;
 402                /*
 403                 * The caller might have fed identical pathspec
 404                 * twice.  Do not barf on such a mistake.
 405                 * FIXME: parse_pathspec should have eliminated
 406                 * duplicate pathspec.
 407                 */
 408                for (found_dup = other = 0;
 409                     !found_dup && other < pathspec->nr;
 410                     other++) {
 411                        if (other == num || !ps_matched[other])
 412                                continue;
 413                        if (!strcmp(pathspec->items[other].original,
 414                                    pathspec->items[num].original))
 415                                /*
 416                                 * Ok, we have a match already.
 417                                 */
 418                                found_dup = 1;
 419                }
 420                if (found_dup)
 421                        continue;
 422
 423                error("pathspec '%s' did not match any file(s) known to git.",
 424                      pathspec->items[num].original);
 425                errors++;
 426        }
 427        return errors;
 428}
 429
 430/*
 431 * Return the length of the "simple" part of a path match limiter.
 432 */
 433int simple_length(const char *match)
 434{
 435        int len = -1;
 436
 437        for (;;) {
 438                unsigned char c = *match++;
 439                len++;
 440                if (c == '\0' || is_glob_special(c))
 441                        return len;
 442        }
 443}
 444
 445int no_wildcard(const char *string)
 446{
 447        return string[simple_length(string)] == '\0';
 448}
 449
 450void parse_exclude_pattern(const char **pattern,
 451                           int *patternlen,
 452                           unsigned *flags,
 453                           int *nowildcardlen)
 454{
 455        const char *p = *pattern;
 456        size_t i, len;
 457
 458        *flags = 0;
 459        if (*p == '!') {
 460                *flags |= EXC_FLAG_NEGATIVE;
 461                p++;
 462        }
 463        len = strlen(p);
 464        if (len && p[len - 1] == '/') {
 465                len--;
 466                *flags |= EXC_FLAG_MUSTBEDIR;
 467        }
 468        for (i = 0; i < len; i++) {
 469                if (p[i] == '/')
 470                        break;
 471        }
 472        if (i == len)
 473                *flags |= EXC_FLAG_NODIR;
 474        *nowildcardlen = simple_length(p);
 475        /*
 476         * we should have excluded the trailing slash from 'p' too,
 477         * but that's one more allocation. Instead just make sure
 478         * nowildcardlen does not exceed real patternlen
 479         */
 480        if (*nowildcardlen > len)
 481                *nowildcardlen = len;
 482        if (*p == '*' && no_wildcard(p + 1))
 483                *flags |= EXC_FLAG_ENDSWITH;
 484        *pattern = p;
 485        *patternlen = len;
 486}
 487
 488void add_exclude(const char *string, const char *base,
 489                 int baselen, struct exclude_list *el, int srcpos)
 490{
 491        struct exclude *x;
 492        int patternlen;
 493        unsigned flags;
 494        int nowildcardlen;
 495
 496        parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
 497        if (flags & EXC_FLAG_MUSTBEDIR) {
 498                FLEXPTR_ALLOC_MEM(x, pattern, string, patternlen);
 499        } else {
 500                x = xmalloc(sizeof(*x));
 501                x->pattern = string;
 502        }
 503        x->patternlen = patternlen;
 504        x->nowildcardlen = nowildcardlen;
 505        x->base = base;
 506        x->baselen = baselen;
 507        x->flags = flags;
 508        x->srcpos = srcpos;
 509        ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
 510        el->excludes[el->nr++] = x;
 511        x->el = el;
 512}
 513
 514static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
 515                                                struct sha1_stat *sha1_stat)
 516{
 517        int pos, len;
 518        unsigned long sz;
 519        enum object_type type;
 520        void *data;
 521
 522        len = strlen(path);
 523        pos = cache_name_pos(path, len);
 524        if (pos < 0)
 525                return NULL;
 526        if (!ce_skip_worktree(active_cache[pos]))
 527                return NULL;
 528        data = read_sha1_file(active_cache[pos]->sha1, &type, &sz);
 529        if (!data || type != OBJ_BLOB) {
 530                free(data);
 531                return NULL;
 532        }
 533        *size = xsize_t(sz);
 534        if (sha1_stat) {
 535                memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
 536                hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
 537        }
 538        return data;
 539}
 540
 541/*
 542 * Frees memory within el which was allocated for exclude patterns and
 543 * the file buffer.  Does not free el itself.
 544 */
 545void clear_exclude_list(struct exclude_list *el)
 546{
 547        int i;
 548
 549        for (i = 0; i < el->nr; i++)
 550                free(el->excludes[i]);
 551        free(el->excludes);
 552        free(el->filebuf);
 553
 554        memset(el, 0, sizeof(*el));
 555}
 556
 557static void trim_trailing_spaces(char *buf)
 558{
 559        char *p, *last_space = NULL;
 560
 561        for (p = buf; *p; p++)
 562                switch (*p) {
 563                case ' ':
 564                        if (!last_space)
 565                                last_space = p;
 566                        break;
 567                case '\\':
 568                        p++;
 569                        if (!*p)
 570                                return;
 571                        /* fallthrough */
 572                default:
 573                        last_space = NULL;
 574                }
 575
 576        if (last_space)
 577                *last_space = '\0';
 578}
 579
 580/*
 581 * Given a subdirectory name and "dir" of the current directory,
 582 * search the subdir in "dir" and return it, or create a new one if it
 583 * does not exist in "dir".
 584 *
 585 * If "name" has the trailing slash, it'll be excluded in the search.
 586 */
 587static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 588                                                    struct untracked_cache_dir *dir,
 589                                                    const char *name, int len)
 590{
 591        int first, last;
 592        struct untracked_cache_dir *d;
 593        if (!dir)
 594                return NULL;
 595        if (len && name[len - 1] == '/')
 596                len--;
 597        first = 0;
 598        last = dir->dirs_nr;
 599        while (last > first) {
 600                int cmp, next = (last + first) >> 1;
 601                d = dir->dirs[next];
 602                cmp = strncmp(name, d->name, len);
 603                if (!cmp && strlen(d->name) > len)
 604                        cmp = -1;
 605                if (!cmp)
 606                        return d;
 607                if (cmp < 0) {
 608                        last = next;
 609                        continue;
 610                }
 611                first = next+1;
 612        }
 613
 614        uc->dir_created++;
 615        FLEX_ALLOC_MEM(d, name, name, len);
 616
 617        ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
 618        memmove(dir->dirs + first + 1, dir->dirs + first,
 619                (dir->dirs_nr - first) * sizeof(*dir->dirs));
 620        dir->dirs_nr++;
 621        dir->dirs[first] = d;
 622        return d;
 623}
 624
 625static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
 626{
 627        int i;
 628        dir->valid = 0;
 629        dir->untracked_nr = 0;
 630        for (i = 0; i < dir->dirs_nr; i++)
 631                do_invalidate_gitignore(dir->dirs[i]);
 632}
 633
 634static void invalidate_gitignore(struct untracked_cache *uc,
 635                                 struct untracked_cache_dir *dir)
 636{
 637        uc->gitignore_invalidated++;
 638        do_invalidate_gitignore(dir);
 639}
 640
 641static void invalidate_directory(struct untracked_cache *uc,
 642                                 struct untracked_cache_dir *dir)
 643{
 644        int i;
 645        uc->dir_invalidated++;
 646        dir->valid = 0;
 647        dir->untracked_nr = 0;
 648        for (i = 0; i < dir->dirs_nr; i++)
 649                dir->dirs[i]->recurse = 0;
 650}
 651
 652/*
 653 * Given a file with name "fname", read it (either from disk, or from
 654 * the index if "check_index" is non-zero), parse it and store the
 655 * exclude rules in "el".
 656 *
 657 * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
 658 * stat data from disk (only valid if add_excludes returns zero). If
 659 * ss_valid is non-zero, "ss" must contain good value as input.
 660 */
 661static int add_excludes(const char *fname, const char *base, int baselen,
 662                        struct exclude_list *el, int check_index,
 663                        struct sha1_stat *sha1_stat)
 664{
 665        struct stat st;
 666        int fd, i, lineno = 1;
 667        size_t size = 0;
 668        char *buf, *entry;
 669
 670        fd = open(fname, O_RDONLY);
 671        if (fd < 0 || fstat(fd, &st) < 0) {
 672                if (errno != ENOENT)
 673                        warn_on_inaccessible(fname);
 674                if (0 <= fd)
 675                        close(fd);
 676                if (!check_index ||
 677                    (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL)
 678                        return -1;
 679                if (size == 0) {
 680                        free(buf);
 681                        return 0;
 682                }
 683                if (buf[size-1] != '\n') {
 684                        buf = xrealloc(buf, st_add(size, 1));
 685                        buf[size++] = '\n';
 686                }
 687        } else {
 688                size = xsize_t(st.st_size);
 689                if (size == 0) {
 690                        if (sha1_stat) {
 691                                fill_stat_data(&sha1_stat->stat, &st);
 692                                hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN);
 693                                sha1_stat->valid = 1;
 694                        }
 695                        close(fd);
 696                        return 0;
 697                }
 698                buf = xmallocz(size);
 699                if (read_in_full(fd, buf, size) != size) {
 700                        free(buf);
 701                        close(fd);
 702                        return -1;
 703                }
 704                buf[size++] = '\n';
 705                close(fd);
 706                if (sha1_stat) {
 707                        int pos;
 708                        if (sha1_stat->valid &&
 709                            !match_stat_data_racy(&the_index, &sha1_stat->stat, &st))
 710                                ; /* no content change, ss->sha1 still good */
 711                        else if (check_index &&
 712                                 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
 713                                 !ce_stage(active_cache[pos]) &&
 714                                 ce_uptodate(active_cache[pos]) &&
 715                                 !would_convert_to_git(fname))
 716                                hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
 717                        else
 718                                hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
 719                        fill_stat_data(&sha1_stat->stat, &st);
 720                        sha1_stat->valid = 1;
 721                }
 722        }
 723
 724        el->filebuf = buf;
 725
 726        if (skip_utf8_bom(&buf, size))
 727                size -= buf - el->filebuf;
 728
 729        entry = buf;
 730
 731        for (i = 0; i < size; i++) {
 732                if (buf[i] == '\n') {
 733                        if (entry != buf + i && entry[0] != '#') {
 734                                buf[i - (i && buf[i-1] == '\r')] = 0;
 735                                trim_trailing_spaces(entry);
 736                                add_exclude(entry, base, baselen, el, lineno);
 737                        }
 738                        lineno++;
 739                        entry = buf + i + 1;
 740                }
 741        }
 742        return 0;
 743}
 744
 745int add_excludes_from_file_to_list(const char *fname, const char *base,
 746                                   int baselen, struct exclude_list *el,
 747                                   int check_index)
 748{
 749        return add_excludes(fname, base, baselen, el, check_index, NULL);
 750}
 751
 752struct exclude_list *add_exclude_list(struct dir_struct *dir,
 753                                      int group_type, const char *src)
 754{
 755        struct exclude_list *el;
 756        struct exclude_list_group *group;
 757
 758        group = &dir->exclude_list_group[group_type];
 759        ALLOC_GROW(group->el, group->nr + 1, group->alloc);
 760        el = &group->el[group->nr++];
 761        memset(el, 0, sizeof(*el));
 762        el->src = src;
 763        return el;
 764}
 765
 766/*
 767 * Used to set up core.excludesfile and .git/info/exclude lists.
 768 */
 769static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 770                                     struct sha1_stat *sha1_stat)
 771{
 772        struct exclude_list *el;
 773        /*
 774         * catch setup_standard_excludes() that's called before
 775         * dir->untracked is assigned. That function behaves
 776         * differently when dir->untracked is non-NULL.
 777         */
 778        if (!dir->untracked)
 779                dir->unmanaged_exclude_files++;
 780        el = add_exclude_list(dir, EXC_FILE, fname);
 781        if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 782                die("cannot use %s as an exclude file", fname);
 783}
 784
 785void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 786{
 787        dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
 788        add_excludes_from_file_1(dir, fname, NULL);
 789}
 790
 791int match_basename(const char *basename, int basenamelen,
 792                   const char *pattern, int prefix, int patternlen,
 793                   unsigned flags)
 794{
 795        if (prefix == patternlen) {
 796                if (patternlen == basenamelen &&
 797                    !fspathncmp(pattern, basename, basenamelen))
 798                        return 1;
 799        } else if (flags & EXC_FLAG_ENDSWITH) {
 800                /* "*literal" matching against "fooliteral" */
 801                if (patternlen - 1 <= basenamelen &&
 802                    !fspathncmp(pattern + 1,
 803                                   basename + basenamelen - (patternlen - 1),
 804                                   patternlen - 1))
 805                        return 1;
 806        } else {
 807                if (fnmatch_icase_mem(pattern, patternlen,
 808                                      basename, basenamelen,
 809                                      0) == 0)
 810                        return 1;
 811        }
 812        return 0;
 813}
 814
 815int match_pathname(const char *pathname, int pathlen,
 816                   const char *base, int baselen,
 817                   const char *pattern, int prefix, int patternlen,
 818                   unsigned flags)
 819{
 820        const char *name;
 821        int namelen;
 822
 823        /*
 824         * match with FNM_PATHNAME; the pattern has base implicitly
 825         * in front of it.
 826         */
 827        if (*pattern == '/') {
 828                pattern++;
 829                patternlen--;
 830                prefix--;
 831        }
 832
 833        /*
 834         * baselen does not count the trailing slash. base[] may or
 835         * may not end with a trailing slash though.
 836         */
 837        if (pathlen < baselen + 1 ||
 838            (baselen && pathname[baselen] != '/') ||
 839            fspathncmp(pathname, base, baselen))
 840                return 0;
 841
 842        namelen = baselen ? pathlen - baselen - 1 : pathlen;
 843        name = pathname + pathlen - namelen;
 844
 845        if (prefix) {
 846                /*
 847                 * if the non-wildcard part is longer than the
 848                 * remaining pathname, surely it cannot match.
 849                 */
 850                if (prefix > namelen)
 851                        return 0;
 852
 853                if (fspathncmp(pattern, name, prefix))
 854                        return 0;
 855                pattern += prefix;
 856                patternlen -= prefix;
 857                name    += prefix;
 858                namelen -= prefix;
 859
 860                /*
 861                 * If the whole pattern did not have a wildcard,
 862                 * then our prefix match is all we need; we
 863                 * do not need to call fnmatch at all.
 864                 */
 865                if (!patternlen && !namelen)
 866                        return 1;
 867        }
 868
 869        return fnmatch_icase_mem(pattern, patternlen,
 870                                 name, namelen,
 871                                 WM_PATHNAME) == 0;
 872}
 873
 874/*
 875 * Scan the given exclude list in reverse to see whether pathname
 876 * should be ignored.  The first match (i.e. the last on the list), if
 877 * any, determines the fate.  Returns the exclude_list element which
 878 * matched, or NULL for undecided.
 879 */
 880static struct exclude *last_exclude_matching_from_list(const char *pathname,
 881                                                       int pathlen,
 882                                                       const char *basename,
 883                                                       int *dtype,
 884                                                       struct exclude_list *el)
 885{
 886        struct exclude *exc = NULL; /* undecided */
 887        int i;
 888
 889        if (!el->nr)
 890                return NULL;    /* undefined */
 891
 892        for (i = el->nr - 1; 0 <= i; i--) {
 893                struct exclude *x = el->excludes[i];
 894                const char *exclude = x->pattern;
 895                int prefix = x->nowildcardlen;
 896
 897                if (x->flags & EXC_FLAG_MUSTBEDIR) {
 898                        if (*dtype == DT_UNKNOWN)
 899                                *dtype = get_dtype(NULL, pathname, pathlen);
 900                        if (*dtype != DT_DIR)
 901                                continue;
 902                }
 903
 904                if (x->flags & EXC_FLAG_NODIR) {
 905                        if (match_basename(basename,
 906                                           pathlen - (basename - pathname),
 907                                           exclude, prefix, x->patternlen,
 908                                           x->flags)) {
 909                                exc = x;
 910                                break;
 911                        }
 912                        continue;
 913                }
 914
 915                assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
 916                if (match_pathname(pathname, pathlen,
 917                                   x->base, x->baselen ? x->baselen - 1 : 0,
 918                                   exclude, prefix, x->patternlen, x->flags)) {
 919                        exc = x;
 920                        break;
 921                }
 922        }
 923        return exc;
 924}
 925
 926/*
 927 * Scan the list and let the last match determine the fate.
 928 * Return 1 for exclude, 0 for include and -1 for undecided.
 929 */
 930int is_excluded_from_list(const char *pathname,
 931                          int pathlen, const char *basename, int *dtype,
 932                          struct exclude_list *el)
 933{
 934        struct exclude *exclude;
 935        exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el);
 936        if (exclude)
 937                return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 938        return -1; /* undecided */
 939}
 940
 941static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir,
 942                const char *pathname, int pathlen, const char *basename,
 943                int *dtype_p)
 944{
 945        int i, j;
 946        struct exclude_list_group *group;
 947        struct exclude *exclude;
 948        for (i = EXC_CMDL; i <= EXC_FILE; i++) {
 949                group = &dir->exclude_list_group[i];
 950                for (j = group->nr - 1; j >= 0; j--) {
 951                        exclude = last_exclude_matching_from_list(
 952                                pathname, pathlen, basename, dtype_p,
 953                                &group->el[j]);
 954                        if (exclude)
 955                                return exclude;
 956                }
 957        }
 958        return NULL;
 959}
 960
 961/*
 962 * Loads the per-directory exclude list for the substring of base
 963 * which has a char length of baselen.
     *
     * Levels of dir->exclude_stack that are not a prefix of base are popped
     * first; then one level is pushed for each leading directory component
     * of base, up to baselen.  If a pushed directory is itself matched by a
     * non-negated pattern, that match is stored in dir->exclude and the
     * function returns early without loading any deeper level.
 964 */
 965static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 966{
 967        struct exclude_list_group *group;
 968        struct exclude_list *el;
 969        struct exclude_stack *stk = NULL;
 970        struct untracked_cache_dir *untracked;
 971        int current;
 972
 973        group = &dir->exclude_list_group[EXC_DIRS];
 974
 975        /*
 976         * Pop the exclude lists from the EXC_DIRS exclude_list_group
 977         * which originate from directories not in the prefix of the
 978         * path being checked.
 979         */
 980        while ((stk = dir->exclude_stack) != NULL) {
 981                if (stk->baselen <= baselen &&
 982                    !strncmp(dir->basebuf.buf, base, stk->baselen))
 983                        break;
 984                el = &group->el[dir->exclude_stack->exclude_ix];
 985                dir->exclude_stack = stk->prev;
                    /* a remembered "directory is excluded" match is dropped on pop */
 986                dir->exclude = NULL;
 987                free((char *)el->src); /* see strbuf_detach() below */
 988                clear_exclude_list(el);
 989                free(stk);
 990                group->nr--;
 991        }
 992
 993        /* Skip traversing into sub directories if the parent is excluded */
 994        if (dir->exclude)
 995                return;
 996
 997        /*
 998         * Lazy initialization. All call sites currently just
 999         * memset(dir, 0, sizeof(*dir)) before use. Changing all of
1000         * them seems lots of work for little benefit.
1001         */
1002        if (!dir->basebuf.buf)
1003                strbuf_init(&dir->basebuf, PATH_MAX);
1004
1005        /* Read from the parent directories and push them down. */
            /* current == -1 means the stack is empty (no level pushed yet) */
1006        current = stk ? stk->baselen : -1;
1007        strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
1008        if (dir->untracked)
1009                untracked = stk ? stk->ucd : dir->untracked->root;
1010        else
1011                untracked = NULL;
1012
1013        while (current < baselen) {
1014                const char *cp;
1015                struct sha1_stat sha1_stat;
1016
1017                stk = xcalloc(1, sizeof(*stk));
1018                if (current < 0) {
                            /* first level: the entry whose baselen is 0 */
1019                        cp = base;
1020                        current = 0;
1021                } else {
                            /* next level: up to and including the next '/' */
1022                        cp = strchr(base + current + 1, '/');
1023                        if (!cp)
1024                                die("oops in prep_exclude");
1025                        cp++;
1026                        untracked =
1027                                lookup_untracked(dir->untracked, untracked,
1028                                                 base + current,
1029                                                 cp - base - current);
1030                }
1031                stk->prev = dir->exclude_stack;
1032                stk->baselen = cp - base;
1033                stk->exclude_ix = group->nr;
1034                stk->ucd = untracked;
1035                el = add_exclude_list(dir, EXC_DIRS, NULL);
1036                strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
1037                assert(stk->baselen == dir->basebuf.len);
1038
1039                /* Abort if the directory is excluded */
1040                if (stk->baselen) {
1041                        int dt = DT_DIR;
                            /*
                             * Temporarily overwrite the trailing '/' with NUL
                             * so the directory name can be matched as a
                             * string; it is restored right after the lookup.
                             */
1042                        dir->basebuf.buf[stk->baselen - 1] = 0;
1043                        dir->exclude = last_exclude_matching_from_lists(dir,
1044                                dir->basebuf.buf, stk->baselen - 1,
1045                                dir->basebuf.buf + current, &dt);
1046                        dir->basebuf.buf[stk->baselen - 1] = '/';
                            /* a negated match does not exclude the directory */
1047                        if (dir->exclude &&
1048                            dir->exclude->flags & EXC_FLAG_NEGATIVE)
1049                                dir->exclude = NULL;
1050                        if (dir->exclude) {
1051                                dir->exclude_stack = stk;
1052                                return;
1053                        }
1054                }
1055
1056                /* Try to read per-directory file */
1057                hashclr(sha1_stat.sha1);
1058                sha1_stat.valid = 0;
1059                if (dir->exclude_per_dir &&
1060                    /*
1061                     * If we know that no files have been added in
1062                     * this directory (i.e. valid_cached_dir() has
1063                     * been executed and set untracked->valid) ..
1064                     */
1065                    (!untracked || !untracked->valid ||
1066                     /*
1067                      * .. and .gitignore does not exist before
1068                      * (i.e. null exclude_sha1). Then we can skip
1069                      * loading .gitignore, which would result in
1070                      * ENOENT anyway.
1071                      */
1072                     !is_null_sha1(untracked->exclude_sha1))) {
1073                        /*
1074                         * dir->basebuf gets reused by the traversal, but we
1075                         * need fname to remain unchanged to ensure the src
1076                         * member of each struct exclude correctly
1077                         * back-references its source file.  Other invocations
1078                         * of add_exclude_list provide stable strings, so we
1079                         * strbuf_detach() and free() here in the caller.
1080                         */
1081                        struct strbuf sb = STRBUF_INIT;
1082                        strbuf_addbuf(&sb, &dir->basebuf);
1083                        strbuf_addstr(&sb, dir->exclude_per_dir);
1084                        el->src = strbuf_detach(&sb, NULL);
1085                        add_excludes(el->src, el->src, stk->baselen, el, 1,
1086                                     untracked ? &sha1_stat : NULL);
1087                }
1088                /*
1089                 * NEEDSWORK: when untracked cache is enabled, prep_exclude()
1090                 * will first be called in valid_cached_dir() then maybe many
1091                 * times more in last_exclude_matching(). When the cache is
1092                 * used, last_exclude_matching() will not be called and
1093                 * reading .gitignore content will be a waste.
1094                 *
1095                 * So when it's called by valid_cached_dir() and we can get
1096                 * .gitignore SHA-1 from the index (i.e. .gitignore is not
1097                 * modified on work tree), we could delay reading the
1098                 * .gitignore content until we absolutely need it in
1099                 * last_exclude_matching(). Be careful about ignore rule
1100                 * order, though, if you do that.
1101                 */
                    /*
                     * The hash recorded for this level differs from the one
                     * stored in the untracked cache entry: invalidate the
                     * entry and store the new hash.
                     */
1102                if (untracked &&
1103                    hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) {
1104                        invalidate_gitignore(dir->untracked, untracked);
1105                        hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
1106                }
1107                dir->exclude_stack = stk;
1108                current = stk->baselen;
1109        }
1110        strbuf_setlen(&dir->basebuf, baselen);
1111}
1112
1113/*
1114 * Loads the exclude lists for the directory containing pathname, then
1115 * scans all exclude lists to determine whether pathname is excluded.
1116 * Returns the exclude_list element which matched, or NULL for
1117 * undecided.
1118 */
1119struct exclude *last_exclude_matching(struct dir_struct *dir,
1120                                             const char *pathname,
1121                                             int *dtype_p)
1122{
1123        int pathlen = strlen(pathname);
1124        const char *basename = strrchr(pathname, '/');
1125        basename = (basename) ? basename+1 : pathname;
1126
1127        prep_exclude(dir, pathname, basename-pathname);
1128
1129        if (dir->exclude)
1130                return dir->exclude;
1131
1132        return last_exclude_matching_from_lists(dir, pathname, pathlen,
1133                        basename, dtype_p);
1134}
1135
1136/*
1137 * Loads the exclude lists for the directory containing pathname, then
1138 * scans all exclude lists to determine whether pathname is excluded.
1139 * Returns 1 if true, otherwise 0.
1140 */
1141int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
1142{
1143        struct exclude *exclude =
1144                last_exclude_matching(dir, pathname, dtype_p);
1145        if (exclude)
1146                return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
1147        return 0;
1148}
1149
1150static struct dir_entry *dir_entry_new(const char *pathname, int len)
1151{
1152        struct dir_entry *ent;
1153
1154        FLEX_ALLOC_MEM(ent, name, pathname, len);
1155        ent->len = len;
1156        return ent;
1157}
1158
1159static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
1160{
1161        if (cache_file_exists(pathname, len, ignore_case))
1162                return NULL;
1163
1164        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
1165        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
1166}
1167
1168struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
1169{
1170        if (!cache_name_is_other(pathname, len))
1171                return NULL;
1172
1173        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
1174        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
1175}
1176
/* Result of looking up a directory name in the index. */
enum exist_status {
        /* no index entry lives under the name and it is not a gitlink */
        index_nonexistent = 0,
        /* the index has entries below "<name>/" */
        index_directory = 1,
        /* the index has the name itself as a gitlink entry */
        index_gitdir = 2
};
1182
1183/*
1184 * Do not use the alphabetically sorted index to look up
1185 * the directory name; instead, use the case insensitive
1186 * directory hash.
1187 */
1188static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
1189{
1190        struct cache_entry *ce;
1191
1192        if (cache_dir_exists(dirname, len))
1193                return index_directory;
1194
1195        ce = cache_file_exists(dirname, len, ignore_case);
1196        if (ce && S_ISGITLINK(ce->ce_mode))
1197                return index_gitdir;
1198
1199        return index_nonexistent;
1200}
1201
1202/*
1203 * The index sorts alphabetically by entry name, which
1204 * means that a gitlink sorts as '\0' at the end, while
1205 * a directory (which is defined not as an entry, but as
1206 * the files it contains) will sort with the '/' at the
1207 * end.
1208 */
1209static enum exist_status directory_exists_in_index(const char *dirname, int len)
1210{
1211        int pos;
1212
1213        if (ignore_case)
1214                return directory_exists_in_index_icase(dirname, len);
1215
1216        pos = cache_name_pos(dirname, len);
1217        if (pos < 0)
1218                pos = -pos-1;
1219        while (pos < active_nr) {
1220                const struct cache_entry *ce = active_cache[pos++];
1221                unsigned char endchar;
1222
1223                if (strncmp(ce->name, dirname, len))
1224                        break;
1225                endchar = ce->name[len];
1226                if (endchar > '/')
1227                        break;
1228                if (endchar == '/')
1229                        return index_directory;
1230                if (!endchar && S_ISGITLINK(ce->ce_mode))
1231                        return index_gitdir;
1232        }
1233        return index_nonexistent;
1234}
1235
1236/*
1237 * When we find a directory when traversing the filesystem, we
1238 * have three distinct cases:
1239 *
1240 *  - ignore it
1241 *  - see it as a directory
1242 *  - recurse into it
1243 *
1244 * and which one we choose depends on a combination of existing
1245 * git index contents and the flags passed into the directory
1246 * traversal routine.
1247 *
1248 * Case 1: If we *already* have entries in the index under that
1249 * directory name, we always recurse into the directory to see
1250 * all the files.
1251 *
1252 * Case 2: If we *already* have that directory name as a gitlink,
1253 * we always continue to see it as a gitlink, regardless of whether
1254 * there is an actual git directory there or not (it might not
1255 * be checked out as a subproject!)
1256 *
1257 * Case 3: if we didn't have it in the index previously, we
1258 * have a few sub-cases:
1259 *
1260 *  (a) if "show_other_directories" is true, we show it as
1261 *      just a directory, unless "hide_empty_directories" is
1262 *      also true, in which case we need to check if it contains any
1263 *      untracked and / or ignored files.
1264 *  (b) if it looks like a git directory, and we don't have
1265 *      'no_gitlinks' set we treat it as a gitlink, and show it
1266 *      as a directory.
1267 *  (c) otherwise, we recurse into it.
1268 */
1269static enum path_treatment treat_directory(struct dir_struct *dir,
1270        struct untracked_cache_dir *untracked,
1271        const char *dirname, int len, int baselen, int exclude,
1272        const struct path_simplify *simplify)
1273{
1274        /* The "len-1" is to strip the final '/' */
1275        switch (directory_exists_in_index(dirname, len-1)) {
1276        case index_directory:
1277                return path_recurse;
1278
1279        case index_gitdir:
1280                return path_none;
1281
1282        case index_nonexistent:
1283                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
1284                        break;
1285                if (!(dir->flags & DIR_NO_GITLINKS)) {
1286                        unsigned char sha1[20];
1287                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
1288                                return path_untracked;
1289                }
1290                return path_recurse;
1291        }
1292
1293        /* This is the "show_other_directories" case */
1294
1295        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
1296                return exclude ? path_excluded : path_untracked;
1297
1298        untracked = lookup_untracked(dir->untracked, untracked,
1299                                     dirname + baselen, len - baselen);
1300        return read_directory_recursive(dir, dirname, len,
1301                                        untracked, 1, simplify);
1302}
1303
1304/*
1305 * This is an inexact early pruning of any recursive directory
1306 * reading - if the path cannot possibly be in the pathspec,
1307 * return true, and we'll skip it early.
1308 */
1309static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
1310{
1311        if (simplify) {
1312                for (;;) {
1313                        const char *match = simplify->path;
1314                        int len = simplify->len;
1315
1316                        if (!match)
1317                                break;
1318                        if (len > pathlen)
1319                                len = pathlen;
1320                        if (!memcmp(path, match, len))
1321                                return 0;
1322                        simplify++;
1323                }
1324                return 1;
1325        }
1326        return 0;
1327}
1328
1329/*
1330 * This function tells us whether an excluded path matches a
1331 * list of "interesting" pathspecs. That is, whether a path matched
1332 * by any of the pathspecs could possibly be ignored by excluding
1333 * the specified path. This can happen if:
1334 *
1335 *   1. the path is mentioned explicitly in the pathspec
1336 *
1337 *   2. the path is a directory prefix of some element in the
1338 *      pathspec
1339 */
1340static int exclude_matches_pathspec(const char *path, int len,
1341                const struct path_simplify *simplify)
1342{
1343        if (simplify) {
1344                for (; simplify->path; simplify++) {
1345                        if (len == simplify->len
1346                            && !memcmp(path, simplify->path, len))
1347                                return 1;
1348                        if (len < simplify->len
1349                            && simplify->path[len] == '/'
1350                            && !memcmp(path, simplify->path, len))
1351                                return 1;
1352                }
1353        }
1354        return 0;
1355}
1356
1357static int get_index_dtype(const char *path, int len)
1358{
1359        int pos;
1360        const struct cache_entry *ce;
1361
1362        ce = cache_file_exists(path, len, 0);
1363        if (ce) {
1364                if (!ce_uptodate(ce))
1365                        return DT_UNKNOWN;
1366                if (S_ISGITLINK(ce->ce_mode))
1367                        return DT_DIR;
1368                /*
1369                 * Nobody actually cares about the
1370                 * difference between DT_LNK and DT_REG
1371                 */
1372                return DT_REG;
1373        }
1374
1375        /* Try to look it up as a directory */
1376        pos = cache_name_pos(path, len);
1377        if (pos >= 0)
1378                return DT_UNKNOWN;
1379        pos = -pos-1;
1380        while (pos < active_nr) {
1381                ce = active_cache[pos++];
1382                if (strncmp(ce->name, path, len))
1383                        break;
1384                if (ce->name[len] > '/')
1385                        break;
1386                if (ce->name[len] < '/')
1387                        continue;
1388                if (!ce_uptodate(ce))
1389                        break;  /* continue? */
1390                return DT_DIR;
1391        }
1392        return DT_UNKNOWN;
1393}
1394
1395static int get_dtype(struct dirent *de, const char *path, int len)
1396{
1397        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
1398        struct stat st;
1399
1400        if (dtype != DT_UNKNOWN)
1401                return dtype;
1402        dtype = get_index_dtype(path, len);
1403        if (dtype != DT_UNKNOWN)
1404                return dtype;
1405        if (lstat(path, &st))
1406                return dtype;
1407        if (S_ISREG(st.st_mode))
1408                return DT_REG;
1409        if (S_ISDIR(st.st_mode))
1410                return DT_DIR;
1411        if (S_ISLNK(st.st_mode))
1412                return DT_LNK;
1413        return dtype;
1414}
1415
1416static enum path_treatment treat_one_path(struct dir_struct *dir,
1417                                          struct untracked_cache_dir *untracked,
1418                                          struct strbuf *path,
1419                                          int baselen,
1420                                          const struct path_simplify *simplify,
1421                                          int dtype, struct dirent *de)
1422{
1423        int exclude;
1424        int has_path_in_index = !!cache_file_exists(path->buf, path->len, ignore_case);
1425
1426        if (dtype == DT_UNKNOWN)
1427                dtype = get_dtype(de, path->buf, path->len);
1428
1429        /* Always exclude indexed files */
1430        if (dtype != DT_DIR && has_path_in_index)
1431                return path_none;
1432
1433        /*
1434         * When we are looking at a directory P in the working tree,
1435         * there are three cases:
1436         *
1437         * (1) P exists in the index.  Everything inside the directory P in
1438         * the working tree needs to go when P is checked out from the
1439         * index.
1440         *
1441         * (2) P does not exist in the index, but there is P/Q in the index.
1442         * We know P will stay a directory when we check out the contents
1443         * of the index, but we do not know yet if there is a directory
1444         * P/Q in the working tree to be killed, so we need to recurse.
1445         *
1446         * (3) P does not exist in the index, and there is no P/Q in the index
1447         * to require P to be a directory, either.  Only in this case, we
1448         * know that everything inside P will not be killed without
1449         * recursing.
1450         */
1451        if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
1452            (dtype == DT_DIR) &&
1453            !has_path_in_index &&
1454            (directory_exists_in_index(path->buf, path->len) == index_nonexistent))
1455                return path_none;
1456
1457        exclude = is_excluded(dir, path->buf, &dtype);
1458
1459        /*
1460         * Excluded? If we don't explicitly want to show
1461         * ignored files, ignore it
1462         */
1463        if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
1464                return path_excluded;
1465
1466        switch (dtype) {
1467        default:
1468                return path_none;
1469        case DT_DIR:
1470                strbuf_addch(path, '/');
1471                return treat_directory(dir, untracked, path->buf, path->len,
1472                                       baselen, exclude, simplify);
1473        case DT_REG:
1474        case DT_LNK:
1475                return exclude ? path_excluded : path_untracked;
1476        }
1477}
1478
1479static enum path_treatment treat_path_fast(struct dir_struct *dir,
1480                                           struct untracked_cache_dir *untracked,
1481                                           struct cached_dir *cdir,
1482                                           struct strbuf *path,
1483                                           int baselen,
1484                                           const struct path_simplify *simplify)
1485{
1486        strbuf_setlen(path, baselen);
1487        if (!cdir->ucd) {
1488                strbuf_addstr(path, cdir->file);
1489                return path_untracked;
1490        }
1491        strbuf_addstr(path, cdir->ucd->name);
1492        /* treat_one_path() does this before it calls treat_directory() */
1493        strbuf_complete(path, '/');
1494        if (cdir->ucd->check_only)
1495                /*
1496                 * check_only is set as a result of treat_directory() getting
1497                 * to its bottom. Verify again the same set of directories
1498                 * with check_only set.
1499                 */
1500                return read_directory_recursive(dir, path->buf, path->len,
1501                                                cdir->ucd, 1, simplify);
1502        /*
1503         * We get path_recurse in the first run when
1504         * directory_exists_in_index() returns index_nonexistent. We
1505         * are sure that new changes in the index does not impact the
1506         * outcome. Return now.
1507         */
1508        return path_recurse;
1509}
1510
1511static enum path_treatment treat_path(struct dir_struct *dir,
1512                                      struct untracked_cache_dir *untracked,
1513                                      struct cached_dir *cdir,
1514                                      struct strbuf *path,
1515                                      int baselen,
1516                                      const struct path_simplify *simplify)
1517{
1518        int dtype;
1519        struct dirent *de = cdir->de;
1520
1521        if (!de)
1522                return treat_path_fast(dir, untracked, cdir, path,
1523                                       baselen, simplify);
1524        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
1525                return path_none;
1526        strbuf_setlen(path, baselen);
1527        strbuf_addstr(path, de->d_name);
1528        if (simplify_away(path->buf, path->len, simplify))
1529                return path_none;
1530
1531        dtype = DTYPE(de);
1532        return treat_one_path(dir, untracked, path, baselen, simplify, dtype, de);
1533}
1534
1535static void add_untracked(struct untracked_cache_dir *dir, const char *name)
1536{
1537        if (!dir)
1538                return;
1539        ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
1540                   dir->untracked_alloc);
1541        dir->untracked[dir->untracked_nr++] = xstrdup(name);
1542}
1543
1544static int valid_cached_dir(struct dir_struct *dir,
1545                            struct untracked_cache_dir *untracked,
1546                            struct strbuf *path,
1547                            int check_only)
1548{
1549        struct stat st;
1550
1551        if (!untracked)
1552                return 0;
1553
1554        if (stat(path->len ? path->buf : ".", &st)) {
1555                invalidate_directory(dir->untracked, untracked);
1556                memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
1557                return 0;
1558        }
1559        if (!untracked->valid ||
1560            match_stat_data_racy(&the_index, &untracked->stat_data, &st)) {
1561                if (untracked->valid)
1562                        invalidate_directory(dir->untracked, untracked);
1563                fill_stat_data(&untracked->stat_data, &st);
1564                return 0;
1565        }
1566
1567        if (untracked->check_only != !!check_only) {
1568                invalidate_directory(dir->untracked, untracked);
1569                return 0;
1570        }
1571
1572        /*
1573         * prep_exclude will be called eventually on this directory,
1574         * but it's called much later in last_exclude_matching(). We
1575         * need it now to determine the validity of the cache for this
1576         * path. The next calls will be nearly no-op, the way
1577         * prep_exclude() is designed.
1578         */
1579        if (path->len && path->buf[path->len - 1] != '/') {
1580                strbuf_addch(path, '/');
1581                prep_exclude(dir, path->buf, path->len);
1582                strbuf_setlen(path, path->len - 1);
1583        } else
1584                prep_exclude(dir, path->buf, path->len);
1585
1586        /* hopefully prep_exclude() haven't invalidated this entry... */
1587        return untracked->valid;
1588}
1589
1590static int open_cached_dir(struct cached_dir *cdir,
1591                           struct dir_struct *dir,
1592                           struct untracked_cache_dir *untracked,
1593                           struct strbuf *path,
1594                           int check_only)
1595{
1596        memset(cdir, 0, sizeof(*cdir));
1597        cdir->untracked = untracked;
1598        if (valid_cached_dir(dir, untracked, path, check_only))
1599                return 0;
1600        cdir->fdir = opendir(path->len ? path->buf : ".");
1601        if (dir->untracked)
1602                dir->untracked->dir_opened++;
1603        if (!cdir->fdir)
1604                return -1;
1605        return 0;
1606}
1607
1608static int read_cached_dir(struct cached_dir *cdir)
1609{
1610        if (cdir->fdir) {
1611                cdir->de = readdir(cdir->fdir);
1612                if (!cdir->de)
1613                        return -1;
1614                return 0;
1615        }
1616        while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
1617                struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
1618                if (!d->recurse) {
1619                        cdir->nr_dirs++;
1620                        continue;
1621                }
1622                cdir->ucd = d;
1623                cdir->nr_dirs++;
1624                return 0;
1625        }
1626        cdir->ucd = NULL;
1627        if (cdir->nr_files < cdir->untracked->untracked_nr) {
1628                struct untracked_cache_dir *d = cdir->untracked;
1629                cdir->file = d->untracked[cdir->nr_files++];
1630                return 0;
1631        }
1632        return -1;
1633}
1634
1635static void close_cached_dir(struct cached_dir *cdir)
1636{
1637        if (cdir->fdir)
1638                closedir(cdir->fdir);
1639        /*
1640         * We have gone through this directory and found no untracked
1641         * entries. Mark it valid.
1642         */
1643        if (cdir->untracked) {
1644                cdir->untracked->valid = 1;
1645                cdir->untracked->recurse = 1;
1646        }
1647}
1648
1649/*
1650 * Read a directory tree. We currently ignore anything but
1651 * directories, regular files and symlinks. That's because git
1652 * doesn't handle them at all yet. Maybe that will change some
1653 * day.
1654 *
1655 * Also, we ignore the name ".git" (even if it is not a directory).
1656 * That likely will not change.
1657 *
1658 * Returns the most significant path_treatment value encountered in the scan.
1659 */
1660static enum path_treatment read_directory_recursive(struct dir_struct *dir,
1661                                    const char *base, int baselen,
1662                                    struct untracked_cache_dir *untracked, int check_only,
1663                                    const struct path_simplify *simplify)
1664{
1665        struct cached_dir cdir;
1666        enum path_treatment state, subdir_state, dir_state = path_none;
1667        struct strbuf path = STRBUF_INIT;
1668
1669        strbuf_add(&path, base, baselen);
1670
1671        if (open_cached_dir(&cdir, dir, untracked, &path, check_only))
1672                goto out;
1673
1674        if (untracked)
1675                untracked->check_only = !!check_only;
1676
1677        while (!read_cached_dir(&cdir)) {
1678                /* check how the file or directory should be treated */
1679                state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
1680
1681                if (state > dir_state)
1682                        dir_state = state;
1683
1684                /* recurse into subdir if instructed by treat_path */
1685                if (state == path_recurse) {
1686                        struct untracked_cache_dir *ud;
1687                        ud = lookup_untracked(dir->untracked, untracked,
1688                                              path.buf + baselen,
1689                                              path.len - baselen);
1690                        subdir_state =
1691                                read_directory_recursive(dir, path.buf, path.len,
1692                                                         ud, check_only, simplify);
1693                        if (subdir_state > dir_state)
1694                                dir_state = subdir_state;
1695                }
1696
1697                if (check_only) {
1698                        /* abort early if maximum state has been reached */
1699                        if (dir_state == path_untracked) {
1700                                if (cdir.fdir)
1701                                        add_untracked(untracked, path.buf + baselen);
1702                                break;
1703                        }
1704                        /* skip the dir_add_* part */
1705                        continue;
1706                }
1707
1708                /* add the path to the appropriate result list */
1709                switch (state) {
1710                case path_excluded:
1711                        if (dir->flags & DIR_SHOW_IGNORED)
1712                                dir_add_name(dir, path.buf, path.len);
1713                        else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
1714                                ((dir->flags & DIR_COLLECT_IGNORED) &&
1715                                exclude_matches_pathspec(path.buf, path.len,
1716                                        simplify)))
1717                                dir_add_ignored(dir, path.buf, path.len);
1718                        break;
1719
1720                case path_untracked:
1721                        if (dir->flags & DIR_SHOW_IGNORED)
1722                                break;
1723                        dir_add_name(dir, path.buf, path.len);
1724                        if (cdir.fdir)
1725                                add_untracked(untracked, path.buf + baselen);
1726                        break;
1727
1728                default:
1729                        break;
1730                }
1731        }
1732        close_cached_dir(&cdir);
1733 out:
1734        strbuf_release(&path);
1735
1736        return dir_state;
1737}
1738
1739static int cmp_name(const void *p1, const void *p2)
1740{
1741        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1742        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1743
1744        return name_compare(e1->name, e1->len, e2->name, e2->len);
1745}
1746
1747static struct path_simplify *create_simplify(const char **pathspec)
1748{
1749        int nr, alloc = 0;
1750        struct path_simplify *simplify = NULL;
1751
1752        if (!pathspec)
1753                return NULL;
1754
1755        for (nr = 0 ; ; nr++) {
1756                const char *match;
1757                ALLOC_GROW(simplify, nr + 1, alloc);
1758                match = *pathspec++;
1759                if (!match)
1760                        break;
1761                simplify[nr].path = match;
1762                simplify[nr].len = simple_length(match);
1763        }
1764        simplify[nr].path = NULL;
1765        simplify[nr].len = 0;
1766        return simplify;
1767}
1768
/*
 * Release an array obtained from create_simplify(). The path strings
 * are borrowed, so only the array itself is freed; NULL is accepted.
 */
static void free_simplify(struct path_simplify *simplify)
{
        free(simplify);
}
1773
1774static int treat_leading_path(struct dir_struct *dir,
1775                              const char *path, int len,
1776                              const struct path_simplify *simplify)
1777{
1778        struct strbuf sb = STRBUF_INIT;
1779        int baselen, rc = 0;
1780        const char *cp;
1781        int old_flags = dir->flags;
1782
1783        while (len && path[len - 1] == '/')
1784                len--;
1785        if (!len)
1786                return 1;
1787        baselen = 0;
1788        dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES;
1789        while (1) {
1790                cp = path + baselen + !!baselen;
1791                cp = memchr(cp, '/', path + len - cp);
1792                if (!cp)
1793                        baselen = len;
1794                else
1795                        baselen = cp - path;
1796                strbuf_setlen(&sb, 0);
1797                strbuf_add(&sb, path, baselen);
1798                if (!is_directory(sb.buf))
1799                        break;
1800                if (simplify_away(sb.buf, sb.len, simplify))
1801                        break;
1802                if (treat_one_path(dir, NULL, &sb, baselen, simplify,
1803                                   DT_DIR, NULL) == path_none)
1804                        break; /* do not recurse into it */
1805                if (len <= baselen) {
1806                        rc = 1;
1807                        break; /* finished checking */
1808                }
1809        }
1810        strbuf_release(&sb);
1811        dir->flags = old_flags;
1812        return rc;
1813}
1814
1815static const char *get_ident_string(void)
1816{
1817        static struct strbuf sb = STRBUF_INIT;
1818        struct utsname uts;
1819
1820        if (sb.len)
1821                return sb.buf;
1822        if (uname(&uts) < 0)
1823                die_errno(_("failed to get kernel name and information"));
1824        strbuf_addf(&sb, "Location %s, system %s", get_git_work_tree(),
1825                    uts.sysname);
1826        return sb.buf;
1827}
1828
1829static int ident_in_untracked(const struct untracked_cache *uc)
1830{
1831        /*
1832         * Previous git versions may have saved many NUL separated
1833         * strings in the "ident" field, but it is insane to manage
1834         * many locations, so just take care of the first one.
1835         */
1836
1837        return !strcmp(uc->ident.buf, get_ident_string());
1838}
1839
1840static void set_untracked_ident(struct untracked_cache *uc)
1841{
1842        strbuf_reset(&uc->ident);
1843        strbuf_addstr(&uc->ident, get_ident_string());
1844
1845        /*
1846         * This strbuf used to contain a list of NUL separated
1847         * strings, so save NUL too for backward compatibility.
1848         */
1849        strbuf_addch(&uc->ident, 0);
1850}
1851
1852static void new_untracked_cache(struct index_state *istate)
1853{
1854        struct untracked_cache *uc = xcalloc(1, sizeof(*uc));
1855        strbuf_init(&uc->ident, 100);
1856        uc->exclude_per_dir = ".gitignore";
1857        /* should be the same flags used by git-status */
1858        uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
1859        set_untracked_ident(uc);
1860        istate->untracked = uc;
1861        istate->cache_changed |= UNTRACKED_CHANGED;
1862}
1863
1864void add_untracked_cache(struct index_state *istate)
1865{
1866        if (!istate->untracked) {
1867                new_untracked_cache(istate);
1868        } else {
1869                if (!ident_in_untracked(istate->untracked)) {
1870                        free_untracked_cache(istate->untracked);
1871                        new_untracked_cache(istate);
1872                }
1873        }
1874}
1875
1876void remove_untracked_cache(struct index_state *istate)
1877{
1878        if (istate->untracked) {
1879                free_untracked_cache(istate->untracked);
1880                istate->untracked = NULL;
1881                istate->cache_changed |= UNTRACKED_CHANGED;
1882        }
1883}
1884
1885static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
1886                                                      int base_len,
1887                                                      const struct pathspec *pathspec)
1888{
1889        struct untracked_cache_dir *root;
1890
1891        if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE"))
1892                return NULL;
1893
1894        /*
1895         * We only support $GIT_DIR/info/exclude and core.excludesfile
1896         * as the global ignore rule files. Any other additions
1897         * (e.g. from command line) invalidate the cache. This
1898         * condition also catches running setup_standard_excludes()
1899         * before setting dir->untracked!
1900         */
1901        if (dir->unmanaged_exclude_files)
1902                return NULL;
1903
1904        /*
1905         * Optimize for the main use case only: whole-tree git
1906         * status. More work involved in treat_leading_path() if we
1907         * use cache on just a subset of the worktree. pathspec
1908         * support could make the matter even worse.
1909         */
1910        if (base_len || (pathspec && pathspec->nr))
1911                return NULL;
1912
1913        /* Different set of flags may produce different results */
1914        if (dir->flags != dir->untracked->dir_flags ||
1915            /*
1916             * See treat_directory(), case index_nonexistent. Without
1917             * this flag, we may need to also cache .git file content
1918             * for the resolve_gitlink_ref() call, which we don't.
1919             */
1920            !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) ||
1921            /* We don't support collecting ignore files */
1922            (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO |
1923                           DIR_COLLECT_IGNORED)))
1924                return NULL;
1925
1926        /*
1927         * If we use .gitignore in the cache and now you change it to
1928         * .gitexclude, everything will go wrong.
1929         */
1930        if (dir->exclude_per_dir != dir->untracked->exclude_per_dir &&
1931            strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir))
1932                return NULL;
1933
1934        /*
1935         * EXC_CMDL is not considered in the cache. If people set it,
1936         * skip the cache.
1937         */
1938        if (dir->exclude_list_group[EXC_CMDL].nr)
1939                return NULL;
1940
1941        if (!ident_in_untracked(dir->untracked)) {
1942                warning(_("Untracked cache is disabled on this system or location."));
1943                return NULL;
1944        }
1945
1946        if (!dir->untracked->root) {
1947                const int len = sizeof(*dir->untracked->root);
1948                dir->untracked->root = xmalloc(len);
1949                memset(dir->untracked->root, 0, len);
1950        }
1951
1952        /* Validate $GIT_DIR/info/exclude and core.excludesfile */
1953        root = dir->untracked->root;
1954        if (hashcmp(dir->ss_info_exclude.sha1,
1955                    dir->untracked->ss_info_exclude.sha1)) {
1956                invalidate_gitignore(dir->untracked, root);
1957                dir->untracked->ss_info_exclude = dir->ss_info_exclude;
1958        }
1959        if (hashcmp(dir->ss_excludes_file.sha1,
1960                    dir->untracked->ss_excludes_file.sha1)) {
1961                invalidate_gitignore(dir->untracked, root);
1962                dir->untracked->ss_excludes_file = dir->ss_excludes_file;
1963        }
1964
1965        /* Make sure this directory is not dropped out at saving phase */
1966        root->recurse = 1;
1967        return root;
1968}
1969
1970int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
1971{
1972        struct path_simplify *simplify;
1973        struct untracked_cache_dir *untracked;
1974
1975        /*
1976         * Check out create_simplify()
1977         */
1978        if (pathspec)
1979                GUARD_PATHSPEC(pathspec,
1980                               PATHSPEC_FROMTOP |
1981                               PATHSPEC_MAXDEPTH |
1982                               PATHSPEC_LITERAL |
1983                               PATHSPEC_GLOB |
1984                               PATHSPEC_ICASE |
1985                               PATHSPEC_EXCLUDE);
1986
1987        if (has_symlink_leading_path(path, len))
1988                return dir->nr;
1989
1990        /*
1991         * exclude patterns are treated like positive ones in
1992         * create_simplify. Usually exclude patterns should be a
1993         * subset of positive ones, which has no impacts on
1994         * create_simplify().
1995         */
1996        simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
1997        untracked = validate_untracked_cache(dir, len, pathspec);
1998        if (!untracked)
1999                /*
2000                 * make sure untracked cache code path is disabled,
2001                 * e.g. prep_exclude()
2002                 */
2003                dir->untracked = NULL;
2004        if (!len || treat_leading_path(dir, path, len, simplify))
2005                read_directory_recursive(dir, path, len, untracked, 0, simplify);
2006        free_simplify(simplify);
2007        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
2008        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
2009        if (dir->untracked) {
2010                static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
2011                trace_printf_key(&trace_untracked_stats,
2012                                 "node creation: %u\n"
2013                                 "gitignore invalidation: %u\n"
2014                                 "directory invalidation: %u\n"
2015                                 "opendir: %u\n",
2016                                 dir->untracked->dir_created,
2017                                 dir->untracked->gitignore_invalidated,
2018                                 dir->untracked->dir_invalidated,
2019                                 dir->untracked->dir_opened);
2020                if (dir->untracked == the_index.untracked &&
2021                    (dir->untracked->dir_opened ||
2022                     dir->untracked->gitignore_invalidated ||
2023                     dir->untracked->dir_invalidated))
2024                        the_index.cache_changed |= UNTRACKED_CHANGED;
2025                if (dir->untracked != the_index.untracked) {
2026                        free(dir->untracked);
2027                        dir->untracked = NULL;
2028                }
2029        }
2030        return dir->nr;
2031}
2032
/*
 * Return 1 when lstat() succeeds on f, 0 otherwise. Because lstat()
 * is used, a symbolic link counts as existing even when its target
 * does not.
 */
int file_exists(const char *f)
{
        struct stat st;

        if (lstat(f, &st))
                return 0;
        return 1;
}
2038
2039static int cmp_icase(char a, char b)
2040{
2041        if (a == b)
2042                return 0;
2043        if (ignore_case)
2044                return toupper(a) - toupper(b);
2045        return a - b;
2046}
2047
/*
 * Given two normalized paths (a trailing slash is ok), if subdir is
 * outside dir, return -1.  Otherwise return the offset in subdir that
 * can be used as relative path to dir.
 *
 * Both arguments must be non-NULL, non-empty strings.
 */
int dir_inside_of(const char *subdir, const char *dir)
{
        int offset;

        assert(dir && subdir && *dir && *subdir);

        /* walk over the prefix both paths have in common */
        for (offset = 0;
             dir[offset] && subdir[offset] &&
             !cmp_icase(dir[offset], subdir[offset]);
             offset++)
                ;

        /* hel[p]/me vs hel[l]/yeah */
        if (dir[offset] && subdir[offset])
                return -1;

        /* subdir ended: inside only if dir ended too (same dir) */
        if (!subdir[offset])
                return dir[offset] ? -1 : offset;

        /* foo/[b]ar vs foo/[] */
        if (is_dir_sep(dir[offset - 1]))
                return is_dir_sep(subdir[offset - 1]) ? offset : -1;

        /* foo[/]bar vs foo[] */
        return is_dir_sep(subdir[offset]) ? offset + 1 : -1;
}
2079
/*
 * Return 1 when the current working directory is dir or lies below
 * it according to dir_inside_of(), 0 otherwise (including when dir is
 * NULL).
 */
int is_inside_dir(const char *dir)
{
        char *cwd;
        int inside;

        if (!dir)
                return 0;

        cwd = xgetcwd();
        inside = dir_inside_of(cwd, dir) >= 0;
        free(cwd);
        return inside;
}
2093
/*
 * Return 1 when path can be opened as a directory and holds nothing
 * but "." and "..", 0 otherwise (including when it cannot be opened).
 */
int is_empty_dir(const char *path)
{
        DIR *handle = opendir(path);
        struct dirent *entry;

        if (!handle)
                return 0;

        /* stop at the first real entry, or at the end of the listing */
        do {
                entry = readdir(handle);
        } while (entry && is_dot_or_dotdot(entry->d_name));

        closedir(handle);
        return !entry;
}
2112
2113static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
2114{
2115        DIR *dir;
2116        struct dirent *e;
2117        int ret = 0, original_len = path->len, len, kept_down = 0;
2118        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
2119        int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL);
2120        unsigned char submodule_head[20];
2121
2122        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
2123            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
2124                /* Do not descend and nuke a nested git work tree. */
2125                if (kept_up)
2126                        *kept_up = 1;
2127                return 0;
2128        }
2129
2130        flag &= ~REMOVE_DIR_KEEP_TOPLEVEL;
2131        dir = opendir(path->buf);
2132        if (!dir) {
2133                if (errno == ENOENT)
2134                        return keep_toplevel ? -1 : 0;
2135                else if (errno == EACCES && !keep_toplevel)
2136                        /*
2137                         * An empty dir could be removable even if it
2138                         * is unreadable:
2139                         */
2140                        return rmdir(path->buf);
2141                else
2142                        return -1;
2143        }
2144        strbuf_complete(path, '/');
2145
2146        len = path->len;
2147        while ((e = readdir(dir)) != NULL) {
2148                struct stat st;
2149                if (is_dot_or_dotdot(e->d_name))
2150                        continue;
2151
2152                strbuf_setlen(path, len);
2153                strbuf_addstr(path, e->d_name);
2154                if (lstat(path->buf, &st)) {
2155                        if (errno == ENOENT)
2156                                /*
2157                                 * file disappeared, which is what we
2158                                 * wanted anyway
2159                                 */
2160                                continue;
2161                        /* fall thru */
2162                } else if (S_ISDIR(st.st_mode)) {
2163                        if (!remove_dir_recurse(path, flag, &kept_down))
2164                                continue; /* happy */
2165                } else if (!only_empty &&
2166                           (!unlink(path->buf) || errno == ENOENT)) {
2167                        continue; /* happy, too */
2168                }
2169
2170                /* path too long, stat fails, or non-directory still exists */
2171                ret = -1;
2172                break;
2173        }
2174        closedir(dir);
2175
2176        strbuf_setlen(path, original_len);
2177        if (!ret && !keep_toplevel && !kept_down)
2178                ret = (!rmdir(path->buf) || errno == ENOENT) ? 0 : -1;
2179        else if (kept_up)
2180                /*
2181                 * report the uplevel that it is not an error that we
2182                 * did not rmdir() our directory.
2183                 */
2184                *kept_up = !ret;
2185        return ret;
2186}
2187
/*
 * Remove the directory named by path together with its contents,
 * honouring the REMOVE_DIR_* bits in flag. Public entry point for
 * remove_dir_recurse(); the caller is not told whether anything was
 * deliberately kept. Returns 0 on success, non-zero on failure.
 */
int remove_dir_recursively(struct strbuf *path, int flag)
{
        int *no_report = NULL;

        return remove_dir_recurse(path, flag, no_report);
}
2192
2193static GIT_PATH_FUNC(git_path_info_exclude, "info/exclude")
2194
2195void setup_standard_excludes(struct dir_struct *dir)
2196{
2197        const char *path;
2198
2199        dir->exclude_per_dir = ".gitignore";
2200
2201        /* core.excludefile defaulting to $XDG_HOME/git/ignore */
2202        if (!excludes_file)
2203                excludes_file = xdg_config_home("ignore");
2204        if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
2205                add_excludes_from_file_1(dir, excludes_file,
2206                                         dir->untracked ? &dir->ss_excludes_file : NULL);
2207
2208        /* per repository user preference */
2209        path = git_path_info_exclude();
2210        if (!access_or_warn(path, R_OK, 0))
2211                add_excludes_from_file_1(dir, path,
2212                                         dir->untracked ? &dir->ss_info_exclude : NULL);
2213}
2214
/*
 * Remove the file "name" and then prune its leading directories, from
 * the innermost outwards, stopping at the first one rmdir() refuses
 * to remove.
 *
 * Returns -1 when unlink() fails for a reason other than ENOENT or
 * ENOTDIR, and 0 otherwise; failures to remove directories are not
 * reported.
 */
int remove_path(const char *name)
{
        const char *last;
        char *dirs, *cut;

        if (unlink(name) && errno != ENOENT && errno != ENOTDIR)
                return -1;

        last = strrchr(name, '/');
        if (!last)
                return 0;

        /* work on a private copy, chopping one component off at a time */
        dirs = xstrdup(name);
        for (cut = dirs + (last - name); cut; cut = strrchr(dirs, '/')) {
                *cut = '\0';
                if (rmdir(dirs))
                        break;
        }
        free(dirs);
        return 0;
}
2233
2234/*
2235 * Frees memory within dir which was allocated for exclude lists and
2236 * the exclude_stack.  Does not free dir itself.
2237 */
2238void clear_directory(struct dir_struct *dir)
2239{
2240        int i, j;
2241        struct exclude_list_group *group;
2242        struct exclude_list *el;
2243        struct exclude_stack *stk;
2244
2245        for (i = EXC_CMDL; i <= EXC_FILE; i++) {
2246                group = &dir->exclude_list_group[i];
2247                for (j = 0; j < group->nr; j++) {
2248                        el = &group->el[j];
2249                        if (i == EXC_DIRS)
2250                                free((char *)el->src);
2251                        clear_exclude_list(el);
2252                }
2253                free(group->el);
2254        }
2255
2256        stk = dir->exclude_stack;
2257        while (stk) {
2258                struct exclude_stack *prev = stk->prev;
2259                free(stk);
2260                stk = prev;
2261        }
2262        strbuf_release(&dir->basebuf);
2263}
2264
2265struct ondisk_untracked_cache {
2266        struct stat_data info_exclude_stat;
2267        struct stat_data excludes_file_stat;
2268        uint32_t dir_flags;
2269        unsigned char info_exclude_sha1[20];
2270        unsigned char excludes_file_sha1[20];
2271        char exclude_per_dir[FLEX_ARRAY];
2272};
2273
2274#define ouc_size(len) (offsetof(struct ondisk_untracked_cache, exclude_per_dir) + len + 1)
2275
2276struct write_data {
2277        int index;         /* number of written untracked_cache_dir */
2278        struct ewah_bitmap *check_only; /* from untracked_cache_dir */
2279        struct ewah_bitmap *valid;      /* from untracked_cache_dir */
2280        struct ewah_bitmap *sha1_valid; /* set if exclude_sha1 is not null */
2281        struct strbuf out;
2282        struct strbuf sb_stat;
2283        struct strbuf sb_sha1;
2284};
2285
2286static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
2287{
2288        to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
2289        to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
2290        to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
2291        to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
2292        to->sd_dev        = htonl(from->sd_dev);
2293        to->sd_ino        = htonl(from->sd_ino);
2294        to->sd_uid        = htonl(from->sd_uid);
2295        to->sd_gid        = htonl(from->sd_gid);
2296        to->sd_size       = htonl(from->sd_size);
2297}
2298
2299static void write_one_dir(struct untracked_cache_dir *untracked,
2300                          struct write_data *wd)
2301{
2302        struct stat_data stat_data;
2303        struct strbuf *out = &wd->out;
2304        unsigned char intbuf[16];
2305        unsigned int intlen, value;
2306        int i = wd->index++;
2307
2308        /*
2309         * untracked_nr should be reset whenever valid is clear, but
2310         * for safety..
2311         */
2312        if (!untracked->valid) {
2313                untracked->untracked_nr = 0;
2314                untracked->check_only = 0;
2315        }
2316
2317        if (untracked->check_only)
2318                ewah_set(wd->check_only, i);
2319        if (untracked->valid) {
2320                ewah_set(wd->valid, i);
2321                stat_data_to_disk(&stat_data, &untracked->stat_data);
2322                strbuf_add(&wd->sb_stat, &stat_data, sizeof(stat_data));
2323        }
2324        if (!is_null_sha1(untracked->exclude_sha1)) {
2325                ewah_set(wd->sha1_valid, i);
2326                strbuf_add(&wd->sb_sha1, untracked->exclude_sha1, 20);
2327        }
2328
2329        intlen = encode_varint(untracked->untracked_nr, intbuf);
2330        strbuf_add(out, intbuf, intlen);
2331
2332        /* skip non-recurse directories */
2333        for (i = 0, value = 0; i < untracked->dirs_nr; i++)
2334                if (untracked->dirs[i]->recurse)
2335                        value++;
2336        intlen = encode_varint(value, intbuf);
2337        strbuf_add(out, intbuf, intlen);
2338
2339        strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
2340
2341        for (i = 0; i < untracked->untracked_nr; i++)
2342                strbuf_add(out, untracked->untracked[i],
2343                           strlen(untracked->untracked[i]) + 1);
2344
2345        for (i = 0; i < untracked->dirs_nr; i++)
2346                if (untracked->dirs[i]->recurse)
2347                        write_one_dir(untracked->dirs[i], wd);
2348}
2349
2350void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
2351{
2352        struct ondisk_untracked_cache *ouc;
2353        struct write_data wd;
2354        unsigned char varbuf[16];
2355        int varint_len;
2356        size_t len = strlen(untracked->exclude_per_dir);
2357
2358        FLEX_ALLOC_MEM(ouc, exclude_per_dir, untracked->exclude_per_dir, len);
2359        stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
2360        stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
2361        hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
2362        hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
2363        ouc->dir_flags = htonl(untracked->dir_flags);
2364
2365        varint_len = encode_varint(untracked->ident.len, varbuf);
2366        strbuf_add(out, varbuf, varint_len);
2367        strbuf_addbuf(out, &untracked->ident);
2368
2369        strbuf_add(out, ouc, ouc_size(len));
2370        free(ouc);
2371        ouc = NULL;
2372
2373        if (!untracked->root) {
2374                varint_len = encode_varint(0, varbuf);
2375                strbuf_add(out, varbuf, varint_len);
2376                return;
2377        }
2378
2379        wd.index      = 0;
2380        wd.check_only = ewah_new();
2381        wd.valid      = ewah_new();
2382        wd.sha1_valid = ewah_new();
2383        strbuf_init(&wd.out, 1024);
2384        strbuf_init(&wd.sb_stat, 1024);
2385        strbuf_init(&wd.sb_sha1, 1024);
2386        write_one_dir(untracked->root, &wd);
2387
2388        varint_len = encode_varint(wd.index, varbuf);
2389        strbuf_add(out, varbuf, varint_len);
2390        strbuf_addbuf(out, &wd.out);
2391        ewah_serialize_strbuf(wd.valid, out);
2392        ewah_serialize_strbuf(wd.check_only, out);
2393        ewah_serialize_strbuf(wd.sha1_valid, out);
2394        strbuf_addbuf(out, &wd.sb_stat);
2395        strbuf_addbuf(out, &wd.sb_sha1);
2396        strbuf_addch(out, '\0'); /* safe guard for string lists */
2397
2398        ewah_free(wd.valid);
2399        ewah_free(wd.check_only);
2400        ewah_free(wd.sha1_valid);
2401        strbuf_release(&wd.out);
2402        strbuf_release(&wd.sb_stat);
2403        strbuf_release(&wd.sb_sha1);
2404}
2405
2406static void free_untracked(struct untracked_cache_dir *ucd)
2407{
2408        int i;
2409        if (!ucd)
2410                return;
2411        for (i = 0; i < ucd->dirs_nr; i++)
2412                free_untracked(ucd->dirs[i]);
2413        for (i = 0; i < ucd->untracked_nr; i++)
2414                free(ucd->untracked[i]);
2415        free(ucd->untracked);
2416        free(ucd->dirs);
2417        free(ucd);
2418}
2419
2420void free_untracked_cache(struct untracked_cache *uc)
2421{
2422        if (uc)
2423                free_untracked(uc->root);
2424        free(uc);
2425}
2426
/*
 * Parsing state handed to read_one_dir() while the serialized cache
 * is decoded; by its field names the reading counterpart of struct
 * write_data.
 */
struct read_data {
        int index;                      /* NOTE(review): presumably the running node number - confirm */
        struct untracked_cache_dir **ucd; /* NOTE(review): presumably nodes by number - confirm */
        struct ewah_bitmap *check_only;
        struct ewah_bitmap *valid;
        struct ewah_bitmap *sha1_valid;
        const unsigned char *data;      /* current read position */
        const unsigned char *end;       /* parsing must not move past this */
};
2436
2437static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
2438{
2439        to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
2440        to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
2441        to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
2442        to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
2443        to->sd_dev        = get_be32(&from->sd_dev);
2444        to->sd_ino        = get_be32(&from->sd_ino);
2445        to->sd_uid        = get_be32(&from->sd_uid);
2446        to->sd_gid        = get_be32(&from->sd_gid);
2447        to->sd_size       = get_be32(&from->sd_size);
2448}
2449
2450static int read_one_dir(struct untracked_cache_dir **untracked_,
2451                        struct read_data *rd)
2452{
2453        struct untracked_cache_dir ud, *untracked;
2454        const unsigned char *next, *data = rd->data, *end = rd->end;
2455        unsigned int value;
2456        int i, len;
2457
2458        memset(&ud, 0, sizeof(ud));
2459
2460        next = data;
2461        value = decode_varint(&next);
2462        if (next > end)
2463                return -1;
2464        ud.recurse         = 1;
2465        ud.untracked_alloc = value;
2466        ud.untracked_nr    = value;
2467        if (ud.untracked_nr)
2468                ALLOC_ARRAY(ud.untracked, ud.untracked_nr);
2469        data = next;
2470
2471        next = data;
2472        ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
2473        if (next > end)
2474                return -1;
2475        ALLOC_ARRAY(ud.dirs, ud.dirs_nr);
2476        data = next;
2477
2478        len = strlen((const char *)data);
2479        next = data + len + 1;
2480        if (next > rd->end)
2481                return -1;
2482        *untracked_ = untracked = xmalloc(st_add(sizeof(*untracked), len));
2483        memcpy(untracked, &ud, sizeof(ud));
2484        memcpy(untracked->name, data, len + 1);
2485        data = next;
2486
2487        for (i = 0; i < untracked->untracked_nr; i++) {
2488                len = strlen((const char *)data);
2489                next = data + len + 1;
2490                if (next > rd->end)
2491                        return -1;
2492                untracked->untracked[i] = xstrdup((const char*)data);
2493                data = next;
2494        }
2495
2496        rd->ucd[rd->index++] = untracked;
2497        rd->data = data;
2498
2499        for (i = 0; i < untracked->dirs_nr; i++) {
2500                len = read_one_dir(untracked->dirs + i, rd);
2501                if (len < 0)
2502                        return -1;
2503        }
2504        return 0;
2505}
2506
2507static void set_check_only(size_t pos, void *cb)
2508{
2509        struct read_data *rd = cb;
2510        struct untracked_cache_dir *ud = rd->ucd[pos];
2511        ud->check_only = 1;
2512}
2513
2514static void read_stat(size_t pos, void *cb)
2515{
2516        struct read_data *rd = cb;
2517        struct untracked_cache_dir *ud = rd->ucd[pos];
2518        if (rd->data + sizeof(struct stat_data) > rd->end) {
2519                rd->data = rd->end + 1;
2520                return;
2521        }
2522        stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data);
2523        rd->data += sizeof(struct stat_data);
2524        ud->valid = 1;
2525}
2526
2527static void read_sha1(size_t pos, void *cb)
2528{
2529        struct read_data *rd = cb;
2530        struct untracked_cache_dir *ud = rd->ucd[pos];
2531        if (rd->data + 20 > rd->end) {
2532                rd->data = rd->end + 1;
2533                return;
2534        }
2535        hashcpy(ud->exclude_sha1, rd->data);
2536        rd->data += 20;
2537}
2538
2539static void load_sha1_stat(struct sha1_stat *sha1_stat,
2540                           const struct stat_data *stat,
2541                           const unsigned char *sha1)
2542{
2543        stat_data_from_disk(&sha1_stat->stat, stat);
2544        hashcpy(sha1_stat->sha1, sha1);
2545        sha1_stat->valid = 1;
2546}
2547
2548struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
2549{
2550        const struct ondisk_untracked_cache *ouc;
2551        struct untracked_cache *uc;
2552        struct read_data rd;
2553        const unsigned char *next = data, *end = (const unsigned char *)data + sz;
2554        const char *ident;
2555        int ident_len, len;
2556
2557        if (sz <= 1 || end[-1] != '\0')
2558                return NULL;
2559        end--;
2560
2561        ident_len = decode_varint(&next);
2562        if (next + ident_len > end)
2563                return NULL;
2564        ident = (const char *)next;
2565        next += ident_len;
2566
2567        ouc = (const struct ondisk_untracked_cache *)next;
2568        if (next + ouc_size(0) > end)
2569                return NULL;
2570
2571        uc = xcalloc(1, sizeof(*uc));
2572        strbuf_init(&uc->ident, ident_len);
2573        strbuf_add(&uc->ident, ident, ident_len);
2574        load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
2575                       ouc->info_exclude_sha1);
2576        load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
2577                       ouc->excludes_file_sha1);
2578        uc->dir_flags = get_be32(&ouc->dir_flags);
2579        uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
2580        /* NUL after exclude_per_dir is covered by sizeof(*ouc) */
2581        next += ouc_size(strlen(ouc->exclude_per_dir));
2582        if (next >= end)
2583                goto done2;
2584
2585        len = decode_varint(&next);
2586        if (next > end || len == 0)
2587                goto done2;
2588
2589        rd.valid      = ewah_new();
2590        rd.check_only = ewah_new();
2591        rd.sha1_valid = ewah_new();
2592        rd.data       = next;
2593        rd.end        = end;
2594        rd.index      = 0;
2595        ALLOC_ARRAY(rd.ucd, len);
2596
2597        if (read_one_dir(&uc->root, &rd) || rd.index != len)
2598                goto done;
2599
2600        next = rd.data;
2601        len = ewah_read_mmap(rd.valid, next, end - next);
2602        if (len < 0)
2603                goto done;
2604
2605        next += len;
2606        len = ewah_read_mmap(rd.check_only, next, end - next);
2607        if (len < 0)
2608                goto done;
2609
2610        next += len;
2611        len = ewah_read_mmap(rd.sha1_valid, next, end - next);
2612        if (len < 0)
2613                goto done;
2614
2615        ewah_each_bit(rd.check_only, set_check_only, &rd);
2616        rd.data = next + len;
2617        ewah_each_bit(rd.valid, read_stat, &rd);
2618        ewah_each_bit(rd.sha1_valid, read_sha1, &rd);
2619        next = rd.data;
2620
2621done:
2622        free(rd.ucd);
2623        ewah_free(rd.valid);
2624        ewah_free(rd.check_only);
2625        ewah_free(rd.sha1_valid);
2626done2:
2627        if (next != end) {
2628                free_untracked_cache(uc);
2629                uc = NULL;
2630        }
2631        return uc;
2632}
2633
2634static void invalidate_one_directory(struct untracked_cache *uc,
2635                                     struct untracked_cache_dir *ucd)
2636{
2637        uc->dir_invalidated++;
2638        ucd->valid = 0;
2639        ucd->untracked_nr = 0;
2640}
2641
2642/*
2643 * Normally when an entry is added or removed from a directory,
2644 * invalidating that directory is enough. No need to touch its
2645 * ancestors. When a directory is shown as "foo/bar/" in git-status
2646 * however, deleting or adding an entry may have cascading effect.
2647 *
2648 * Say the "foo/bar/file" has become untracked, we need to tell the
2649 * untracked_cache_dir of "foo" that "bar/" is not an untracked
2650 * directory any more (because "bar" is managed by foo as an untracked
2651 * "file").
2652 *
2653 * Similarly, if "foo/bar/file" moves from untracked to tracked and it
2654 * was the last untracked entry in the entire "foo", we should show
2655 * "foo/" instead. Which means we have to invalidate past "bar" up to
2656 * "foo".
2657 *
2658 * This function traverses all directories from root to leaf. If there
2659 * is a chance of one of the above cases happening, we invalidate back
2660 * to root. Otherwise we just invalidate the leaf. There may be a more
2661 * sophisticated way than checking for SHOW_OTHER_DIRECTORIES to
2662 * detect these cases and avoid unnecessary invalidation, for example,
2663 * checking for the untracked entry named "bar/" in "foo", but for now
2664 * stick to something safe and simple.
2665 */
2666static int invalidate_one_component(struct untracked_cache *uc,
2667                                    struct untracked_cache_dir *dir,
2668                                    const char *path, int len)
2669{
2670        const char *rest = strchr(path, '/');
2671
2672        if (rest) {
2673                int component_len = rest - path;
2674                struct untracked_cache_dir *d =
2675                        lookup_untracked(uc, dir, path, component_len);
2676                int ret =
2677                        invalidate_one_component(uc, d, rest + 1,
2678                                                 len - (component_len + 1));
2679                if (ret)
2680                        invalidate_one_directory(uc, dir);
2681                return ret;
2682        }
2683
2684        invalidate_one_directory(uc, dir);
2685        return uc->dir_flags & DIR_SHOW_OTHER_DIRECTORIES;
2686}
2687
2688void untracked_cache_invalidate_path(struct index_state *istate,
2689                                     const char *path)
2690{
2691        if (!istate->untracked || !istate->untracked->root)
2692                return;
2693        invalidate_one_component(istate->untracked, istate->untracked->root,
2694                                 path, strlen(path));
2695}
2696
/*
 * Hook for removing "path" from the index: forwards to
 * untracked_cache_invalidate_path().
 */
void untracked_cache_remove_from_index(struct index_state *istate,
                                       const char *path)
{
        /* removal needs exactly the same invalidation as any other change */
        untracked_cache_invalidate_path(istate, path);
}
2702
/*
 * Hook for adding "path" to the index: forwards to
 * untracked_cache_invalidate_path().
 */
void untracked_cache_add_to_index(struct index_state *istate,
                                  const char *path)
{
        /* addition needs exactly the same invalidation as any other change */
        untracked_cache_invalidate_path(istate, path);
}