dir.con commit Merge branch 'jn/maint-fast-import-object-reuse' (c835288)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37static int common_prefix(const char **pathspec)
  38{
  39        const char *path, *slash, *next;
  40        int prefix;
  41
  42        if (!pathspec)
  43                return 0;
  44
  45        path = *pathspec;
  46        slash = strrchr(path, '/');
  47        if (!slash)
  48                return 0;
  49
  50        /*
  51         * The first 'prefix' characters of 'path' are common leading
  52         * path components among the pathspecs we have seen so far,
  53         * including the trailing slash.
  54         */
  55        prefix = slash - path + 1;
  56        while ((next = *++pathspec) != NULL) {
  57                int len, last_matching_slash = -1;
  58                for (len = 0; len < prefix && next[len] == path[len]; len++)
  59                        if (next[len] == '/')
  60                                last_matching_slash = len;
  61                if (len == prefix)
  62                        continue;
  63                if (last_matching_slash < 0)
  64                        return 0;
  65                prefix = last_matching_slash + 1;
  66        }
  67        return prefix;
  68}
  69
  70int fill_directory(struct dir_struct *dir, const char **pathspec)
  71{
  72        const char *path;
  73        int len;
  74
  75        /*
  76         * Calculate common prefix for the pathspec, and
  77         * use that to optimize the directory walk
  78         */
  79        len = common_prefix(pathspec);
  80        path = "";
  81
  82        if (len)
  83                path = xmemdupz(*pathspec, len);
  84
  85        /* Read the directory and prune it */
  86        read_directory(dir, path, len, pathspec);
  87        return len;
  88}
  89
  90/*
  91 * Does 'match' match the given name?
  92 * A match is found if
  93 *
  94 * (1) the 'match' string is leading directory of 'name', or
  95 * (2) the 'match' string is a wildcard and matches 'name', or
  96 * (3) the 'match' string is exactly the same as 'name'.
  97 *
  98 * and the return value tells which case it was.
  99 *
 100 * It returns 0 when there is no match.
 101 */
 102static int match_one(const char *match, const char *name, int namelen)
 103{
 104        int matchlen;
 105
 106        /* If the match was just the prefix, we matched */
 107        if (!*match)
 108                return MATCHED_RECURSIVELY;
 109
 110        if (ignore_case) {
 111                for (;;) {
 112                        unsigned char c1 = tolower(*match);
 113                        unsigned char c2 = tolower(*name);
 114                        if (c1 == '\0' || is_glob_special(c1))
 115                                break;
 116                        if (c1 != c2)
 117                                return 0;
 118                        match++;
 119                        name++;
 120                        namelen--;
 121                }
 122        } else {
 123                for (;;) {
 124                        unsigned char c1 = *match;
 125                        unsigned char c2 = *name;
 126                        if (c1 == '\0' || is_glob_special(c1))
 127                                break;
 128                        if (c1 != c2)
 129                                return 0;
 130                        match++;
 131                        name++;
 132                        namelen--;
 133                }
 134        }
 135
 136
 137        /*
 138         * If we don't match the matchstring exactly,
 139         * we need to match by fnmatch
 140         */
 141        matchlen = strlen(match);
 142        if (strncmp_icase(match, name, matchlen))
 143                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 144
 145        if (namelen == matchlen)
 146                return MATCHED_EXACTLY;
 147        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 148                return MATCHED_RECURSIVELY;
 149        return 0;
 150}
 151
 152/*
 153 * Given a name and a list of pathspecs, see if the name matches
 154 * any of the pathspecs.  The caller is also interested in seeing
 155 * all pathspec matches some names it calls this function with
 156 * (otherwise the user could have mistyped the unmatched pathspec),
 157 * and a mark is left in seen[] array for pathspec element that
 158 * actually matched anything.
 159 */
 160int match_pathspec(const char **pathspec, const char *name, int namelen,
 161                int prefix, char *seen)
 162{
 163        int i, retval = 0;
 164
 165        if (!pathspec)
 166                return 1;
 167
 168        name += prefix;
 169        namelen -= prefix;
 170
 171        for (i = 0; pathspec[i] != NULL; i++) {
 172                int how;
 173                const char *match = pathspec[i] + prefix;
 174                if (seen && seen[i] == MATCHED_EXACTLY)
 175                        continue;
 176                how = match_one(match, name, namelen);
 177                if (how) {
 178                        if (retval < how)
 179                                retval = how;
 180                        if (seen && seen[i] < how)
 181                                seen[i] = how;
 182                }
 183        }
 184        return retval;
 185}
 186
 187static int no_wildcard(const char *string)
 188{
 189        return string[strcspn(string, "*?[{\\")] == '\0';
 190}
 191
 192void add_exclude(const char *string, const char *base,
 193                 int baselen, struct exclude_list *which)
 194{
 195        struct exclude *x;
 196        size_t len;
 197        int to_exclude = 1;
 198        int flags = 0;
 199
 200        if (*string == '!') {
 201                to_exclude = 0;
 202                string++;
 203        }
 204        len = strlen(string);
 205        if (len && string[len - 1] == '/') {
 206                char *s;
 207                x = xmalloc(sizeof(*x) + len);
 208                s = (char *)(x+1);
 209                memcpy(s, string, len - 1);
 210                s[len - 1] = '\0';
 211                string = s;
 212                x->pattern = s;
 213                flags = EXC_FLAG_MUSTBEDIR;
 214        } else {
 215                x = xmalloc(sizeof(*x));
 216                x->pattern = string;
 217        }
 218        x->to_exclude = to_exclude;
 219        x->patternlen = strlen(string);
 220        x->base = base;
 221        x->baselen = baselen;
 222        x->flags = flags;
 223        if (!strchr(string, '/'))
 224                x->flags |= EXC_FLAG_NODIR;
 225        if (no_wildcard(string))
 226                x->flags |= EXC_FLAG_NOWILDCARD;
 227        if (*string == '*' && no_wildcard(string+1))
 228                x->flags |= EXC_FLAG_ENDSWITH;
 229        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 230        which->excludes[which->nr++] = x;
 231}
 232
 233static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 234{
 235        int pos, len;
 236        unsigned long sz;
 237        enum object_type type;
 238        void *data;
 239        struct index_state *istate = &the_index;
 240
 241        len = strlen(path);
 242        pos = index_name_pos(istate, path, len);
 243        if (pos < 0)
 244                return NULL;
 245        if (!ce_skip_worktree(istate->cache[pos]))
 246                return NULL;
 247        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 248        if (!data || type != OBJ_BLOB) {
 249                free(data);
 250                return NULL;
 251        }
 252        *size = xsize_t(sz);
 253        return data;
 254}
 255
 256int add_excludes_from_file_to_list(const char *fname,
 257                                   const char *base,
 258                                   int baselen,
 259                                   char **buf_p,
 260                                   struct exclude_list *which,
 261                                   int check_index)
 262{
 263        struct stat st;
 264        int fd, i;
 265        size_t size = 0;
 266        char *buf, *entry;
 267
 268        fd = open(fname, O_RDONLY);
 269        if (fd < 0 || fstat(fd, &st) < 0) {
 270                if (0 <= fd)
 271                        close(fd);
 272                if (!check_index ||
 273                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 274                        return -1;
 275                if (size == 0) {
 276                        free(buf);
 277                        return 0;
 278                }
 279                if (buf[size-1] != '\n') {
 280                        buf = xrealloc(buf, size+1);
 281                        buf[size++] = '\n';
 282                }
 283        }
 284        else {
 285                size = xsize_t(st.st_size);
 286                if (size == 0) {
 287                        close(fd);
 288                        return 0;
 289                }
 290                buf = xmalloc(size+1);
 291                if (read_in_full(fd, buf, size) != size) {
 292                        free(buf);
 293                        close(fd);
 294                        return -1;
 295                }
 296                buf[size++] = '\n';
 297                close(fd);
 298        }
 299
 300        if (buf_p)
 301                *buf_p = buf;
 302        entry = buf;
 303        for (i = 0; i < size; i++) {
 304                if (buf[i] == '\n') {
 305                        if (entry != buf + i && entry[0] != '#') {
 306                                buf[i - (i && buf[i-1] == '\r')] = 0;
 307                                add_exclude(entry, base, baselen, which);
 308                        }
 309                        entry = buf + i + 1;
 310                }
 311        }
 312        return 0;
 313}
 314
 315void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 316{
 317        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 318                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 319                die("cannot use %s as an exclude file", fname);
 320}
 321
 322static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 323{
 324        struct exclude_list *el;
 325        struct exclude_stack *stk = NULL;
 326        int current;
 327
 328        if ((!dir->exclude_per_dir) ||
 329            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 330                return; /* too long a path -- ignore */
 331
 332        /* Pop the ones that are not the prefix of the path being checked. */
 333        el = &dir->exclude_list[EXC_DIRS];
 334        while ((stk = dir->exclude_stack) != NULL) {
 335                if (stk->baselen <= baselen &&
 336                    !strncmp(dir->basebuf, base, stk->baselen))
 337                        break;
 338                dir->exclude_stack = stk->prev;
 339                while (stk->exclude_ix < el->nr)
 340                        free(el->excludes[--el->nr]);
 341                free(stk->filebuf);
 342                free(stk);
 343        }
 344
 345        /* Read from the parent directories and push them down. */
 346        current = stk ? stk->baselen : -1;
 347        while (current < baselen) {
 348                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 349                const char *cp;
 350
 351                if (current < 0) {
 352                        cp = base;
 353                        current = 0;
 354                }
 355                else {
 356                        cp = strchr(base + current + 1, '/');
 357                        if (!cp)
 358                                die("oops in prep_exclude");
 359                        cp++;
 360                }
 361                stk->prev = dir->exclude_stack;
 362                stk->baselen = cp - base;
 363                stk->exclude_ix = el->nr;
 364                memcpy(dir->basebuf + current, base + current,
 365                       stk->baselen - current);
 366                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 367                add_excludes_from_file_to_list(dir->basebuf,
 368                                               dir->basebuf, stk->baselen,
 369                                               &stk->filebuf, el, 1);
 370                dir->exclude_stack = stk;
 371                current = stk->baselen;
 372        }
 373        dir->basebuf[baselen] = '\0';
 374}
 375
 376/* Scan the list and let the last match determine the fate.
 377 * Return 1 for exclude, 0 for include and -1 for undecided.
 378 */
 379int excluded_from_list(const char *pathname,
 380                       int pathlen, const char *basename, int *dtype,
 381                       struct exclude_list *el)
 382{
 383        int i;
 384
 385        if (el->nr) {
 386                for (i = el->nr - 1; 0 <= i; i--) {
 387                        struct exclude *x = el->excludes[i];
 388                        const char *exclude = x->pattern;
 389                        int to_exclude = x->to_exclude;
 390
 391                        if (x->flags & EXC_FLAG_MUSTBEDIR) {
 392                                if (!dtype) {
 393                                        if (!prefixcmp(pathname, exclude) &&
 394                                            pathname[x->patternlen] == '/')
 395                                                return to_exclude;
 396                                        else
 397                                                continue;
 398                                }
 399                                if (*dtype == DT_UNKNOWN)
 400                                        *dtype = get_dtype(NULL, pathname, pathlen);
 401                                if (*dtype != DT_DIR)
 402                                        continue;
 403                        }
 404
 405                        if (x->flags & EXC_FLAG_NODIR) {
 406                                /* match basename */
 407                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 408                                        if (!strcmp_icase(exclude, basename))
 409                                                return to_exclude;
 410                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
 411                                        if (x->patternlen - 1 <= pathlen &&
 412                                            !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
 413                                                return to_exclude;
 414                                } else {
 415                                        if (fnmatch_icase(exclude, basename, 0) == 0)
 416                                                return to_exclude;
 417                                }
 418                        }
 419                        else {
 420                                /* match with FNM_PATHNAME:
 421                                 * exclude has base (baselen long) implicitly
 422                                 * in front of it.
 423                                 */
 424                                int baselen = x->baselen;
 425                                if (*exclude == '/')
 426                                        exclude++;
 427
 428                                if (pathlen < baselen ||
 429                                    (baselen && pathname[baselen-1] != '/') ||
 430                                    strncmp_icase(pathname, x->base, baselen))
 431                                    continue;
 432
 433                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 434                                        if (!strcmp_icase(exclude, pathname + baselen))
 435                                                return to_exclude;
 436                                } else {
 437                                        if (fnmatch_icase(exclude, pathname+baselen,
 438                                                    FNM_PATHNAME) == 0)
 439                                            return to_exclude;
 440                                }
 441                        }
 442                }
 443        }
 444        return -1; /* undecided */
 445}
 446
 447int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 448{
 449        int pathlen = strlen(pathname);
 450        int st;
 451        const char *basename = strrchr(pathname, '/');
 452        basename = (basename) ? basename+1 : pathname;
 453
 454        prep_exclude(dir, pathname, basename-pathname);
 455        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 456                switch (excluded_from_list(pathname, pathlen, basename,
 457                                           dtype_p, &dir->exclude_list[st])) {
 458                case 0:
 459                        return 0;
 460                case 1:
 461                        return 1;
 462                }
 463        }
 464        return 0;
 465}
 466
 467static struct dir_entry *dir_entry_new(const char *pathname, int len)
 468{
 469        struct dir_entry *ent;
 470
 471        ent = xmalloc(sizeof(*ent) + len + 1);
 472        ent->len = len;
 473        memcpy(ent->name, pathname, len);
 474        ent->name[len] = 0;
 475        return ent;
 476}
 477
 478static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 479{
 480        if (cache_name_exists(pathname, len, ignore_case))
 481                return NULL;
 482
 483        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 484        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 485}
 486
 487struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 488{
 489        if (!cache_name_is_other(pathname, len))
 490                return NULL;
 491
 492        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 493        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 494}
 495
 496enum exist_status {
 497        index_nonexistent = 0,
 498        index_directory,
 499        index_gitdir
 500};
 501
 502/*
 503 * Do not use the alphabetically stored index to look up
 504 * the directory name; instead, use the case insensitive
 505 * name hash.
 506 */
 507static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 508{
 509        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 510        unsigned char endchar;
 511
 512        if (!ce)
 513                return index_nonexistent;
 514        endchar = ce->name[len];
 515
 516        /*
 517         * The cache_entry structure returned will contain this dirname
 518         * and possibly additional path components.
 519         */
 520        if (endchar == '/')
 521                return index_directory;
 522
 523        /*
 524         * If there are no additional path components, then this cache_entry
 525         * represents a submodule.  Submodules, despite being directories,
 526         * are stored in the cache without a closing slash.
 527         */
 528        if (!endchar && S_ISGITLINK(ce->ce_mode))
 529                return index_gitdir;
 530
 531        /* This should never be hit, but it exists just in case. */
 532        return index_nonexistent;
 533}
 534
 535/*
 536 * The index sorts alphabetically by entry name, which
 537 * means that a gitlink sorts as '\0' at the end, while
 538 * a directory (which is defined not as an entry, but as
 539 * the files it contains) will sort with the '/' at the
 540 * end.
 541 */
 542static enum exist_status directory_exists_in_index(const char *dirname, int len)
 543{
 544        int pos;
 545
 546        if (ignore_case)
 547                return directory_exists_in_index_icase(dirname, len);
 548
 549        pos = cache_name_pos(dirname, len);
 550        if (pos < 0)
 551                pos = -pos-1;
 552        while (pos < active_nr) {
 553                struct cache_entry *ce = active_cache[pos++];
 554                unsigned char endchar;
 555
 556                if (strncmp(ce->name, dirname, len))
 557                        break;
 558                endchar = ce->name[len];
 559                if (endchar > '/')
 560                        break;
 561                if (endchar == '/')
 562                        return index_directory;
 563                if (!endchar && S_ISGITLINK(ce->ce_mode))
 564                        return index_gitdir;
 565        }
 566        return index_nonexistent;
 567}
 568
 569/*
 570 * When we find a directory when traversing the filesystem, we
 571 * have three distinct cases:
 572 *
 573 *  - ignore it
 574 *  - see it as a directory
 575 *  - recurse into it
 576 *
 577 * and which one we choose depends on a combination of existing
 578 * git index contents and the flags passed into the directory
 579 * traversal routine.
 580 *
 581 * Case 1: If we *already* have entries in the index under that
 582 * directory name, we always recurse into the directory to see
 583 * all the files.
 584 *
 585 * Case 2: If we *already* have that directory name as a gitlink,
 586 * we always continue to see it as a gitlink, regardless of whether
 587 * there is an actual git directory there or not (it might not
 588 * be checked out as a subproject!)
 589 *
 590 * Case 3: if we didn't have it in the index previously, we
 591 * have a few sub-cases:
 592 *
 593 *  (a) if "show_other_directories" is true, we show it as
 594 *      just a directory, unless "hide_empty_directories" is
 595 *      also true and the directory is empty, in which case
 596 *      we just ignore it entirely.
 597 *  (b) if it looks like a git directory, and we don't have
 598 *      'no_gitlinks' set we treat it as a gitlink, and show it
 599 *      as a directory.
 600 *  (c) otherwise, we recurse into it.
 601 */
 602enum directory_treatment {
 603        show_directory,
 604        ignore_directory,
 605        recurse_into_directory
 606};
 607
 608static enum directory_treatment treat_directory(struct dir_struct *dir,
 609        const char *dirname, int len,
 610        const struct path_simplify *simplify)
 611{
 612        /* The "len-1" is to strip the final '/' */
 613        switch (directory_exists_in_index(dirname, len-1)) {
 614        case index_directory:
 615                return recurse_into_directory;
 616
 617        case index_gitdir:
 618                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 619                        return ignore_directory;
 620                return show_directory;
 621
 622        case index_nonexistent:
 623                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 624                        break;
 625                if (!(dir->flags & DIR_NO_GITLINKS)) {
 626                        unsigned char sha1[20];
 627                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 628                                return show_directory;
 629                }
 630                return recurse_into_directory;
 631        }
 632
 633        /* This is the "show_other_directories" case */
 634        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 635                return show_directory;
 636        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 637                return ignore_directory;
 638        return show_directory;
 639}
 640
 641/*
 642 * This is an inexact early pruning of any recursive directory
 643 * reading - if the path cannot possibly be in the pathspec,
 644 * return true, and we'll skip it early.
 645 */
 646static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 647{
 648        if (simplify) {
 649                for (;;) {
 650                        const char *match = simplify->path;
 651                        int len = simplify->len;
 652
 653                        if (!match)
 654                                break;
 655                        if (len > pathlen)
 656                                len = pathlen;
 657                        if (!memcmp(path, match, len))
 658                                return 0;
 659                        simplify++;
 660                }
 661                return 1;
 662        }
 663        return 0;
 664}
 665
 666/*
 667 * This function tells us whether an excluded path matches a
 668 * list of "interesting" pathspecs. That is, whether a path matched
 669 * by any of the pathspecs could possibly be ignored by excluding
 670 * the specified path. This can happen if:
 671 *
 672 *   1. the path is mentioned explicitly in the pathspec
 673 *
 674 *   2. the path is a directory prefix of some element in the
 675 *      pathspec
 676 */
 677static int exclude_matches_pathspec(const char *path, int len,
 678                const struct path_simplify *simplify)
 679{
 680        if (simplify) {
 681                for (; simplify->path; simplify++) {
 682                        if (len == simplify->len
 683                            && !memcmp(path, simplify->path, len))
 684                                return 1;
 685                        if (len < simplify->len
 686                            && simplify->path[len] == '/'
 687                            && !memcmp(path, simplify->path, len))
 688                                return 1;
 689                }
 690        }
 691        return 0;
 692}
 693
 694static int get_index_dtype(const char *path, int len)
 695{
 696        int pos;
 697        struct cache_entry *ce;
 698
 699        ce = cache_name_exists(path, len, 0);
 700        if (ce) {
 701                if (!ce_uptodate(ce))
 702                        return DT_UNKNOWN;
 703                if (S_ISGITLINK(ce->ce_mode))
 704                        return DT_DIR;
 705                /*
 706                 * Nobody actually cares about the
 707                 * difference between DT_LNK and DT_REG
 708                 */
 709                return DT_REG;
 710        }
 711
 712        /* Try to look it up as a directory */
 713        pos = cache_name_pos(path, len);
 714        if (pos >= 0)
 715                return DT_UNKNOWN;
 716        pos = -pos-1;
 717        while (pos < active_nr) {
 718                ce = active_cache[pos++];
 719                if (strncmp(ce->name, path, len))
 720                        break;
 721                if (ce->name[len] > '/')
 722                        break;
 723                if (ce->name[len] < '/')
 724                        continue;
 725                if (!ce_uptodate(ce))
 726                        break;  /* continue? */
 727                return DT_DIR;
 728        }
 729        return DT_UNKNOWN;
 730}
 731
 732static int get_dtype(struct dirent *de, const char *path, int len)
 733{
 734        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 735        struct stat st;
 736
 737        if (dtype != DT_UNKNOWN)
 738                return dtype;
 739        dtype = get_index_dtype(path, len);
 740        if (dtype != DT_UNKNOWN)
 741                return dtype;
 742        if (lstat(path, &st))
 743                return dtype;
 744        if (S_ISREG(st.st_mode))
 745                return DT_REG;
 746        if (S_ISDIR(st.st_mode))
 747                return DT_DIR;
 748        if (S_ISLNK(st.st_mode))
 749                return DT_LNK;
 750        return dtype;
 751}
 752
 753enum path_treatment {
 754        path_ignored,
 755        path_handled,
 756        path_recurse
 757};
 758
 759static enum path_treatment treat_one_path(struct dir_struct *dir,
 760                                          char *path, int *len,
 761                                          const struct path_simplify *simplify,
 762                                          int dtype, struct dirent *de)
 763{
 764        int exclude = excluded(dir, path, &dtype);
 765        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 766            && exclude_matches_pathspec(path, *len, simplify))
 767                dir_add_ignored(dir, path, *len);
 768
 769        /*
 770         * Excluded? If we don't explicitly want to show
 771         * ignored files, ignore it
 772         */
 773        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
 774                return path_ignored;
 775
 776        if (dtype == DT_UNKNOWN)
 777                dtype = get_dtype(de, path, *len);
 778
 779        /*
 780         * Do we want to see just the ignored files?
 781         * We still need to recurse into directories,
 782         * even if we don't ignore them, since the
 783         * directory may contain files that we do..
 784         */
 785        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
 786                if (dtype != DT_DIR)
 787                        return path_ignored;
 788        }
 789
 790        switch (dtype) {
 791        default:
 792                return path_ignored;
 793        case DT_DIR:
 794                memcpy(path + *len, "/", 2);
 795                (*len)++;
 796                switch (treat_directory(dir, path, *len, simplify)) {
 797                case show_directory:
 798                        if (exclude != !!(dir->flags
 799                                          & DIR_SHOW_IGNORED))
 800                                return path_ignored;
 801                        break;
 802                case recurse_into_directory:
 803                        return path_recurse;
 804                case ignore_directory:
 805                        return path_ignored;
 806                }
 807                break;
 808        case DT_REG:
 809        case DT_LNK:
 810                break;
 811        }
 812        return path_handled;
 813}
 814
 815static enum path_treatment treat_path(struct dir_struct *dir,
 816                                      struct dirent *de,
 817                                      char *path, int path_max,
 818                                      int baselen,
 819                                      const struct path_simplify *simplify,
 820                                      int *len)
 821{
 822        int dtype;
 823
 824        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 825                return path_ignored;
 826        *len = strlen(de->d_name);
 827        /* Ignore overly long pathnames! */
 828        if (*len + baselen + 8 > path_max)
 829                return path_ignored;
 830        memcpy(path + baselen, de->d_name, *len + 1);
 831        *len += baselen;
 832        if (simplify_away(path, *len, simplify))
 833                return path_ignored;
 834
 835        dtype = DTYPE(de);
 836        return treat_one_path(dir, path, len, simplify, dtype, de);
 837}
 838
 839/*
 840 * Read a directory tree. We currently ignore anything but
 841 * directories, regular files and symlinks. That's because git
 842 * doesn't handle them at all yet. Maybe that will change some
 843 * day.
 844 *
 845 * Also, we ignore the name ".git" (even if it is not a directory).
 846 * That likely will not change.
 847 */
 848static int read_directory_recursive(struct dir_struct *dir,
 849                                    const char *base, int baselen,
 850                                    int check_only,
 851                                    const struct path_simplify *simplify)
 852{
 853        DIR *fdir = opendir(*base ? base : ".");
 854        int contents = 0;
 855
 856        if (fdir) {
 857                struct dirent *de;
 858                char path[PATH_MAX + 1];
 859                memcpy(path, base, baselen);
 860
 861                while ((de = readdir(fdir)) != NULL) {
 862                        int len;
 863                        switch (treat_path(dir, de, path, sizeof(path),
 864                                           baselen, simplify, &len)) {
 865                        case path_recurse:
 866                                contents += read_directory_recursive
 867                                        (dir, path, len, 0, simplify);
 868                                continue;
 869                        case path_ignored:
 870                                continue;
 871                        case path_handled:
 872                                break;
 873                        }
 874                        contents++;
 875                        if (check_only)
 876                                goto exit_early;
 877                        else
 878                                dir_add_name(dir, path, len);
 879                }
 880exit_early:
 881                closedir(fdir);
 882        }
 883
 884        return contents;
 885}
 886
 887static int cmp_name(const void *p1, const void *p2)
 888{
 889        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
 890        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
 891
 892        return cache_name_compare(e1->name, e1->len,
 893                                  e2->name, e2->len);
 894}
 895
 896/*
 897 * Return the length of the "simple" part of a path match limiter.
 898 */
 899static int simple_length(const char *match)
 900{
 901        int len = -1;
 902
 903        for (;;) {
 904                unsigned char c = *match++;
 905                len++;
 906                if (c == '\0' || is_glob_special(c))
 907                        return len;
 908        }
 909}
 910
 911static struct path_simplify *create_simplify(const char **pathspec)
 912{
 913        int nr, alloc = 0;
 914        struct path_simplify *simplify = NULL;
 915
 916        if (!pathspec)
 917                return NULL;
 918
 919        for (nr = 0 ; ; nr++) {
 920                const char *match;
 921                if (nr >= alloc) {
 922                        alloc = alloc_nr(alloc);
 923                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
 924                }
 925                match = *pathspec++;
 926                if (!match)
 927                        break;
 928                simplify[nr].path = match;
 929                simplify[nr].len = simple_length(match);
 930        }
 931        simplify[nr].path = NULL;
 932        simplify[nr].len = 0;
 933        return simplify;
 934}
 935
 936static void free_simplify(struct path_simplify *simplify)
 937{
 938        free(simplify);
 939}
 940
 941static int treat_leading_path(struct dir_struct *dir,
 942                              const char *path, int len,
 943                              const struct path_simplify *simplify)
 944{
 945        char pathbuf[PATH_MAX];
 946        int baselen, blen;
 947        const char *cp;
 948
 949        while (len && path[len - 1] == '/')
 950                len--;
 951        if (!len)
 952                return 1;
 953        baselen = 0;
 954        while (1) {
 955                cp = path + baselen + !!baselen;
 956                cp = memchr(cp, '/', path + len - cp);
 957                if (!cp)
 958                        baselen = len;
 959                else
 960                        baselen = cp - path;
 961                memcpy(pathbuf, path, baselen);
 962                pathbuf[baselen] = '\0';
 963                if (!is_directory(pathbuf))
 964                        return 0;
 965                if (simplify_away(pathbuf, baselen, simplify))
 966                        return 0;
 967                blen = baselen;
 968                if (treat_one_path(dir, pathbuf, &blen, simplify,
 969                                   DT_DIR, NULL) == path_ignored)
 970                        return 0; /* do not recurse into it */
 971                if (len <= baselen)
 972                        return 1; /* finished checking */
 973        }
 974}
 975
 976int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
 977{
 978        struct path_simplify *simplify;
 979
 980        if (has_symlink_leading_path(path, len))
 981                return dir->nr;
 982
 983        simplify = create_simplify(pathspec);
 984        if (!len || treat_leading_path(dir, path, len, simplify))
 985                read_directory_recursive(dir, path, len, 0, simplify);
 986        free_simplify(simplify);
 987        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 988        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
 989        return dir->nr;
 990}
 991
 992int file_exists(const char *f)
 993{
 994        struct stat sb;
 995        return lstat(f, &sb) == 0;
 996}
 997
 998/*
 999 * get_relative_cwd() gets the prefix of the current working directory
1000 * relative to 'dir'.  If we are not inside 'dir', it returns NULL.
1001 *
1002 * As a convenience, it also returns NULL if 'dir' is already NULL.  The
1003 * reason for this behaviour is that it is natural for functions returning
1004 * directory names to return NULL to say "this directory does not exist"
1005 * or "this directory is invalid".  These cases are usually handled the
1006 * same as if the cwd is not inside 'dir' at all, so get_relative_cwd()
1007 * returns NULL for both of them.
1008 *
1009 * Most notably, get_relative_cwd(buffer, size, get_git_work_tree())
1010 * unifies the handling of "outside work tree" with "no work tree at all".
1011 */
1012char *get_relative_cwd(char *buffer, int size, const char *dir)
1013{
1014        char *cwd = buffer;
1015
1016        if (!dir)
1017                return NULL;
1018        if (!getcwd(buffer, size))
1019                die_errno("can't find the current directory");
1020
1021        if (!is_absolute_path(dir))
1022                dir = make_absolute_path(dir);
1023
1024        while (*dir && *dir == *cwd) {
1025                dir++;
1026                cwd++;
1027        }
1028        if (*dir)
1029                return NULL;
1030        switch (*cwd) {
1031        case '\0':
1032                return cwd;
1033        case '/':
1034                return cwd + 1;
1035        default:
1036                return NULL;
1037        }
1038}
1039
1040int is_inside_dir(const char *dir)
1041{
1042        char buffer[PATH_MAX];
1043        return get_relative_cwd(buffer, sizeof(buffer), dir) != NULL;
1044}
1045
1046int is_empty_dir(const char *path)
1047{
1048        DIR *dir = opendir(path);
1049        struct dirent *e;
1050        int ret = 1;
1051
1052        if (!dir)
1053                return 0;
1054
1055        while ((e = readdir(dir)) != NULL)
1056                if (!is_dot_or_dotdot(e->d_name)) {
1057                        ret = 0;
1058                        break;
1059                }
1060
1061        closedir(dir);
1062        return ret;
1063}
1064
1065int remove_dir_recursively(struct strbuf *path, int flag)
1066{
1067        DIR *dir;
1068        struct dirent *e;
1069        int ret = 0, original_len = path->len, len;
1070        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1071        unsigned char submodule_head[20];
1072
1073        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1074            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head))
1075                /* Do not descend and nuke a nested git work tree. */
1076                return 0;
1077
1078        dir = opendir(path->buf);
1079        if (!dir)
1080                return -1;
1081        if (path->buf[original_len - 1] != '/')
1082                strbuf_addch(path, '/');
1083
1084        len = path->len;
1085        while ((e = readdir(dir)) != NULL) {
1086                struct stat st;
1087                if (is_dot_or_dotdot(e->d_name))
1088                        continue;
1089
1090                strbuf_setlen(path, len);
1091                strbuf_addstr(path, e->d_name);
1092                if (lstat(path->buf, &st))
1093                        ; /* fall thru */
1094                else if (S_ISDIR(st.st_mode)) {
1095                        if (!remove_dir_recursively(path, only_empty))
1096                                continue; /* happy */
1097                } else if (!only_empty && !unlink(path->buf))
1098                        continue; /* happy, too */
1099
1100                /* path too long, stat fails, or non-directory still exists */
1101                ret = -1;
1102                break;
1103        }
1104        closedir(dir);
1105
1106        strbuf_setlen(path, original_len);
1107        if (!ret)
1108                ret = rmdir(path->buf);
1109        return ret;
1110}
1111
1112void setup_standard_excludes(struct dir_struct *dir)
1113{
1114        const char *path;
1115
1116        dir->exclude_per_dir = ".gitignore";
1117        path = git_path("info/exclude");
1118        if (!access(path, R_OK))
1119                add_excludes_from_file(dir, path);
1120        if (excludes_file && !access(excludes_file, R_OK))
1121                add_excludes_from_file(dir, excludes_file);
1122}
1123
/*
 * Remove the file 'name' and then every leading directory of it that
 * has become empty, innermost first, stopping at the first rmdir()
 * that fails.
 *
 * A file that is already missing is not an error: unlink() failing
 * with ENOENT, or with ENOTDIR (a leading component is not a
 * directory, so the file cannot exist either), is tolerated.
 *
 * Returns 0 on success and -1 if unlink() fails for any other reason.
 * Failures to remove the leading directories are not reported.
 */
int remove_path(const char *name)
{
        char *slash;

        if (unlink(name) && errno != ENOENT && errno != ENOTDIR)
                return -1;

        slash = strrchr(name, '/');
        if (slash) {
                /* Work on a private copy that can be truncated in place. */
                char *dirs = xstrdup(name);
                slash = dirs + (slash - name);
                do {
                        /* Cut off the last component, then try its parent. */
                        *slash = '\0';
                } while (rmdir(dirs) == 0 && (slash = strrchr(dirs, '/')));
                free(dirs);
        }
        return 0;
}
1142