839bc9f61c421427a87bf6144482dc9851de2cbc
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37static size_t common_prefix_len(const char **pathspec)
  38{
  39        const char *n, *first;
  40        size_t max = 0;
  41
  42        if (!pathspec)
  43                return max;
  44
  45        first = *pathspec;
  46        while ((n = *pathspec++)) {
  47                size_t i, len = 0;
  48                for (i = 0; first == n || i < max; i++) {
  49                        char c = n[i];
  50                        if (!c || c != first[i] || is_glob_special(c))
  51                                break;
  52                        if (c == '/')
  53                                len = i + 1;
  54                }
  55                if (first == n || len < max) {
  56                        max = len;
  57                        if (!max)
  58                                break;
  59                }
  60        }
  61        return max;
  62}
  63
  64/*
  65 * Returns a copy of the longest leading path common among all
  66 * pathspecs.
  67 */
  68char *common_prefix(const char **pathspec)
  69{
  70        unsigned long len = common_prefix_len(pathspec);
  71
  72        return len ? xmemdupz(*pathspec, len) : NULL;
  73}
  74
  75int fill_directory(struct dir_struct *dir, const char **pathspec)
  76{
  77        const char *path;
  78        size_t len;
  79
  80        /*
  81         * Calculate common prefix for the pathspec, and
  82         * use that to optimize the directory walk
  83         */
  84        len = common_prefix_len(pathspec);
  85        path = "";
  86
  87        if (len)
  88                path = xmemdupz(*pathspec, len);
  89
  90        /* Read the directory and prune it */
  91        read_directory(dir, path, len, pathspec);
  92        if (*path)
  93                free((char *)path);
  94        return len;
  95}
  96
  97int within_depth(const char *name, int namelen,
  98                        int depth, int max_depth)
  99{
 100        const char *cp = name, *cpe = name + namelen;
 101
 102        while (cp < cpe) {
 103                if (*cp++ != '/')
 104                        continue;
 105                depth++;
 106                if (depth > max_depth)
 107                        return 0;
 108        }
 109        return 1;
 110}
 111
 112/*
 113 * Does 'match' match the given name?
 114 * A match is found if
 115 *
 116 * (1) the 'match' string is leading directory of 'name', or
 117 * (2) the 'match' string is a wildcard and matches 'name', or
 118 * (3) the 'match' string is exactly the same as 'name'.
 119 *
 120 * and the return value tells which case it was.
 121 *
 122 * It returns 0 when there is no match.
 123 */
 124static int match_one(const char *match, const char *name, int namelen)
 125{
 126        int matchlen;
 127
 128        /* If the match was just the prefix, we matched */
 129        if (!*match)
 130                return MATCHED_RECURSIVELY;
 131
 132        if (ignore_case) {
 133                for (;;) {
 134                        unsigned char c1 = tolower(*match);
 135                        unsigned char c2 = tolower(*name);
 136                        if (c1 == '\0' || is_glob_special(c1))
 137                                break;
 138                        if (c1 != c2)
 139                                return 0;
 140                        match++;
 141                        name++;
 142                        namelen--;
 143                }
 144        } else {
 145                for (;;) {
 146                        unsigned char c1 = *match;
 147                        unsigned char c2 = *name;
 148                        if (c1 == '\0' || is_glob_special(c1))
 149                                break;
 150                        if (c1 != c2)
 151                                return 0;
 152                        match++;
 153                        name++;
 154                        namelen--;
 155                }
 156        }
 157
 158
 159        /*
 160         * If we don't match the matchstring exactly,
 161         * we need to match by fnmatch
 162         */
 163        matchlen = strlen(match);
 164        if (strncmp_icase(match, name, matchlen))
 165                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 166
 167        if (namelen == matchlen)
 168                return MATCHED_EXACTLY;
 169        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 170                return MATCHED_RECURSIVELY;
 171        return 0;
 172}
 173
 174/*
 175 * Given a name and a list of pathspecs, see if the name matches
 176 * any of the pathspecs.  The caller is also interested in seeing
 177 * all pathspec matches some names it calls this function with
 178 * (otherwise the user could have mistyped the unmatched pathspec),
 179 * and a mark is left in seen[] array for pathspec element that
 180 * actually matched anything.
 181 */
 182int match_pathspec(const char **pathspec, const char *name, int namelen,
 183                int prefix, char *seen)
 184{
 185        int i, retval = 0;
 186
 187        if (!pathspec)
 188                return 1;
 189
 190        name += prefix;
 191        namelen -= prefix;
 192
 193        for (i = 0; pathspec[i] != NULL; i++) {
 194                int how;
 195                const char *match = pathspec[i] + prefix;
 196                if (seen && seen[i] == MATCHED_EXACTLY)
 197                        continue;
 198                how = match_one(match, name, namelen);
 199                if (how) {
 200                        if (retval < how)
 201                                retval = how;
 202                        if (seen && seen[i] < how)
 203                                seen[i] = how;
 204                }
 205        }
 206        return retval;
 207}
 208
 209/*
 210 * Does 'match' match the given name?
 211 * A match is found if
 212 *
 213 * (1) the 'match' string is leading directory of 'name', or
 214 * (2) the 'match' string is a wildcard and matches 'name', or
 215 * (3) the 'match' string is exactly the same as 'name'.
 216 *
 217 * and the return value tells which case it was.
 218 *
 219 * It returns 0 when there is no match.
 220 */
 221static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 222                               const char *name, int namelen)
 223{
 224        /* name/namelen has prefix cut off by caller */
 225        const char *match = item->match + prefix;
 226        int matchlen = item->len - prefix;
 227
 228        /* If the match was just the prefix, we matched */
 229        if (!*match)
 230                return MATCHED_RECURSIVELY;
 231
 232        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 233                if (matchlen == namelen)
 234                        return MATCHED_EXACTLY;
 235
 236                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 237                        return MATCHED_RECURSIVELY;
 238        }
 239
 240        if (item->use_wildcard && !fnmatch(match, name, 0))
 241                return MATCHED_FNMATCH;
 242
 243        return 0;
 244}
 245
 246/*
 247 * Given a name and a list of pathspecs, see if the name matches
 248 * any of the pathspecs.  The caller is also interested in seeing
 249 * all pathspec matches some names it calls this function with
 250 * (otherwise the user could have mistyped the unmatched pathspec),
 251 * and a mark is left in seen[] array for pathspec element that
 252 * actually matched anything.
 253 */
 254int match_pathspec_depth(const struct pathspec *ps,
 255                         const char *name, int namelen,
 256                         int prefix, char *seen)
 257{
 258        int i, retval = 0;
 259
 260        if (!ps->nr) {
 261                if (!ps->recursive || ps->max_depth == -1)
 262                        return MATCHED_RECURSIVELY;
 263
 264                if (within_depth(name, namelen, 0, ps->max_depth))
 265                        return MATCHED_EXACTLY;
 266                else
 267                        return 0;
 268        }
 269
 270        name += prefix;
 271        namelen -= prefix;
 272
 273        for (i = ps->nr - 1; i >= 0; i--) {
 274                int how;
 275                if (seen && seen[i] == MATCHED_EXACTLY)
 276                        continue;
 277                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 278                if (ps->recursive && ps->max_depth != -1 &&
 279                    how && how != MATCHED_FNMATCH) {
 280                        int len = ps->items[i].len;
 281                        if (name[len] == '/')
 282                                len++;
 283                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 284                                how = MATCHED_EXACTLY;
 285                        else
 286                                how = 0;
 287                }
 288                if (how) {
 289                        if (retval < how)
 290                                retval = how;
 291                        if (seen && seen[i] < how)
 292                                seen[i] = how;
 293                }
 294        }
 295        return retval;
 296}
 297
 298static int no_wildcard(const char *string)
 299{
 300        return string[strcspn(string, "*?[{\\")] == '\0';
 301}
 302
 303void add_exclude(const char *string, const char *base,
 304                 int baselen, struct exclude_list *which)
 305{
 306        struct exclude *x;
 307        size_t len;
 308        int to_exclude = 1;
 309        int flags = 0;
 310
 311        if (*string == '!') {
 312                to_exclude = 0;
 313                string++;
 314        }
 315        len = strlen(string);
 316        if (len && string[len - 1] == '/') {
 317                char *s;
 318                x = xmalloc(sizeof(*x) + len);
 319                s = (char *)(x+1);
 320                memcpy(s, string, len - 1);
 321                s[len - 1] = '\0';
 322                string = s;
 323                x->pattern = s;
 324                flags = EXC_FLAG_MUSTBEDIR;
 325        } else {
 326                x = xmalloc(sizeof(*x));
 327                x->pattern = string;
 328        }
 329        x->to_exclude = to_exclude;
 330        x->patternlen = strlen(string);
 331        x->base = base;
 332        x->baselen = baselen;
 333        x->flags = flags;
 334        if (!strchr(string, '/'))
 335                x->flags |= EXC_FLAG_NODIR;
 336        if (no_wildcard(string))
 337                x->flags |= EXC_FLAG_NOWILDCARD;
 338        if (*string == '*' && no_wildcard(string+1))
 339                x->flags |= EXC_FLAG_ENDSWITH;
 340        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 341        which->excludes[which->nr++] = x;
 342}
 343
 344static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 345{
 346        int pos, len;
 347        unsigned long sz;
 348        enum object_type type;
 349        void *data;
 350        struct index_state *istate = &the_index;
 351
 352        len = strlen(path);
 353        pos = index_name_pos(istate, path, len);
 354        if (pos < 0)
 355                return NULL;
 356        if (!ce_skip_worktree(istate->cache[pos]))
 357                return NULL;
 358        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 359        if (!data || type != OBJ_BLOB) {
 360                free(data);
 361                return NULL;
 362        }
 363        *size = xsize_t(sz);
 364        return data;
 365}
 366
 367void free_excludes(struct exclude_list *el)
 368{
 369        int i;
 370
 371        for (i = 0; i < el->nr; i++)
 372                free(el->excludes[i]);
 373        free(el->excludes);
 374
 375        el->nr = 0;
 376        el->excludes = NULL;
 377}
 378
 379int add_excludes_from_file_to_list(const char *fname,
 380                                   const char *base,
 381                                   int baselen,
 382                                   char **buf_p,
 383                                   struct exclude_list *which,
 384                                   int check_index)
 385{
 386        struct stat st;
 387        int fd, i;
 388        size_t size = 0;
 389        char *buf, *entry;
 390
 391        fd = open(fname, O_RDONLY);
 392        if (fd < 0 || fstat(fd, &st) < 0) {
 393                if (0 <= fd)
 394                        close(fd);
 395                if (!check_index ||
 396                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 397                        return -1;
 398                if (size == 0) {
 399                        free(buf);
 400                        return 0;
 401                }
 402                if (buf[size-1] != '\n') {
 403                        buf = xrealloc(buf, size+1);
 404                        buf[size++] = '\n';
 405                }
 406        }
 407        else {
 408                size = xsize_t(st.st_size);
 409                if (size == 0) {
 410                        close(fd);
 411                        return 0;
 412                }
 413                buf = xmalloc(size+1);
 414                if (read_in_full(fd, buf, size) != size) {
 415                        free(buf);
 416                        close(fd);
 417                        return -1;
 418                }
 419                buf[size++] = '\n';
 420                close(fd);
 421        }
 422
 423        if (buf_p)
 424                *buf_p = buf;
 425        entry = buf;
 426        for (i = 0; i < size; i++) {
 427                if (buf[i] == '\n') {
 428                        if (entry != buf + i && entry[0] != '#') {
 429                                buf[i - (i && buf[i-1] == '\r')] = 0;
 430                                add_exclude(entry, base, baselen, which);
 431                        }
 432                        entry = buf + i + 1;
 433                }
 434        }
 435        return 0;
 436}
 437
 438void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 439{
 440        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 441                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 442                die("cannot use %s as an exclude file", fname);
 443}
 444
 445static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 446{
 447        struct exclude_list *el;
 448        struct exclude_stack *stk = NULL;
 449        int current;
 450
 451        if ((!dir->exclude_per_dir) ||
 452            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 453                return; /* too long a path -- ignore */
 454
 455        /* Pop the ones that are not the prefix of the path being checked. */
 456        el = &dir->exclude_list[EXC_DIRS];
 457        while ((stk = dir->exclude_stack) != NULL) {
 458                if (stk->baselen <= baselen &&
 459                    !strncmp(dir->basebuf, base, stk->baselen))
 460                        break;
 461                dir->exclude_stack = stk->prev;
 462                while (stk->exclude_ix < el->nr)
 463                        free(el->excludes[--el->nr]);
 464                free(stk->filebuf);
 465                free(stk);
 466        }
 467
 468        /* Read from the parent directories and push them down. */
 469        current = stk ? stk->baselen : -1;
 470        while (current < baselen) {
 471                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 472                const char *cp;
 473
 474                if (current < 0) {
 475                        cp = base;
 476                        current = 0;
 477                }
 478                else {
 479                        cp = strchr(base + current + 1, '/');
 480                        if (!cp)
 481                                die("oops in prep_exclude");
 482                        cp++;
 483                }
 484                stk->prev = dir->exclude_stack;
 485                stk->baselen = cp - base;
 486                stk->exclude_ix = el->nr;
 487                memcpy(dir->basebuf + current, base + current,
 488                       stk->baselen - current);
 489                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 490                add_excludes_from_file_to_list(dir->basebuf,
 491                                               dir->basebuf, stk->baselen,
 492                                               &stk->filebuf, el, 1);
 493                dir->exclude_stack = stk;
 494                current = stk->baselen;
 495        }
 496        dir->basebuf[baselen] = '\0';
 497}
 498
 499/* Scan the list and let the last match determine the fate.
 500 * Return 1 for exclude, 0 for include and -1 for undecided.
 501 */
 502int excluded_from_list(const char *pathname,
 503                       int pathlen, const char *basename, int *dtype,
 504                       struct exclude_list *el)
 505{
 506        int i;
 507
 508        if (el->nr) {
 509                for (i = el->nr - 1; 0 <= i; i--) {
 510                        struct exclude *x = el->excludes[i];
 511                        const char *exclude = x->pattern;
 512                        int to_exclude = x->to_exclude;
 513
 514                        if (x->flags & EXC_FLAG_MUSTBEDIR) {
 515                                if (*dtype == DT_UNKNOWN)
 516                                        *dtype = get_dtype(NULL, pathname, pathlen);
 517                                if (*dtype != DT_DIR)
 518                                        continue;
 519                        }
 520
 521                        if (x->flags & EXC_FLAG_NODIR) {
 522                                /* match basename */
 523                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 524                                        if (!strcmp_icase(exclude, basename))
 525                                                return to_exclude;
 526                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
 527                                        if (x->patternlen - 1 <= pathlen &&
 528                                            !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
 529                                                return to_exclude;
 530                                } else {
 531                                        if (fnmatch_icase(exclude, basename, 0) == 0)
 532                                                return to_exclude;
 533                                }
 534                        }
 535                        else {
 536                                /* match with FNM_PATHNAME:
 537                                 * exclude has base (baselen long) implicitly
 538                                 * in front of it.
 539                                 */
 540                                int baselen = x->baselen;
 541                                if (*exclude == '/')
 542                                        exclude++;
 543
 544                                if (pathlen < baselen ||
 545                                    (baselen && pathname[baselen-1] != '/') ||
 546                                    strncmp_icase(pathname, x->base, baselen))
 547                                    continue;
 548
 549                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 550                                        if (!strcmp_icase(exclude, pathname + baselen))
 551                                                return to_exclude;
 552                                } else {
 553                                        if (fnmatch_icase(exclude, pathname+baselen,
 554                                                    FNM_PATHNAME) == 0)
 555                                            return to_exclude;
 556                                }
 557                        }
 558                }
 559        }
 560        return -1; /* undecided */
 561}
 562
 563int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 564{
 565        int pathlen = strlen(pathname);
 566        int st;
 567        const char *basename = strrchr(pathname, '/');
 568        basename = (basename) ? basename+1 : pathname;
 569
 570        prep_exclude(dir, pathname, basename-pathname);
 571        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 572                switch (excluded_from_list(pathname, pathlen, basename,
 573                                           dtype_p, &dir->exclude_list[st])) {
 574                case 0:
 575                        return 0;
 576                case 1:
 577                        return 1;
 578                }
 579        }
 580        return 0;
 581}
 582
 583void path_exclude_check_init(struct path_exclude_check *check,
 584                             struct dir_struct *dir)
 585{
 586        check->dir = dir;
 587        strbuf_init(&check->path, 256);
 588}
 589
 590void path_exclude_check_clear(struct path_exclude_check *check)
 591{
 592        strbuf_release(&check->path);
 593}
 594
 595/*
 596 * Is the ce->name excluded?  This is for a caller like show_files() that
 597 * do not honor directory hierarchy and iterate through paths that are
 598 * possibly in an ignored directory.
 599 *
 600 * A path to a directory known to be excluded is left in check->path to
 601 * optimize for repeated checks for files in the same excluded directory.
 602 */
 603int path_excluded(struct path_exclude_check *check, struct cache_entry *ce)
 604{
 605        int i, dtype;
 606        struct strbuf *path = &check->path;
 607
 608        if (path->len &&
 609            path->len <= ce_namelen(ce) &&
 610            !memcmp(ce->name, path->buf, path->len) &&
 611            (!ce->name[path->len] || ce->name[path->len] == '/'))
 612                return 1;
 613
 614        strbuf_setlen(path, 0);
 615        for (i = 0; ce->name[i]; i++) {
 616                int ch = ce->name[i];
 617
 618                if (ch == '/') {
 619                        dtype = DT_DIR;
 620                        if (excluded(check->dir, path->buf, &dtype))
 621                                return 1;
 622                }
 623                strbuf_addch(path, ch);
 624        }
 625
 626        /* An entry in the index; cannot be a directory with subentries */
 627        strbuf_setlen(path, 0);
 628
 629        dtype = ce_to_dtype(ce);
 630        return excluded(check->dir, ce->name, &dtype);
 631}
 632
 633static struct dir_entry *dir_entry_new(const char *pathname, int len)
 634{
 635        struct dir_entry *ent;
 636
 637        ent = xmalloc(sizeof(*ent) + len + 1);
 638        ent->len = len;
 639        memcpy(ent->name, pathname, len);
 640        ent->name[len] = 0;
 641        return ent;
 642}
 643
 644static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 645{
 646        if (cache_name_exists(pathname, len, ignore_case))
 647                return NULL;
 648
 649        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 650        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 651}
 652
 653struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 654{
 655        if (!cache_name_is_other(pathname, len))
 656                return NULL;
 657
 658        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 659        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 660}
 661
 662enum exist_status {
 663        index_nonexistent = 0,
 664        index_directory,
 665        index_gitdir
 666};
 667
 668/*
 669 * Do not use the alphabetically stored index to look up
 670 * the directory name; instead, use the case insensitive
 671 * name hash.
 672 */
 673static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 674{
 675        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 676        unsigned char endchar;
 677
 678        if (!ce)
 679                return index_nonexistent;
 680        endchar = ce->name[len];
 681
 682        /*
 683         * The cache_entry structure returned will contain this dirname
 684         * and possibly additional path components.
 685         */
 686        if (endchar == '/')
 687                return index_directory;
 688
 689        /*
 690         * If there are no additional path components, then this cache_entry
 691         * represents a submodule.  Submodules, despite being directories,
 692         * are stored in the cache without a closing slash.
 693         */
 694        if (!endchar && S_ISGITLINK(ce->ce_mode))
 695                return index_gitdir;
 696
 697        /* This should never be hit, but it exists just in case. */
 698        return index_nonexistent;
 699}
 700
 701/*
 702 * The index sorts alphabetically by entry name, which
 703 * means that a gitlink sorts as '\0' at the end, while
 704 * a directory (which is defined not as an entry, but as
 705 * the files it contains) will sort with the '/' at the
 706 * end.
 707 */
 708static enum exist_status directory_exists_in_index(const char *dirname, int len)
 709{
 710        int pos;
 711
 712        if (ignore_case)
 713                return directory_exists_in_index_icase(dirname, len);
 714
 715        pos = cache_name_pos(dirname, len);
 716        if (pos < 0)
 717                pos = -pos-1;
 718        while (pos < active_nr) {
 719                struct cache_entry *ce = active_cache[pos++];
 720                unsigned char endchar;
 721
 722                if (strncmp(ce->name, dirname, len))
 723                        break;
 724                endchar = ce->name[len];
 725                if (endchar > '/')
 726                        break;
 727                if (endchar == '/')
 728                        return index_directory;
 729                if (!endchar && S_ISGITLINK(ce->ce_mode))
 730                        return index_gitdir;
 731        }
 732        return index_nonexistent;
 733}
 734
 735/*
 736 * When we find a directory when traversing the filesystem, we
 737 * have three distinct cases:
 738 *
 739 *  - ignore it
 740 *  - see it as a directory
 741 *  - recurse into it
 742 *
 743 * and which one we choose depends on a combination of existing
 744 * git index contents and the flags passed into the directory
 745 * traversal routine.
 746 *
 747 * Case 1: If we *already* have entries in the index under that
 748 * directory name, we always recurse into the directory to see
 749 * all the files.
 750 *
 751 * Case 2: If we *already* have that directory name as a gitlink,
 752 * we always continue to see it as a gitlink, regardless of whether
 753 * there is an actual git directory there or not (it might not
 754 * be checked out as a subproject!)
 755 *
 756 * Case 3: if we didn't have it in the index previously, we
 757 * have a few sub-cases:
 758 *
 759 *  (a) if "show_other_directories" is true, we show it as
 760 *      just a directory, unless "hide_empty_directories" is
 761 *      also true and the directory is empty, in which case
 762 *      we just ignore it entirely.
 763 *  (b) if it looks like a git directory, and we don't have
 764 *      'no_gitlinks' set we treat it as a gitlink, and show it
 765 *      as a directory.
 766 *  (c) otherwise, we recurse into it.
 767 */
 768enum directory_treatment {
 769        show_directory,
 770        ignore_directory,
 771        recurse_into_directory
 772};
 773
 774static enum directory_treatment treat_directory(struct dir_struct *dir,
 775        const char *dirname, int len,
 776        const struct path_simplify *simplify)
 777{
 778        /* The "len-1" is to strip the final '/' */
 779        switch (directory_exists_in_index(dirname, len-1)) {
 780        case index_directory:
 781                return recurse_into_directory;
 782
 783        case index_gitdir:
 784                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 785                        return ignore_directory;
 786                return show_directory;
 787
 788        case index_nonexistent:
 789                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 790                        break;
 791                if (!(dir->flags & DIR_NO_GITLINKS)) {
 792                        unsigned char sha1[20];
 793                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 794                                return show_directory;
 795                }
 796                return recurse_into_directory;
 797        }
 798
 799        /* This is the "show_other_directories" case */
 800        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 801                return show_directory;
 802        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 803                return ignore_directory;
 804        return show_directory;
 805}
 806
 807/*
 808 * This is an inexact early pruning of any recursive directory
 809 * reading - if the path cannot possibly be in the pathspec,
 810 * return true, and we'll skip it early.
 811 */
 812static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 813{
 814        if (simplify) {
 815                for (;;) {
 816                        const char *match = simplify->path;
 817                        int len = simplify->len;
 818
 819                        if (!match)
 820                                break;
 821                        if (len > pathlen)
 822                                len = pathlen;
 823                        if (!memcmp(path, match, len))
 824                                return 0;
 825                        simplify++;
 826                }
 827                return 1;
 828        }
 829        return 0;
 830}
 831
 832/*
 833 * This function tells us whether an excluded path matches a
 834 * list of "interesting" pathspecs. That is, whether a path matched
 835 * by any of the pathspecs could possibly be ignored by excluding
 836 * the specified path. This can happen if:
 837 *
 838 *   1. the path is mentioned explicitly in the pathspec
 839 *
 840 *   2. the path is a directory prefix of some element in the
 841 *      pathspec
 842 */
 843static int exclude_matches_pathspec(const char *path, int len,
 844                const struct path_simplify *simplify)
 845{
 846        if (simplify) {
 847                for (; simplify->path; simplify++) {
 848                        if (len == simplify->len
 849                            && !memcmp(path, simplify->path, len))
 850                                return 1;
 851                        if (len < simplify->len
 852                            && simplify->path[len] == '/'
 853                            && !memcmp(path, simplify->path, len))
 854                                return 1;
 855                }
 856        }
 857        return 0;
 858}
 859
 860static int get_index_dtype(const char *path, int len)
 861{
 862        int pos;
 863        struct cache_entry *ce;
 864
 865        ce = cache_name_exists(path, len, 0);
 866        if (ce) {
 867                if (!ce_uptodate(ce))
 868                        return DT_UNKNOWN;
 869                if (S_ISGITLINK(ce->ce_mode))
 870                        return DT_DIR;
 871                /*
 872                 * Nobody actually cares about the
 873                 * difference between DT_LNK and DT_REG
 874                 */
 875                return DT_REG;
 876        }
 877
 878        /* Try to look it up as a directory */
 879        pos = cache_name_pos(path, len);
 880        if (pos >= 0)
 881                return DT_UNKNOWN;
 882        pos = -pos-1;
 883        while (pos < active_nr) {
 884                ce = active_cache[pos++];
 885                if (strncmp(ce->name, path, len))
 886                        break;
 887                if (ce->name[len] > '/')
 888                        break;
 889                if (ce->name[len] < '/')
 890                        continue;
 891                if (!ce_uptodate(ce))
 892                        break;  /* continue? */
 893                return DT_DIR;
 894        }
 895        return DT_UNKNOWN;
 896}
 897
 898static int get_dtype(struct dirent *de, const char *path, int len)
 899{
 900        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 901        struct stat st;
 902
 903        if (dtype != DT_UNKNOWN)
 904                return dtype;
 905        dtype = get_index_dtype(path, len);
 906        if (dtype != DT_UNKNOWN)
 907                return dtype;
 908        if (lstat(path, &st))
 909                return dtype;
 910        if (S_ISREG(st.st_mode))
 911                return DT_REG;
 912        if (S_ISDIR(st.st_mode))
 913                return DT_DIR;
 914        if (S_ISLNK(st.st_mode))
 915                return DT_LNK;
 916        return dtype;
 917}
 918
 919enum path_treatment {
 920        path_ignored,
 921        path_handled,
 922        path_recurse
 923};
 924
 925static enum path_treatment treat_one_path(struct dir_struct *dir,
 926                                          char *path, int *len,
 927                                          const struct path_simplify *simplify,
 928                                          int dtype, struct dirent *de)
 929{
 930        int exclude = excluded(dir, path, &dtype);
 931        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 932            && exclude_matches_pathspec(path, *len, simplify))
 933                dir_add_ignored(dir, path, *len);
 934
 935        /*
 936         * Excluded? If we don't explicitly want to show
 937         * ignored files, ignore it
 938         */
 939        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
 940                return path_ignored;
 941
 942        if (dtype == DT_UNKNOWN)
 943                dtype = get_dtype(de, path, *len);
 944
 945        /*
 946         * Do we want to see just the ignored files?
 947         * We still need to recurse into directories,
 948         * even if we don't ignore them, since the
 949         * directory may contain files that we do..
 950         */
 951        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
 952                if (dtype != DT_DIR)
 953                        return path_ignored;
 954        }
 955
 956        switch (dtype) {
 957        default:
 958                return path_ignored;
 959        case DT_DIR:
 960                memcpy(path + *len, "/", 2);
 961                (*len)++;
 962                switch (treat_directory(dir, path, *len, simplify)) {
 963                case show_directory:
 964                        if (exclude != !!(dir->flags
 965                                          & DIR_SHOW_IGNORED))
 966                                return path_ignored;
 967                        break;
 968                case recurse_into_directory:
 969                        return path_recurse;
 970                case ignore_directory:
 971                        return path_ignored;
 972                }
 973                break;
 974        case DT_REG:
 975        case DT_LNK:
 976                break;
 977        }
 978        return path_handled;
 979}
 980
 981static enum path_treatment treat_path(struct dir_struct *dir,
 982                                      struct dirent *de,
 983                                      char *path, int path_max,
 984                                      int baselen,
 985                                      const struct path_simplify *simplify,
 986                                      int *len)
 987{
 988        int dtype;
 989
 990        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 991                return path_ignored;
 992        *len = strlen(de->d_name);
 993        /* Ignore overly long pathnames! */
 994        if (*len + baselen + 8 > path_max)
 995                return path_ignored;
 996        memcpy(path + baselen, de->d_name, *len + 1);
 997        *len += baselen;
 998        if (simplify_away(path, *len, simplify))
 999                return path_ignored;
1000
1001        dtype = DTYPE(de);
1002        return treat_one_path(dir, path, len, simplify, dtype, de);
1003}
1004
1005/*
1006 * Read a directory tree. We currently ignore anything but
1007 * directories, regular files and symlinks. That's because git
1008 * doesn't handle them at all yet. Maybe that will change some
1009 * day.
1010 *
1011 * Also, we ignore the name ".git" (even if it is not a directory).
1012 * That likely will not change.
1013 */
1014static int read_directory_recursive(struct dir_struct *dir,
1015                                    const char *base, int baselen,
1016                                    int check_only,
1017                                    const struct path_simplify *simplify)
1018{
1019        DIR *fdir = opendir(*base ? base : ".");
1020        int contents = 0;
1021        struct dirent *de;
1022        char path[PATH_MAX + 1];
1023
1024        if (!fdir)
1025                return 0;
1026
1027        memcpy(path, base, baselen);
1028
1029        while ((de = readdir(fdir)) != NULL) {
1030                int len;
1031                switch (treat_path(dir, de, path, sizeof(path),
1032                                   baselen, simplify, &len)) {
1033                case path_recurse:
1034                        contents += read_directory_recursive(dir, path, len, 0, simplify);
1035                        continue;
1036                case path_ignored:
1037                        continue;
1038                case path_handled:
1039                        break;
1040                }
1041                contents++;
1042                if (check_only)
1043                        goto exit_early;
1044                else
1045                        dir_add_name(dir, path, len);
1046        }
1047exit_early:
1048        closedir(fdir);
1049
1050        return contents;
1051}
1052
1053static int cmp_name(const void *p1, const void *p2)
1054{
1055        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1056        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1057
1058        return cache_name_compare(e1->name, e1->len,
1059                                  e2->name, e2->len);
1060}
1061
/*
 * Return the length of the "simple" part of a path match limiter,
 * i.e. the number of bytes before the first glob special character
 * or, if there is none, before the terminating NUL.
 */
static int simple_length(const char *match)
{
        int n;

        for (n = 0; ; n++) {
                unsigned char ch = match[n];

                if (ch == '\0' || is_glob_special(ch))
                        return n;
        }
}
1076
1077static struct path_simplify *create_simplify(const char **pathspec)
1078{
1079        int nr, alloc = 0;
1080        struct path_simplify *simplify = NULL;
1081
1082        if (!pathspec)
1083                return NULL;
1084
1085        for (nr = 0 ; ; nr++) {
1086                const char *match;
1087                if (nr >= alloc) {
1088                        alloc = alloc_nr(alloc);
1089                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1090                }
1091                match = *pathspec++;
1092                if (!match)
1093                        break;
1094                simplify[nr].path = match;
1095                simplify[nr].len = simple_length(match);
1096        }
1097        simplify[nr].path = NULL;
1098        simplify[nr].len = 0;
1099        return simplify;
1100}
1101
/*
 * Release a list obtained from create_simplify(). The list is a
 * single allocation, and NULL is accepted.
 */
static void free_simplify(struct path_simplify *simplify)
{
        free(simplify);
}
1106
1107static int treat_leading_path(struct dir_struct *dir,
1108                              const char *path, int len,
1109                              const struct path_simplify *simplify)
1110{
1111        char pathbuf[PATH_MAX];
1112        int baselen, blen;
1113        const char *cp;
1114
1115        while (len && path[len - 1] == '/')
1116                len--;
1117        if (!len)
1118                return 1;
1119        baselen = 0;
1120        while (1) {
1121                cp = path + baselen + !!baselen;
1122                cp = memchr(cp, '/', path + len - cp);
1123                if (!cp)
1124                        baselen = len;
1125                else
1126                        baselen = cp - path;
1127                memcpy(pathbuf, path, baselen);
1128                pathbuf[baselen] = '\0';
1129                if (!is_directory(pathbuf))
1130                        return 0;
1131                if (simplify_away(pathbuf, baselen, simplify))
1132                        return 0;
1133                blen = baselen;
1134                if (treat_one_path(dir, pathbuf, &blen, simplify,
1135                                   DT_DIR, NULL) == path_ignored)
1136                        return 0; /* do not recurse into it */
1137                if (len <= baselen)
1138                        return 1; /* finished checking */
1139        }
1140}
1141
1142int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1143{
1144        struct path_simplify *simplify;
1145
1146        if (has_symlink_leading_path(path, len))
1147                return dir->nr;
1148
1149        simplify = create_simplify(pathspec);
1150        if (!len || treat_leading_path(dir, path, len, simplify))
1151                read_directory_recursive(dir, path, len, 0, simplify);
1152        free_simplify(simplify);
1153        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1154        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1155        return dir->nr;
1156}
1157
/*
 * Return 1 if lstat() succeeds on "f", 0 otherwise. Because lstat()
 * is used, a symbolic link is not followed.
 */
int file_exists(const char *f)
{
        struct stat st;

        if (lstat(f, &st) != 0)
                return 0;
        return 1;
}
1163
/*
 * Given two normalized paths (a trailing slash is ok), if subdir is
 * outside dir, return -1.  Otherwise return the offset in subdir that
 * can be used as relative path to dir.
 *
 * Both arguments must be non-NULL and non-empty.
 */
int dir_inside_of(const char *subdir, const char *dir)
{
        int i = 0;

        assert(dir && subdir && *dir && *subdir);

        /* Skip the common leading part of the two paths. */
        while (dir[i] && subdir[i] && dir[i] == subdir[i])
                i++;

        /* hel[p]/me vs hel[l]/yeah */
        if (dir[i] && subdir[i])
                return -1;

        /* subdir ended: inside only if dir ended too (same dir) */
        if (!subdir[i])
                return dir[i] ? -1 : i;

        /* foo/[b]ar vs foo/[] */
        if (is_dir_sep(dir[i - 1]))
                return is_dir_sep(subdir[i - 1]) ? i : -1;

        /* foo[/]bar vs foo[] */
        return is_dir_sep(subdir[i]) ? i + 1 : -1;
}
1195
1196int is_inside_dir(const char *dir)
1197{
1198        char cwd[PATH_MAX];
1199        if (!dir)
1200                return 0;
1201        if (!getcwd(cwd, sizeof(cwd)))
1202                die_errno("can't find the current directory");
1203        return dir_inside_of(cwd, dir) >= 0;
1204}
1205
1206int is_empty_dir(const char *path)
1207{
1208        DIR *dir = opendir(path);
1209        struct dirent *e;
1210        int ret = 1;
1211
1212        if (!dir)
1213                return 0;
1214
1215        while ((e = readdir(dir)) != NULL)
1216                if (!is_dot_or_dotdot(e->d_name)) {
1217                        ret = 0;
1218                        break;
1219                }
1220
1221        closedir(dir);
1222        return ret;
1223}
1224
1225int remove_dir_recursively(struct strbuf *path, int flag)
1226{
1227        DIR *dir;
1228        struct dirent *e;
1229        int ret = 0, original_len = path->len, len;
1230        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1231        unsigned char submodule_head[20];
1232
1233        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1234            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head))
1235                /* Do not descend and nuke a nested git work tree. */
1236                return 0;
1237
1238        dir = opendir(path->buf);
1239        if (!dir)
1240                return rmdir(path->buf);
1241        if (path->buf[original_len - 1] != '/')
1242                strbuf_addch(path, '/');
1243
1244        len = path->len;
1245        while ((e = readdir(dir)) != NULL) {
1246                struct stat st;
1247                if (is_dot_or_dotdot(e->d_name))
1248                        continue;
1249
1250                strbuf_setlen(path, len);
1251                strbuf_addstr(path, e->d_name);
1252                if (lstat(path->buf, &st))
1253                        ; /* fall thru */
1254                else if (S_ISDIR(st.st_mode)) {
1255                        if (!remove_dir_recursively(path, only_empty))
1256                                continue; /* happy */
1257                } else if (!only_empty && !unlink(path->buf))
1258                        continue; /* happy, too */
1259
1260                /* path too long, stat fails, or non-directory still exists */
1261                ret = -1;
1262                break;
1263        }
1264        closedir(dir);
1265
1266        strbuf_setlen(path, original_len);
1267        if (!ret)
1268                ret = rmdir(path->buf);
1269        return ret;
1270}
1271
1272void setup_standard_excludes(struct dir_struct *dir)
1273{
1274        const char *path;
1275
1276        dir->exclude_per_dir = ".gitignore";
1277        path = git_path("info/exclude");
1278        if (!access(path, R_OK))
1279                add_excludes_from_file(dir, path);
1280        if (excludes_file && !access(excludes_file, R_OK))
1281                add_excludes_from_file(dir, excludes_file);
1282}
1283
/*
 * Unlink "name" and then remove its leading directories, innermost
 * first, stopping at the first one rmdir() refuses to remove.
 *
 * Returns -1 if unlink() fails for a reason other than ENOENT and 0
 * otherwise; failures to remove directories are not reported.
 */
int remove_path(const char *name)
{
        const char *last;
        char *dirs, *cut;

        if (unlink(name) && errno != ENOENT)
                return -1;

        last = strrchr(name, '/');
        if (!last)
                return 0;

        /* Work on a copy that is truncated one component at a time. */
        dirs = xstrdup(name);
        cut = dirs + (last - name);
        while (cut) {
                *cut = '\0';
                if (rmdir(dirs) != 0)
                        break;
                cut = strrchr(dirs, '/');
        }
        free(dirs);
        return 0;
}
1302
1303static int pathspec_item_cmp(const void *a_, const void *b_)
1304{
1305        struct pathspec_item *a, *b;
1306
1307        a = (struct pathspec_item *)a_;
1308        b = (struct pathspec_item *)b_;
1309        return strcmp(a->match, b->match);
1310}
1311
1312int init_pathspec(struct pathspec *pathspec, const char **paths)
1313{
1314        const char **p = paths;
1315        int i;
1316
1317        memset(pathspec, 0, sizeof(*pathspec));
1318        if (!p)
1319                return 0;
1320        while (*p)
1321                p++;
1322        pathspec->raw = paths;
1323        pathspec->nr = p - paths;
1324        if (!pathspec->nr)
1325                return 0;
1326
1327        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1328        for (i = 0; i < pathspec->nr; i++) {
1329                struct pathspec_item *item = pathspec->items+i;
1330                const char *path = paths[i];
1331
1332                item->match = path;
1333                item->len = strlen(path);
1334                item->use_wildcard = !no_wildcard(path);
1335                if (item->use_wildcard)
1336                        pathspec->has_wildcard = 1;
1337        }
1338
1339        qsort(pathspec->items, pathspec->nr,
1340              sizeof(struct pathspec_item), pathspec_item_cmp);
1341
1342        return 0;
1343}
1344
1345void free_pathspec(struct pathspec *pathspec)
1346{
1347        free(pathspec->items);
1348        pathspec->items = NULL;
1349}