dir.con commit path_excluded(): update API to less cache-entry centric (782cd4c)
   1/*
   2 * This handles recursive filename detection with exclude
 * files, index knowledge, etc.
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37static size_t common_prefix_len(const char **pathspec)
  38{
  39        const char *n, *first;
  40        size_t max = 0;
  41
  42        if (!pathspec)
  43                return max;
  44
  45        first = *pathspec;
  46        while ((n = *pathspec++)) {
  47                size_t i, len = 0;
  48                for (i = 0; first == n || i < max; i++) {
  49                        char c = n[i];
  50                        if (!c || c != first[i] || is_glob_special(c))
  51                                break;
  52                        if (c == '/')
  53                                len = i + 1;
  54                }
  55                if (first == n || len < max) {
  56                        max = len;
  57                        if (!max)
  58                                break;
  59                }
  60        }
  61        return max;
  62}
  63
  64/*
  65 * Returns a copy of the longest leading path common among all
  66 * pathspecs.
  67 */
  68char *common_prefix(const char **pathspec)
  69{
  70        unsigned long len = common_prefix_len(pathspec);
  71
  72        return len ? xmemdupz(*pathspec, len) : NULL;
  73}
  74
  75int fill_directory(struct dir_struct *dir, const char **pathspec)
  76{
  77        const char *path;
  78        size_t len;
  79
  80        /*
  81         * Calculate common prefix for the pathspec, and
  82         * use that to optimize the directory walk
  83         */
  84        len = common_prefix_len(pathspec);
  85        path = "";
  86
  87        if (len)
  88                path = xmemdupz(*pathspec, len);
  89
  90        /* Read the directory and prune it */
  91        read_directory(dir, path, len, pathspec);
  92        if (*path)
  93                free((char *)path);
  94        return len;
  95}
  96
  97int within_depth(const char *name, int namelen,
  98                        int depth, int max_depth)
  99{
 100        const char *cp = name, *cpe = name + namelen;
 101
 102        while (cp < cpe) {
 103                if (*cp++ != '/')
 104                        continue;
 105                depth++;
 106                if (depth > max_depth)
 107                        return 0;
 108        }
 109        return 1;
 110}
 111
 112/*
 113 * Does 'match' match the given name?
 114 * A match is found if
 115 *
 116 * (1) the 'match' string is leading directory of 'name', or
 117 * (2) the 'match' string is a wildcard and matches 'name', or
 118 * (3) the 'match' string is exactly the same as 'name'.
 119 *
 120 * and the return value tells which case it was.
 121 *
 122 * It returns 0 when there is no match.
 123 */
 124static int match_one(const char *match, const char *name, int namelen)
 125{
 126        int matchlen;
 127
 128        /* If the match was just the prefix, we matched */
 129        if (!*match)
 130                return MATCHED_RECURSIVELY;
 131
 132        if (ignore_case) {
 133                for (;;) {
 134                        unsigned char c1 = tolower(*match);
 135                        unsigned char c2 = tolower(*name);
 136                        if (c1 == '\0' || is_glob_special(c1))
 137                                break;
 138                        if (c1 != c2)
 139                                return 0;
 140                        match++;
 141                        name++;
 142                        namelen--;
 143                }
 144        } else {
 145                for (;;) {
 146                        unsigned char c1 = *match;
 147                        unsigned char c2 = *name;
 148                        if (c1 == '\0' || is_glob_special(c1))
 149                                break;
 150                        if (c1 != c2)
 151                                return 0;
 152                        match++;
 153                        name++;
 154                        namelen--;
 155                }
 156        }
 157
 158
 159        /*
 160         * If we don't match the matchstring exactly,
 161         * we need to match by fnmatch
 162         */
 163        matchlen = strlen(match);
 164        if (strncmp_icase(match, name, matchlen))
 165                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 166
 167        if (namelen == matchlen)
 168                return MATCHED_EXACTLY;
 169        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 170                return MATCHED_RECURSIVELY;
 171        return 0;
 172}
 173
 174/*
 175 * Given a name and a list of pathspecs, see if the name matches
 176 * any of the pathspecs.  The caller is also interested in seeing
 177 * all pathspec matches some names it calls this function with
 178 * (otherwise the user could have mistyped the unmatched pathspec),
 179 * and a mark is left in seen[] array for pathspec element that
 180 * actually matched anything.
 181 */
 182int match_pathspec(const char **pathspec, const char *name, int namelen,
 183                int prefix, char *seen)
 184{
 185        int i, retval = 0;
 186
 187        if (!pathspec)
 188                return 1;
 189
 190        name += prefix;
 191        namelen -= prefix;
 192
 193        for (i = 0; pathspec[i] != NULL; i++) {
 194                int how;
 195                const char *match = pathspec[i] + prefix;
 196                if (seen && seen[i] == MATCHED_EXACTLY)
 197                        continue;
 198                how = match_one(match, name, namelen);
 199                if (how) {
 200                        if (retval < how)
 201                                retval = how;
 202                        if (seen && seen[i] < how)
 203                                seen[i] = how;
 204                }
 205        }
 206        return retval;
 207}
 208
 209/*
 210 * Does 'match' match the given name?
 211 * A match is found if
 212 *
 213 * (1) the 'match' string is leading directory of 'name', or
 214 * (2) the 'match' string is a wildcard and matches 'name', or
 215 * (3) the 'match' string is exactly the same as 'name'.
 216 *
 217 * and the return value tells which case it was.
 218 *
 219 * It returns 0 when there is no match.
 220 */
 221static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 222                               const char *name, int namelen)
 223{
 224        /* name/namelen has prefix cut off by caller */
 225        const char *match = item->match + prefix;
 226        int matchlen = item->len - prefix;
 227
 228        /* If the match was just the prefix, we matched */
 229        if (!*match)
 230                return MATCHED_RECURSIVELY;
 231
 232        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 233                if (matchlen == namelen)
 234                        return MATCHED_EXACTLY;
 235
 236                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 237                        return MATCHED_RECURSIVELY;
 238        }
 239
 240        if (item->use_wildcard && !fnmatch(match, name, 0))
 241                return MATCHED_FNMATCH;
 242
 243        return 0;
 244}
 245
 246/*
 247 * Given a name and a list of pathspecs, see if the name matches
 248 * any of the pathspecs.  The caller is also interested in seeing
 249 * all pathspec matches some names it calls this function with
 250 * (otherwise the user could have mistyped the unmatched pathspec),
 251 * and a mark is left in seen[] array for pathspec element that
 252 * actually matched anything.
 253 */
 254int match_pathspec_depth(const struct pathspec *ps,
 255                         const char *name, int namelen,
 256                         int prefix, char *seen)
 257{
 258        int i, retval = 0;
 259
 260        if (!ps->nr) {
 261                if (!ps->recursive || ps->max_depth == -1)
 262                        return MATCHED_RECURSIVELY;
 263
 264                if (within_depth(name, namelen, 0, ps->max_depth))
 265                        return MATCHED_EXACTLY;
 266                else
 267                        return 0;
 268        }
 269
 270        name += prefix;
 271        namelen -= prefix;
 272
 273        for (i = ps->nr - 1; i >= 0; i--) {
 274                int how;
 275                if (seen && seen[i] == MATCHED_EXACTLY)
 276                        continue;
 277                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 278                if (ps->recursive && ps->max_depth != -1 &&
 279                    how && how != MATCHED_FNMATCH) {
 280                        int len = ps->items[i].len;
 281                        if (name[len] == '/')
 282                                len++;
 283                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 284                                how = MATCHED_EXACTLY;
 285                        else
 286                                how = 0;
 287                }
 288                if (how) {
 289                        if (retval < how)
 290                                retval = how;
 291                        if (seen && seen[i] < how)
 292                                seen[i] = how;
 293                }
 294        }
 295        return retval;
 296}
 297
 298static int no_wildcard(const char *string)
 299{
 300        return string[strcspn(string, "*?[{\\")] == '\0';
 301}
 302
 303void add_exclude(const char *string, const char *base,
 304                 int baselen, struct exclude_list *which)
 305{
 306        struct exclude *x;
 307        size_t len;
 308        int to_exclude = 1;
 309        int flags = 0;
 310
 311        if (*string == '!') {
 312                to_exclude = 0;
 313                string++;
 314        }
 315        len = strlen(string);
 316        if (len && string[len - 1] == '/') {
 317                char *s;
 318                x = xmalloc(sizeof(*x) + len);
 319                s = (char *)(x+1);
 320                memcpy(s, string, len - 1);
 321                s[len - 1] = '\0';
 322                string = s;
 323                x->pattern = s;
 324                flags = EXC_FLAG_MUSTBEDIR;
 325        } else {
 326                x = xmalloc(sizeof(*x));
 327                x->pattern = string;
 328        }
 329        x->to_exclude = to_exclude;
 330        x->patternlen = strlen(string);
 331        x->base = base;
 332        x->baselen = baselen;
 333        x->flags = flags;
 334        if (!strchr(string, '/'))
 335                x->flags |= EXC_FLAG_NODIR;
 336        if (no_wildcard(string))
 337                x->flags |= EXC_FLAG_NOWILDCARD;
 338        if (*string == '*' && no_wildcard(string+1))
 339                x->flags |= EXC_FLAG_ENDSWITH;
 340        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 341        which->excludes[which->nr++] = x;
 342}
 343
 344static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 345{
 346        int pos, len;
 347        unsigned long sz;
 348        enum object_type type;
 349        void *data;
 350        struct index_state *istate = &the_index;
 351
 352        len = strlen(path);
 353        pos = index_name_pos(istate, path, len);
 354        if (pos < 0)
 355                return NULL;
 356        if (!ce_skip_worktree(istate->cache[pos]))
 357                return NULL;
 358        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 359        if (!data || type != OBJ_BLOB) {
 360                free(data);
 361                return NULL;
 362        }
 363        *size = xsize_t(sz);
 364        return data;
 365}
 366
 367void free_excludes(struct exclude_list *el)
 368{
 369        int i;
 370
 371        for (i = 0; i < el->nr; i++)
 372                free(el->excludes[i]);
 373        free(el->excludes);
 374
 375        el->nr = 0;
 376        el->excludes = NULL;
 377}
 378
 379int add_excludes_from_file_to_list(const char *fname,
 380                                   const char *base,
 381                                   int baselen,
 382                                   char **buf_p,
 383                                   struct exclude_list *which,
 384                                   int check_index)
 385{
 386        struct stat st;
 387        int fd, i;
 388        size_t size = 0;
 389        char *buf, *entry;
 390
 391        fd = open(fname, O_RDONLY);
 392        if (fd < 0 || fstat(fd, &st) < 0) {
 393                if (0 <= fd)
 394                        close(fd);
 395                if (!check_index ||
 396                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 397                        return -1;
 398                if (size == 0) {
 399                        free(buf);
 400                        return 0;
 401                }
 402                if (buf[size-1] != '\n') {
 403                        buf = xrealloc(buf, size+1);
 404                        buf[size++] = '\n';
 405                }
 406        }
 407        else {
 408                size = xsize_t(st.st_size);
 409                if (size == 0) {
 410                        close(fd);
 411                        return 0;
 412                }
 413                buf = xmalloc(size+1);
 414                if (read_in_full(fd, buf, size) != size) {
 415                        free(buf);
 416                        close(fd);
 417                        return -1;
 418                }
 419                buf[size++] = '\n';
 420                close(fd);
 421        }
 422
 423        if (buf_p)
 424                *buf_p = buf;
 425        entry = buf;
 426        for (i = 0; i < size; i++) {
 427                if (buf[i] == '\n') {
 428                        if (entry != buf + i && entry[0] != '#') {
 429                                buf[i - (i && buf[i-1] == '\r')] = 0;
 430                                add_exclude(entry, base, baselen, which);
 431                        }
 432                        entry = buf + i + 1;
 433                }
 434        }
 435        return 0;
 436}
 437
 438void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 439{
 440        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 441                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 442                die("cannot use %s as an exclude file", fname);
 443}
 444
 445static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 446{
 447        struct exclude_list *el;
 448        struct exclude_stack *stk = NULL;
 449        int current;
 450
 451        if ((!dir->exclude_per_dir) ||
 452            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 453                return; /* too long a path -- ignore */
 454
 455        /* Pop the ones that are not the prefix of the path being checked. */
 456        el = &dir->exclude_list[EXC_DIRS];
 457        while ((stk = dir->exclude_stack) != NULL) {
 458                if (stk->baselen <= baselen &&
 459                    !strncmp(dir->basebuf, base, stk->baselen))
 460                        break;
 461                dir->exclude_stack = stk->prev;
 462                while (stk->exclude_ix < el->nr)
 463                        free(el->excludes[--el->nr]);
 464                free(stk->filebuf);
 465                free(stk);
 466        }
 467
 468        /* Read from the parent directories and push them down. */
 469        current = stk ? stk->baselen : -1;
 470        while (current < baselen) {
 471                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 472                const char *cp;
 473
 474                if (current < 0) {
 475                        cp = base;
 476                        current = 0;
 477                }
 478                else {
 479                        cp = strchr(base + current + 1, '/');
 480                        if (!cp)
 481                                die("oops in prep_exclude");
 482                        cp++;
 483                }
 484                stk->prev = dir->exclude_stack;
 485                stk->baselen = cp - base;
 486                stk->exclude_ix = el->nr;
 487                memcpy(dir->basebuf + current, base + current,
 488                       stk->baselen - current);
 489                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 490                add_excludes_from_file_to_list(dir->basebuf,
 491                                               dir->basebuf, stk->baselen,
 492                                               &stk->filebuf, el, 1);
 493                dir->exclude_stack = stk;
 494                current = stk->baselen;
 495        }
 496        dir->basebuf[baselen] = '\0';
 497}
 498
 499/* Scan the list and let the last match determine the fate.
 500 * Return 1 for exclude, 0 for include and -1 for undecided.
 501 */
 502int excluded_from_list(const char *pathname,
 503                       int pathlen, const char *basename, int *dtype,
 504                       struct exclude_list *el)
 505{
 506        int i;
 507
 508        if (el->nr) {
 509                for (i = el->nr - 1; 0 <= i; i--) {
 510                        struct exclude *x = el->excludes[i];
 511                        const char *exclude = x->pattern;
 512                        int to_exclude = x->to_exclude;
 513
 514                        if (x->flags & EXC_FLAG_MUSTBEDIR) {
 515                                if (*dtype == DT_UNKNOWN)
 516                                        *dtype = get_dtype(NULL, pathname, pathlen);
 517                                if (*dtype != DT_DIR)
 518                                        continue;
 519                        }
 520
 521                        if (x->flags & EXC_FLAG_NODIR) {
 522                                /* match basename */
 523                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 524                                        if (!strcmp_icase(exclude, basename))
 525                                                return to_exclude;
 526                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
 527                                        if (x->patternlen - 1 <= pathlen &&
 528                                            !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
 529                                                return to_exclude;
 530                                } else {
 531                                        if (fnmatch_icase(exclude, basename, 0) == 0)
 532                                                return to_exclude;
 533                                }
 534                        }
 535                        else {
 536                                /* match with FNM_PATHNAME:
 537                                 * exclude has base (baselen long) implicitly
 538                                 * in front of it.
 539                                 */
 540                                int baselen = x->baselen;
 541                                if (*exclude == '/')
 542                                        exclude++;
 543
 544                                if (pathlen < baselen ||
 545                                    (baselen && pathname[baselen-1] != '/') ||
 546                                    strncmp_icase(pathname, x->base, baselen))
 547                                    continue;
 548
 549                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 550                                        if (!strcmp_icase(exclude, pathname + baselen))
 551                                                return to_exclude;
 552                                } else {
 553                                        if (fnmatch_icase(exclude, pathname+baselen,
 554                                                    FNM_PATHNAME) == 0)
 555                                            return to_exclude;
 556                                }
 557                        }
 558                }
 559        }
 560        return -1; /* undecided */
 561}
 562
 563int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 564{
 565        int pathlen = strlen(pathname);
 566        int st;
 567        const char *basename = strrchr(pathname, '/');
 568        basename = (basename) ? basename+1 : pathname;
 569
 570        prep_exclude(dir, pathname, basename-pathname);
 571        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 572                switch (excluded_from_list(pathname, pathlen, basename,
 573                                           dtype_p, &dir->exclude_list[st])) {
 574                case 0:
 575                        return 0;
 576                case 1:
 577                        return 1;
 578                }
 579        }
 580        return 0;
 581}
 582
 583void path_exclude_check_init(struct path_exclude_check *check,
 584                             struct dir_struct *dir)
 585{
 586        check->dir = dir;
 587        strbuf_init(&check->path, 256);
 588}
 589
 590void path_exclude_check_clear(struct path_exclude_check *check)
 591{
 592        strbuf_release(&check->path);
 593}
 594
 595/*
 596 * Is this name excluded?  This is for a caller like show_files() that
 597 * do not honor directory hierarchy and iterate through paths that are
 598 * possibly in an ignored directory.
 599 *
 600 * A path to a directory known to be excluded is left in check->path to
 601 * optimize for repeated checks for files in the same excluded directory.
 602 */
 603int path_excluded(struct path_exclude_check *check,
 604                  const char *name, int namelen, int *dtype)
 605{
 606        int i;
 607        struct strbuf *path = &check->path;
 608
 609        /*
 610         * we allow the caller to pass namelen as an optimization; it
 611         * must match the length of the name, as we eventually call
 612         * excluded() on the whole name string.
 613         */
 614        if (namelen < 0)
 615                namelen = strlen(name);
 616
 617        if (path->len &&
 618            path->len <= namelen &&
 619            !memcmp(name, path->buf, path->len) &&
 620            (!name[path->len] || name[path->len] == '/'))
 621                return 1;
 622
 623        strbuf_setlen(path, 0);
 624        for (i = 0; name[i]; i++) {
 625                int ch = name[i];
 626
 627                if (ch == '/') {
 628                        int dt = DT_DIR;
 629                        if (excluded(check->dir, path->buf, &dt))
 630                                return 1;
 631                }
 632                strbuf_addch(path, ch);
 633        }
 634
 635        /* An entry in the index; cannot be a directory with subentries */
 636        strbuf_setlen(path, 0);
 637
 638        return excluded(check->dir, name, dtype);
 639}
 640
 641static struct dir_entry *dir_entry_new(const char *pathname, int len)
 642{
 643        struct dir_entry *ent;
 644
 645        ent = xmalloc(sizeof(*ent) + len + 1);
 646        ent->len = len;
 647        memcpy(ent->name, pathname, len);
 648        ent->name[len] = 0;
 649        return ent;
 650}
 651
 652static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 653{
 654        if (cache_name_exists(pathname, len, ignore_case))
 655                return NULL;
 656
 657        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 658        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 659}
 660
 661struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 662{
 663        if (!cache_name_is_other(pathname, len))
 664                return NULL;
 665
 666        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 667        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 668}
 669
 670enum exist_status {
 671        index_nonexistent = 0,
 672        index_directory,
 673        index_gitdir
 674};
 675
 676/*
 677 * Do not use the alphabetically stored index to look up
 678 * the directory name; instead, use the case insensitive
 679 * name hash.
 680 */
 681static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 682{
 683        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 684        unsigned char endchar;
 685
 686        if (!ce)
 687                return index_nonexistent;
 688        endchar = ce->name[len];
 689
 690        /*
 691         * The cache_entry structure returned will contain this dirname
 692         * and possibly additional path components.
 693         */
 694        if (endchar == '/')
 695                return index_directory;
 696
 697        /*
 698         * If there are no additional path components, then this cache_entry
 699         * represents a submodule.  Submodules, despite being directories,
 700         * are stored in the cache without a closing slash.
 701         */
 702        if (!endchar && S_ISGITLINK(ce->ce_mode))
 703                return index_gitdir;
 704
 705        /* This should never be hit, but it exists just in case. */
 706        return index_nonexistent;
 707}
 708
 709/*
 710 * The index sorts alphabetically by entry name, which
 711 * means that a gitlink sorts as '\0' at the end, while
 712 * a directory (which is defined not as an entry, but as
 713 * the files it contains) will sort with the '/' at the
 714 * end.
 715 */
 716static enum exist_status directory_exists_in_index(const char *dirname, int len)
 717{
 718        int pos;
 719
 720        if (ignore_case)
 721                return directory_exists_in_index_icase(dirname, len);
 722
 723        pos = cache_name_pos(dirname, len);
 724        if (pos < 0)
 725                pos = -pos-1;
 726        while (pos < active_nr) {
 727                struct cache_entry *ce = active_cache[pos++];
 728                unsigned char endchar;
 729
 730                if (strncmp(ce->name, dirname, len))
 731                        break;
 732                endchar = ce->name[len];
 733                if (endchar > '/')
 734                        break;
 735                if (endchar == '/')
 736                        return index_directory;
 737                if (!endchar && S_ISGITLINK(ce->ce_mode))
 738                        return index_gitdir;
 739        }
 740        return index_nonexistent;
 741}
 742
 743/*
 744 * When we find a directory when traversing the filesystem, we
 745 * have three distinct cases:
 746 *
 747 *  - ignore it
 748 *  - see it as a directory
 749 *  - recurse into it
 750 *
 751 * and which one we choose depends on a combination of existing
 752 * git index contents and the flags passed into the directory
 753 * traversal routine.
 754 *
 755 * Case 1: If we *already* have entries in the index under that
 756 * directory name, we always recurse into the directory to see
 757 * all the files.
 758 *
 759 * Case 2: If we *already* have that directory name as a gitlink,
 760 * we always continue to see it as a gitlink, regardless of whether
 761 * there is an actual git directory there or not (it might not
 762 * be checked out as a subproject!)
 763 *
 764 * Case 3: if we didn't have it in the index previously, we
 765 * have a few sub-cases:
 766 *
 767 *  (a) if "show_other_directories" is true, we show it as
 768 *      just a directory, unless "hide_empty_directories" is
 769 *      also true and the directory is empty, in which case
 770 *      we just ignore it entirely.
 771 *  (b) if it looks like a git directory, and we don't have
 772 *      'no_gitlinks' set we treat it as a gitlink, and show it
 773 *      as a directory.
 774 *  (c) otherwise, we recurse into it.
 775 */
 776enum directory_treatment {
 777        show_directory,
 778        ignore_directory,
 779        recurse_into_directory
 780};
 781
 782static enum directory_treatment treat_directory(struct dir_struct *dir,
 783        const char *dirname, int len,
 784        const struct path_simplify *simplify)
 785{
 786        /* The "len-1" is to strip the final '/' */
 787        switch (directory_exists_in_index(dirname, len-1)) {
 788        case index_directory:
 789                return recurse_into_directory;
 790
 791        case index_gitdir:
 792                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 793                        return ignore_directory;
 794                return show_directory;
 795
 796        case index_nonexistent:
 797                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 798                        break;
 799                if (!(dir->flags & DIR_NO_GITLINKS)) {
 800                        unsigned char sha1[20];
 801                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 802                                return show_directory;
 803                }
 804                return recurse_into_directory;
 805        }
 806
 807        /* This is the "show_other_directories" case */
 808        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 809                return show_directory;
 810        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 811                return ignore_directory;
 812        return show_directory;
 813}
 814
 815/*
 816 * This is an inexact early pruning of any recursive directory
 817 * reading - if the path cannot possibly be in the pathspec,
 818 * return true, and we'll skip it early.
 819 */
 820static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 821{
 822        if (simplify) {
 823                for (;;) {
 824                        const char *match = simplify->path;
 825                        int len = simplify->len;
 826
 827                        if (!match)
 828                                break;
 829                        if (len > pathlen)
 830                                len = pathlen;
 831                        if (!memcmp(path, match, len))
 832                                return 0;
 833                        simplify++;
 834                }
 835                return 1;
 836        }
 837        return 0;
 838}
 839
 840/*
 841 * This function tells us whether an excluded path matches a
 842 * list of "interesting" pathspecs. That is, whether a path matched
 843 * by any of the pathspecs could possibly be ignored by excluding
 844 * the specified path. This can happen if:
 845 *
 846 *   1. the path is mentioned explicitly in the pathspec
 847 *
 848 *   2. the path is a directory prefix of some element in the
 849 *      pathspec
 850 */
 851static int exclude_matches_pathspec(const char *path, int len,
 852                const struct path_simplify *simplify)
 853{
 854        if (simplify) {
 855                for (; simplify->path; simplify++) {
 856                        if (len == simplify->len
 857                            && !memcmp(path, simplify->path, len))
 858                                return 1;
 859                        if (len < simplify->len
 860                            && simplify->path[len] == '/'
 861                            && !memcmp(path, simplify->path, len))
 862                                return 1;
 863                }
 864        }
 865        return 0;
 866}
 867
 868static int get_index_dtype(const char *path, int len)
 869{
 870        int pos;
 871        struct cache_entry *ce;
 872
 873        ce = cache_name_exists(path, len, 0);
 874        if (ce) {
 875                if (!ce_uptodate(ce))
 876                        return DT_UNKNOWN;
 877                if (S_ISGITLINK(ce->ce_mode))
 878                        return DT_DIR;
 879                /*
 880                 * Nobody actually cares about the
 881                 * difference between DT_LNK and DT_REG
 882                 */
 883                return DT_REG;
 884        }
 885
 886        /* Try to look it up as a directory */
 887        pos = cache_name_pos(path, len);
 888        if (pos >= 0)
 889                return DT_UNKNOWN;
 890        pos = -pos-1;
 891        while (pos < active_nr) {
 892                ce = active_cache[pos++];
 893                if (strncmp(ce->name, path, len))
 894                        break;
 895                if (ce->name[len] > '/')
 896                        break;
 897                if (ce->name[len] < '/')
 898                        continue;
 899                if (!ce_uptodate(ce))
 900                        break;  /* continue? */
 901                return DT_DIR;
 902        }
 903        return DT_UNKNOWN;
 904}
 905
 906static int get_dtype(struct dirent *de, const char *path, int len)
 907{
 908        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 909        struct stat st;
 910
 911        if (dtype != DT_UNKNOWN)
 912                return dtype;
 913        dtype = get_index_dtype(path, len);
 914        if (dtype != DT_UNKNOWN)
 915                return dtype;
 916        if (lstat(path, &st))
 917                return dtype;
 918        if (S_ISREG(st.st_mode))
 919                return DT_REG;
 920        if (S_ISDIR(st.st_mode))
 921                return DT_DIR;
 922        if (S_ISLNK(st.st_mode))
 923                return DT_LNK;
 924        return dtype;
 925}
 926
 927enum path_treatment {
 928        path_ignored,
 929        path_handled,
 930        path_recurse
 931};
 932
 933static enum path_treatment treat_one_path(struct dir_struct *dir,
 934                                          char *path, int *len,
 935                                          const struct path_simplify *simplify,
 936                                          int dtype, struct dirent *de)
 937{
 938        int exclude = excluded(dir, path, &dtype);
 939        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 940            && exclude_matches_pathspec(path, *len, simplify))
 941                dir_add_ignored(dir, path, *len);
 942
 943        /*
 944         * Excluded? If we don't explicitly want to show
 945         * ignored files, ignore it
 946         */
 947        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
 948                return path_ignored;
 949
 950        if (dtype == DT_UNKNOWN)
 951                dtype = get_dtype(de, path, *len);
 952
 953        /*
 954         * Do we want to see just the ignored files?
 955         * We still need to recurse into directories,
 956         * even if we don't ignore them, since the
 957         * directory may contain files that we do..
 958         */
 959        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
 960                if (dtype != DT_DIR)
 961                        return path_ignored;
 962        }
 963
 964        switch (dtype) {
 965        default:
 966                return path_ignored;
 967        case DT_DIR:
 968                memcpy(path + *len, "/", 2);
 969                (*len)++;
 970                switch (treat_directory(dir, path, *len, simplify)) {
 971                case show_directory:
 972                        if (exclude != !!(dir->flags
 973                                          & DIR_SHOW_IGNORED))
 974                                return path_ignored;
 975                        break;
 976                case recurse_into_directory:
 977                        return path_recurse;
 978                case ignore_directory:
 979                        return path_ignored;
 980                }
 981                break;
 982        case DT_REG:
 983        case DT_LNK:
 984                break;
 985        }
 986        return path_handled;
 987}
 988
 989static enum path_treatment treat_path(struct dir_struct *dir,
 990                                      struct dirent *de,
 991                                      char *path, int path_max,
 992                                      int baselen,
 993                                      const struct path_simplify *simplify,
 994                                      int *len)
 995{
 996        int dtype;
 997
 998        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 999                return path_ignored;
1000        *len = strlen(de->d_name);
1001        /* Ignore overly long pathnames! */
1002        if (*len + baselen + 8 > path_max)
1003                return path_ignored;
1004        memcpy(path + baselen, de->d_name, *len + 1);
1005        *len += baselen;
1006        if (simplify_away(path, *len, simplify))
1007                return path_ignored;
1008
1009        dtype = DTYPE(de);
1010        return treat_one_path(dir, path, len, simplify, dtype, de);
1011}
1012
1013/*
1014 * Read a directory tree. We currently ignore anything but
1015 * directories, regular files and symlinks. That's because git
1016 * doesn't handle them at all yet. Maybe that will change some
1017 * day.
1018 *
1019 * Also, we ignore the name ".git" (even if it is not a directory).
1020 * That likely will not change.
1021 */
1022static int read_directory_recursive(struct dir_struct *dir,
1023                                    const char *base, int baselen,
1024                                    int check_only,
1025                                    const struct path_simplify *simplify)
1026{
1027        DIR *fdir = opendir(*base ? base : ".");
1028        int contents = 0;
1029        struct dirent *de;
1030        char path[PATH_MAX + 1];
1031
1032        if (!fdir)
1033                return 0;
1034
1035        memcpy(path, base, baselen);
1036
1037        while ((de = readdir(fdir)) != NULL) {
1038                int len;
1039                switch (treat_path(dir, de, path, sizeof(path),
1040                                   baselen, simplify, &len)) {
1041                case path_recurse:
1042                        contents += read_directory_recursive(dir, path, len, 0, simplify);
1043                        continue;
1044                case path_ignored:
1045                        continue;
1046                case path_handled:
1047                        break;
1048                }
1049                contents++;
1050                if (check_only)
1051                        goto exit_early;
1052                else
1053                        dir_add_name(dir, path, len);
1054        }
1055exit_early:
1056        closedir(fdir);
1057
1058        return contents;
1059}
1060
1061static int cmp_name(const void *p1, const void *p2)
1062{
1063        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1064        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1065
1066        return cache_name_compare(e1->name, e1->len,
1067                                  e2->name, e2->len);
1068}
1069
/*
 * Return the length of the "simple" part of a path match limiter,
 * i.e. the number of leading bytes before the first glob-special
 * character (or the whole string if there is none).
 */
static int simple_length(const char *match)
{
	int len;

	for (len = 0; ; len++) {
		unsigned char c = match[len];

		if (c == '\0' || is_glob_special(c))
			return len;
	}
}
1084
1085static struct path_simplify *create_simplify(const char **pathspec)
1086{
1087        int nr, alloc = 0;
1088        struct path_simplify *simplify = NULL;
1089
1090        if (!pathspec)
1091                return NULL;
1092
1093        for (nr = 0 ; ; nr++) {
1094                const char *match;
1095                if (nr >= alloc) {
1096                        alloc = alloc_nr(alloc);
1097                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1098                }
1099                match = *pathspec++;
1100                if (!match)
1101                        break;
1102                simplify[nr].path = match;
1103                simplify[nr].len = simple_length(match);
1104        }
1105        simplify[nr].path = NULL;
1106        simplify[nr].len = 0;
1107        return simplify;
1108}
1109
/*
 * Release an array obtained from create_simplify().  Only the array
 * itself is freed; a NULL argument is fine.
 */
static void free_simplify(struct path_simplify *simplify)
{
	free(simplify);
}
1114
1115static int treat_leading_path(struct dir_struct *dir,
1116                              const char *path, int len,
1117                              const struct path_simplify *simplify)
1118{
1119        char pathbuf[PATH_MAX];
1120        int baselen, blen;
1121        const char *cp;
1122
1123        while (len && path[len - 1] == '/')
1124                len--;
1125        if (!len)
1126                return 1;
1127        baselen = 0;
1128        while (1) {
1129                cp = path + baselen + !!baselen;
1130                cp = memchr(cp, '/', path + len - cp);
1131                if (!cp)
1132                        baselen = len;
1133                else
1134                        baselen = cp - path;
1135                memcpy(pathbuf, path, baselen);
1136                pathbuf[baselen] = '\0';
1137                if (!is_directory(pathbuf))
1138                        return 0;
1139                if (simplify_away(pathbuf, baselen, simplify))
1140                        return 0;
1141                blen = baselen;
1142                if (treat_one_path(dir, pathbuf, &blen, simplify,
1143                                   DT_DIR, NULL) == path_ignored)
1144                        return 0; /* do not recurse into it */
1145                if (len <= baselen)
1146                        return 1; /* finished checking */
1147        }
1148}
1149
1150int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1151{
1152        struct path_simplify *simplify;
1153
1154        if (has_symlink_leading_path(path, len))
1155                return dir->nr;
1156
1157        simplify = create_simplify(pathspec);
1158        if (!len || treat_leading_path(dir, path, len, simplify))
1159                read_directory_recursive(dir, path, len, 0, simplify);
1160        free_simplify(simplify);
1161        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1162        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1163        return dir->nr;
1164}
1165
/*
 * Return 1 if lstat() succeeds on "f", 0 otherwise.  Because lstat()
 * is used, a symbolic link counts as existing even when its target
 * does not.
 */
int file_exists(const char *f)
{
	struct stat st;

	if (lstat(f, &st) != 0)
		return 0;
	return 1;
}
1171
/*
 * Given two normalized, non-empty paths (a trailing slash is ok),
 * return -1 if subdir is outside dir.  Otherwise return the offset in
 * subdir at which the part relative to dir starts.
 */
int dir_inside_of(const char *subdir, const char *dir)
{
	int offset;

	assert(dir && subdir && *dir && *subdir);

	/* length of the common prefix */
	for (offset = 0;
	     dir[offset] && subdir[offset] && dir[offset] == subdir[offset];
	     offset++)
		; /* nothing */

	/* hel[p]/me vs hel[l]/yeah */
	if (dir[offset] && subdir[offset])
		return -1;

	/* subdir is exhausted: inside only if it is the very same dir */
	if (!subdir[offset])
		return dir[offset] ? -1 : offset;

	/* foo/[b]ar vs foo/[] */
	if (is_dir_sep(dir[offset - 1]))
		return is_dir_sep(subdir[offset - 1]) ? offset : -1;

	/* foo[/]bar vs foo[] */
	return is_dir_sep(subdir[offset]) ? offset + 1 : -1;
}
1203
1204int is_inside_dir(const char *dir)
1205{
1206        char cwd[PATH_MAX];
1207        if (!dir)
1208                return 0;
1209        if (!getcwd(cwd, sizeof(cwd)))
1210                die_errno("can't find the current directory");
1211        return dir_inside_of(cwd, dir) >= 0;
1212}
1213
1214int is_empty_dir(const char *path)
1215{
1216        DIR *dir = opendir(path);
1217        struct dirent *e;
1218        int ret = 1;
1219
1220        if (!dir)
1221                return 0;
1222
1223        while ((e = readdir(dir)) != NULL)
1224                if (!is_dot_or_dotdot(e->d_name)) {
1225                        ret = 0;
1226                        break;
1227                }
1228
1229        closedir(dir);
1230        return ret;
1231}
1232
1233int remove_dir_recursively(struct strbuf *path, int flag)
1234{
1235        DIR *dir;
1236        struct dirent *e;
1237        int ret = 0, original_len = path->len, len;
1238        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1239        unsigned char submodule_head[20];
1240
1241        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1242            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head))
1243                /* Do not descend and nuke a nested git work tree. */
1244                return 0;
1245
1246        dir = opendir(path->buf);
1247        if (!dir)
1248                return rmdir(path->buf);
1249        if (path->buf[original_len - 1] != '/')
1250                strbuf_addch(path, '/');
1251
1252        len = path->len;
1253        while ((e = readdir(dir)) != NULL) {
1254                struct stat st;
1255                if (is_dot_or_dotdot(e->d_name))
1256                        continue;
1257
1258                strbuf_setlen(path, len);
1259                strbuf_addstr(path, e->d_name);
1260                if (lstat(path->buf, &st))
1261                        ; /* fall thru */
1262                else if (S_ISDIR(st.st_mode)) {
1263                        if (!remove_dir_recursively(path, only_empty))
1264                                continue; /* happy */
1265                } else if (!only_empty && !unlink(path->buf))
1266                        continue; /* happy, too */
1267
1268                /* path too long, stat fails, or non-directory still exists */
1269                ret = -1;
1270                break;
1271        }
1272        closedir(dir);
1273
1274        strbuf_setlen(path, original_len);
1275        if (!ret)
1276                ret = rmdir(path->buf);
1277        return ret;
1278}
1279
1280void setup_standard_excludes(struct dir_struct *dir)
1281{
1282        const char *path;
1283
1284        dir->exclude_per_dir = ".gitignore";
1285        path = git_path("info/exclude");
1286        if (!access(path, R_OK))
1287                add_excludes_from_file(dir, path);
1288        if (excludes_file && !access(excludes_file, R_OK))
1289                add_excludes_from_file(dir, excludes_file);
1290}
1291
/*
 * Unlink "name" and then try to remove its leading directories, from
 * the innermost outwards, stopping at the first rmdir() that fails.
 *
 * Returns -1 if the unlink fails for a reason other than the file not
 * existing, 0 otherwise; failures to remove directories are not
 * reported.
 */
int remove_path(const char *name)
{
	const char *last;
	char *dirs, *cut;

	if (unlink(name) && errno != ENOENT)
		return -1;

	last = strrchr(name, '/');
	if (!last)
		return 0;

	/* work on a private copy that can be truncated in place */
	dirs = xstrdup(name);
	cut = dirs + (last - name);
	while (cut) {
		*cut = '\0';
		if (rmdir(dirs) != 0)
			break;
		cut = strrchr(dirs, '/');
	}
	free(dirs);
	return 0;
}
1310
1311static int pathspec_item_cmp(const void *a_, const void *b_)
1312{
1313        struct pathspec_item *a, *b;
1314
1315        a = (struct pathspec_item *)a_;
1316        b = (struct pathspec_item *)b_;
1317        return strcmp(a->match, b->match);
1318}
1319
1320int init_pathspec(struct pathspec *pathspec, const char **paths)
1321{
1322        const char **p = paths;
1323        int i;
1324
1325        memset(pathspec, 0, sizeof(*pathspec));
1326        if (!p)
1327                return 0;
1328        while (*p)
1329                p++;
1330        pathspec->raw = paths;
1331        pathspec->nr = p - paths;
1332        if (!pathspec->nr)
1333                return 0;
1334
1335        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1336        for (i = 0; i < pathspec->nr; i++) {
1337                struct pathspec_item *item = pathspec->items+i;
1338                const char *path = paths[i];
1339
1340                item->match = path;
1341                item->len = strlen(path);
1342                item->use_wildcard = !no_wildcard(path);
1343                if (item->use_wildcard)
1344                        pathspec->has_wildcard = 1;
1345        }
1346
1347        qsort(pathspec->items, pathspec->nr,
1348              sizeof(struct pathspec_item), pathspec_item_cmp);
1349
1350        return 0;
1351}
1352
1353void free_pathspec(struct pathspec *pathspec)
1354{
1355        free(pathspec->items);
1356        pathspec->items = NULL;
1357}