dir.con commit ls-files -i: pay attention to exclusion of leading paths (eb41775)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37static size_t common_prefix_len(const char **pathspec)
  38{
  39        const char *n, *first;
  40        size_t max = 0;
  41
  42        if (!pathspec)
  43                return max;
  44
  45        first = *pathspec;
  46        while ((n = *pathspec++)) {
  47                size_t i, len = 0;
  48                for (i = 0; first == n || i < max; i++) {
  49                        char c = n[i];
  50                        if (!c || c != first[i] || is_glob_special(c))
  51                                break;
  52                        if (c == '/')
  53                                len = i + 1;
  54                }
  55                if (first == n || len < max) {
  56                        max = len;
  57                        if (!max)
  58                                break;
  59                }
  60        }
  61        return max;
  62}
  63
  64/*
  65 * Returns a copy of the longest leading path common among all
  66 * pathspecs.
  67 */
  68char *common_prefix(const char **pathspec)
  69{
  70        unsigned long len = common_prefix_len(pathspec);
  71
  72        return len ? xmemdupz(*pathspec, len) : NULL;
  73}
  74
  75int fill_directory(struct dir_struct *dir, const char **pathspec)
  76{
  77        const char *path;
  78        size_t len;
  79
  80        /*
  81         * Calculate common prefix for the pathspec, and
  82         * use that to optimize the directory walk
  83         */
  84        len = common_prefix_len(pathspec);
  85        path = "";
  86
  87        if (len)
  88                path = xmemdupz(*pathspec, len);
  89
  90        /* Read the directory and prune it */
  91        read_directory(dir, path, len, pathspec);
  92        if (*path)
  93                free((char *)path);
  94        return len;
  95}
  96
  97int within_depth(const char *name, int namelen,
  98                        int depth, int max_depth)
  99{
 100        const char *cp = name, *cpe = name + namelen;
 101
 102        while (cp < cpe) {
 103                if (*cp++ != '/')
 104                        continue;
 105                depth++;
 106                if (depth > max_depth)
 107                        return 0;
 108        }
 109        return 1;
 110}
 111
 112/*
 113 * Does 'match' match the given name?
 114 * A match is found if
 115 *
 116 * (1) the 'match' string is leading directory of 'name', or
 117 * (2) the 'match' string is a wildcard and matches 'name', or
 118 * (3) the 'match' string is exactly the same as 'name'.
 119 *
 120 * and the return value tells which case it was.
 121 *
 122 * It returns 0 when there is no match.
 123 */
 124static int match_one(const char *match, const char *name, int namelen)
 125{
 126        int matchlen;
 127
 128        /* If the match was just the prefix, we matched */
 129        if (!*match)
 130                return MATCHED_RECURSIVELY;
 131
 132        if (ignore_case) {
 133                for (;;) {
 134                        unsigned char c1 = tolower(*match);
 135                        unsigned char c2 = tolower(*name);
 136                        if (c1 == '\0' || is_glob_special(c1))
 137                                break;
 138                        if (c1 != c2)
 139                                return 0;
 140                        match++;
 141                        name++;
 142                        namelen--;
 143                }
 144        } else {
 145                for (;;) {
 146                        unsigned char c1 = *match;
 147                        unsigned char c2 = *name;
 148                        if (c1 == '\0' || is_glob_special(c1))
 149                                break;
 150                        if (c1 != c2)
 151                                return 0;
 152                        match++;
 153                        name++;
 154                        namelen--;
 155                }
 156        }
 157
 158
 159        /*
 160         * If we don't match the matchstring exactly,
 161         * we need to match by fnmatch
 162         */
 163        matchlen = strlen(match);
 164        if (strncmp_icase(match, name, matchlen))
 165                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 166
 167        if (namelen == matchlen)
 168                return MATCHED_EXACTLY;
 169        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 170                return MATCHED_RECURSIVELY;
 171        return 0;
 172}
 173
 174/*
 175 * Given a name and a list of pathspecs, see if the name matches
 176 * any of the pathspecs.  The caller is also interested in seeing
 177 * all pathspec matches some names it calls this function with
 178 * (otherwise the user could have mistyped the unmatched pathspec),
 179 * and a mark is left in seen[] array for pathspec element that
 180 * actually matched anything.
 181 */
 182int match_pathspec(const char **pathspec, const char *name, int namelen,
 183                int prefix, char *seen)
 184{
 185        int i, retval = 0;
 186
 187        if (!pathspec)
 188                return 1;
 189
 190        name += prefix;
 191        namelen -= prefix;
 192
 193        for (i = 0; pathspec[i] != NULL; i++) {
 194                int how;
 195                const char *match = pathspec[i] + prefix;
 196                if (seen && seen[i] == MATCHED_EXACTLY)
 197                        continue;
 198                how = match_one(match, name, namelen);
 199                if (how) {
 200                        if (retval < how)
 201                                retval = how;
 202                        if (seen && seen[i] < how)
 203                                seen[i] = how;
 204                }
 205        }
 206        return retval;
 207}
 208
 209/*
 210 * Does 'match' match the given name?
 211 * A match is found if
 212 *
 213 * (1) the 'match' string is leading directory of 'name', or
 214 * (2) the 'match' string is a wildcard and matches 'name', or
 215 * (3) the 'match' string is exactly the same as 'name'.
 216 *
 217 * and the return value tells which case it was.
 218 *
 219 * It returns 0 when there is no match.
 220 */
 221static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 222                               const char *name, int namelen)
 223{
 224        /* name/namelen has prefix cut off by caller */
 225        const char *match = item->match + prefix;
 226        int matchlen = item->len - prefix;
 227
 228        /* If the match was just the prefix, we matched */
 229        if (!*match)
 230                return MATCHED_RECURSIVELY;
 231
 232        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 233                if (matchlen == namelen)
 234                        return MATCHED_EXACTLY;
 235
 236                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 237                        return MATCHED_RECURSIVELY;
 238        }
 239
 240        if (item->use_wildcard && !fnmatch(match, name, 0))
 241                return MATCHED_FNMATCH;
 242
 243        return 0;
 244}
 245
 246/*
 247 * Given a name and a list of pathspecs, see if the name matches
 248 * any of the pathspecs.  The caller is also interested in seeing
 249 * all pathspec matches some names it calls this function with
 250 * (otherwise the user could have mistyped the unmatched pathspec),
 251 * and a mark is left in seen[] array for pathspec element that
 252 * actually matched anything.
 253 */
 254int match_pathspec_depth(const struct pathspec *ps,
 255                         const char *name, int namelen,
 256                         int prefix, char *seen)
 257{
 258        int i, retval = 0;
 259
 260        if (!ps->nr) {
 261                if (!ps->recursive || ps->max_depth == -1)
 262                        return MATCHED_RECURSIVELY;
 263
 264                if (within_depth(name, namelen, 0, ps->max_depth))
 265                        return MATCHED_EXACTLY;
 266                else
 267                        return 0;
 268        }
 269
 270        name += prefix;
 271        namelen -= prefix;
 272
 273        for (i = ps->nr - 1; i >= 0; i--) {
 274                int how;
 275                if (seen && seen[i] == MATCHED_EXACTLY)
 276                        continue;
 277                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 278                if (ps->recursive && ps->max_depth != -1 &&
 279                    how && how != MATCHED_FNMATCH) {
 280                        int len = ps->items[i].len;
 281                        if (name[len] == '/')
 282                                len++;
 283                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 284                                how = MATCHED_EXACTLY;
 285                        else
 286                                how = 0;
 287                }
 288                if (how) {
 289                        if (retval < how)
 290                                retval = how;
 291                        if (seen && seen[i] < how)
 292                                seen[i] = how;
 293                }
 294        }
 295        return retval;
 296}
 297
 298static int no_wildcard(const char *string)
 299{
 300        return string[strcspn(string, "*?[{\\")] == '\0';
 301}
 302
 303void add_exclude(const char *string, const char *base,
 304                 int baselen, struct exclude_list *which)
 305{
 306        struct exclude *x;
 307        size_t len;
 308        int to_exclude = 1;
 309        int flags = 0;
 310
 311        if (*string == '!') {
 312                to_exclude = 0;
 313                string++;
 314        }
 315        len = strlen(string);
 316        if (len && string[len - 1] == '/') {
 317                char *s;
 318                x = xmalloc(sizeof(*x) + len);
 319                s = (char *)(x+1);
 320                memcpy(s, string, len - 1);
 321                s[len - 1] = '\0';
 322                string = s;
 323                x->pattern = s;
 324                flags = EXC_FLAG_MUSTBEDIR;
 325        } else {
 326                x = xmalloc(sizeof(*x));
 327                x->pattern = string;
 328        }
 329        x->to_exclude = to_exclude;
 330        x->patternlen = strlen(string);
 331        x->base = base;
 332        x->baselen = baselen;
 333        x->flags = flags;
 334        if (!strchr(string, '/'))
 335                x->flags |= EXC_FLAG_NODIR;
 336        if (no_wildcard(string))
 337                x->flags |= EXC_FLAG_NOWILDCARD;
 338        if (*string == '*' && no_wildcard(string+1))
 339                x->flags |= EXC_FLAG_ENDSWITH;
 340        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 341        which->excludes[which->nr++] = x;
 342}
 343
 344static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 345{
 346        int pos, len;
 347        unsigned long sz;
 348        enum object_type type;
 349        void *data;
 350        struct index_state *istate = &the_index;
 351
 352        len = strlen(path);
 353        pos = index_name_pos(istate, path, len);
 354        if (pos < 0)
 355                return NULL;
 356        if (!ce_skip_worktree(istate->cache[pos]))
 357                return NULL;
 358        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 359        if (!data || type != OBJ_BLOB) {
 360                free(data);
 361                return NULL;
 362        }
 363        *size = xsize_t(sz);
 364        return data;
 365}
 366
 367void free_excludes(struct exclude_list *el)
 368{
 369        int i;
 370
 371        for (i = 0; i < el->nr; i++)
 372                free(el->excludes[i]);
 373        free(el->excludes);
 374
 375        el->nr = 0;
 376        el->excludes = NULL;
 377}
 378
 379int add_excludes_from_file_to_list(const char *fname,
 380                                   const char *base,
 381                                   int baselen,
 382                                   char **buf_p,
 383                                   struct exclude_list *which,
 384                                   int check_index)
 385{
 386        struct stat st;
 387        int fd, i;
 388        size_t size = 0;
 389        char *buf, *entry;
 390
 391        fd = open(fname, O_RDONLY);
 392        if (fd < 0 || fstat(fd, &st) < 0) {
 393                if (0 <= fd)
 394                        close(fd);
 395                if (!check_index ||
 396                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 397                        return -1;
 398                if (size == 0) {
 399                        free(buf);
 400                        return 0;
 401                }
 402                if (buf[size-1] != '\n') {
 403                        buf = xrealloc(buf, size+1);
 404                        buf[size++] = '\n';
 405                }
 406        }
 407        else {
 408                size = xsize_t(st.st_size);
 409                if (size == 0) {
 410                        close(fd);
 411                        return 0;
 412                }
 413                buf = xmalloc(size+1);
 414                if (read_in_full(fd, buf, size) != size) {
 415                        free(buf);
 416                        close(fd);
 417                        return -1;
 418                }
 419                buf[size++] = '\n';
 420                close(fd);
 421        }
 422
 423        if (buf_p)
 424                *buf_p = buf;
 425        entry = buf;
 426        for (i = 0; i < size; i++) {
 427                if (buf[i] == '\n') {
 428                        if (entry != buf + i && entry[0] != '#') {
 429                                buf[i - (i && buf[i-1] == '\r')] = 0;
 430                                add_exclude(entry, base, baselen, which);
 431                        }
 432                        entry = buf + i + 1;
 433                }
 434        }
 435        return 0;
 436}
 437
 438void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 439{
 440        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 441                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 442                die("cannot use %s as an exclude file", fname);
 443}
 444
 445static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 446{
 447        struct exclude_list *el;
 448        struct exclude_stack *stk = NULL;
 449        int current;
 450
 451        if ((!dir->exclude_per_dir) ||
 452            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 453                return; /* too long a path -- ignore */
 454
 455        /* Pop the ones that are not the prefix of the path being checked. */
 456        el = &dir->exclude_list[EXC_DIRS];
 457        while ((stk = dir->exclude_stack) != NULL) {
 458                if (stk->baselen <= baselen &&
 459                    !strncmp(dir->basebuf, base, stk->baselen))
 460                        break;
 461                dir->exclude_stack = stk->prev;
 462                while (stk->exclude_ix < el->nr)
 463                        free(el->excludes[--el->nr]);
 464                free(stk->filebuf);
 465                free(stk);
 466        }
 467
 468        /* Read from the parent directories and push them down. */
 469        current = stk ? stk->baselen : -1;
 470        while (current < baselen) {
 471                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 472                const char *cp;
 473
 474                if (current < 0) {
 475                        cp = base;
 476                        current = 0;
 477                }
 478                else {
 479                        cp = strchr(base + current + 1, '/');
 480                        if (!cp)
 481                                die("oops in prep_exclude");
 482                        cp++;
 483                }
 484                stk->prev = dir->exclude_stack;
 485                stk->baselen = cp - base;
 486                stk->exclude_ix = el->nr;
 487                memcpy(dir->basebuf + current, base + current,
 488                       stk->baselen - current);
 489                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 490                add_excludes_from_file_to_list(dir->basebuf,
 491                                               dir->basebuf, stk->baselen,
 492                                               &stk->filebuf, el, 1);
 493                dir->exclude_stack = stk;
 494                current = stk->baselen;
 495        }
 496        dir->basebuf[baselen] = '\0';
 497}
 498
 499/* Scan the list and let the last match determine the fate.
 500 * Return 1 for exclude, 0 for include and -1 for undecided.
 501 */
 502int excluded_from_list(const char *pathname,
 503                       int pathlen, const char *basename, int *dtype,
 504                       struct exclude_list *el)
 505{
 506        int i;
 507
 508        if (el->nr) {
 509                for (i = el->nr - 1; 0 <= i; i--) {
 510                        struct exclude *x = el->excludes[i];
 511                        const char *exclude = x->pattern;
 512                        int to_exclude = x->to_exclude;
 513
 514                        if (x->flags & EXC_FLAG_MUSTBEDIR) {
 515                                if (*dtype == DT_UNKNOWN)
 516                                        *dtype = get_dtype(NULL, pathname, pathlen);
 517                                if (*dtype != DT_DIR)
 518                                        continue;
 519                        }
 520
 521                        if (x->flags & EXC_FLAG_NODIR) {
 522                                /* match basename */
 523                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 524                                        if (!strcmp_icase(exclude, basename))
 525                                                return to_exclude;
 526                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
 527                                        if (x->patternlen - 1 <= pathlen &&
 528                                            !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
 529                                                return to_exclude;
 530                                } else {
 531                                        if (fnmatch_icase(exclude, basename, 0) == 0)
 532                                                return to_exclude;
 533                                }
 534                        }
 535                        else {
 536                                /* match with FNM_PATHNAME:
 537                                 * exclude has base (baselen long) implicitly
 538                                 * in front of it.
 539                                 */
 540                                int baselen = x->baselen;
 541                                if (*exclude == '/')
 542                                        exclude++;
 543
 544                                if (pathlen < baselen ||
 545                                    (baselen && pathname[baselen-1] != '/') ||
 546                                    strncmp_icase(pathname, x->base, baselen))
 547                                    continue;
 548
 549                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 550                                        if (!strcmp_icase(exclude, pathname + baselen))
 551                                                return to_exclude;
 552                                } else {
 553                                        if (fnmatch_icase(exclude, pathname+baselen,
 554                                                    FNM_PATHNAME) == 0)
 555                                            return to_exclude;
 556                                }
 557                        }
 558                }
 559        }
 560        return -1; /* undecided */
 561}
 562
 563int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 564{
 565        int pathlen = strlen(pathname);
 566        int st;
 567        const char *basename = strrchr(pathname, '/');
 568        basename = (basename) ? basename+1 : pathname;
 569
 570        prep_exclude(dir, pathname, basename-pathname);
 571        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 572                switch (excluded_from_list(pathname, pathlen, basename,
 573                                           dtype_p, &dir->exclude_list[st])) {
 574                case 0:
 575                        return 0;
 576                case 1:
 577                        return 1;
 578                }
 579        }
 580        return 0;
 581}
 582
 583void path_exclude_check_init(struct path_exclude_check *check,
 584                             struct dir_struct *dir)
 585{
 586        check->dir = dir;
 587        strbuf_init(&check->path, 256);
 588}
 589
 590void path_exclude_check_clear(struct path_exclude_check *check)
 591{
 592        strbuf_release(&check->path);
 593}
 594
 595int path_excluded(struct path_exclude_check *check, struct cache_entry *ce)
 596{
 597        int i, dtype;
 598        struct strbuf *path = &check->path;
 599
 600        strbuf_setlen(path, 0);
 601        for (i = 0; ce->name[i]; i++) {
 602                int ch = ce->name[i];
 603
 604                if (ch == '/') {
 605                        dtype = DT_DIR;
 606                        if (excluded(check->dir, path->buf, &dtype))
 607                                return 1;
 608                }
 609                strbuf_addch(path, ch);
 610        }
 611        dtype = ce_to_dtype(ce);
 612        return excluded(check->dir, ce->name, &dtype);
 613}
 614
 615static struct dir_entry *dir_entry_new(const char *pathname, int len)
 616{
 617        struct dir_entry *ent;
 618
 619        ent = xmalloc(sizeof(*ent) + len + 1);
 620        ent->len = len;
 621        memcpy(ent->name, pathname, len);
 622        ent->name[len] = 0;
 623        return ent;
 624}
 625
 626static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 627{
 628        if (cache_name_exists(pathname, len, ignore_case))
 629                return NULL;
 630
 631        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 632        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 633}
 634
 635struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 636{
 637        if (!cache_name_is_other(pathname, len))
 638                return NULL;
 639
 640        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 641        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 642}
 643
 644enum exist_status {
 645        index_nonexistent = 0,
 646        index_directory,
 647        index_gitdir
 648};
 649
 650/*
 651 * Do not use the alphabetically stored index to look up
 652 * the directory name; instead, use the case insensitive
 653 * name hash.
 654 */
 655static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 656{
 657        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 658        unsigned char endchar;
 659
 660        if (!ce)
 661                return index_nonexistent;
 662        endchar = ce->name[len];
 663
 664        /*
 665         * The cache_entry structure returned will contain this dirname
 666         * and possibly additional path components.
 667         */
 668        if (endchar == '/')
 669                return index_directory;
 670
 671        /*
 672         * If there are no additional path components, then this cache_entry
 673         * represents a submodule.  Submodules, despite being directories,
 674         * are stored in the cache without a closing slash.
 675         */
 676        if (!endchar && S_ISGITLINK(ce->ce_mode))
 677                return index_gitdir;
 678
 679        /* This should never be hit, but it exists just in case. */
 680        return index_nonexistent;
 681}
 682
 683/*
 684 * The index sorts alphabetically by entry name, which
 685 * means that a gitlink sorts as '\0' at the end, while
 686 * a directory (which is defined not as an entry, but as
 687 * the files it contains) will sort with the '/' at the
 688 * end.
 689 */
 690static enum exist_status directory_exists_in_index(const char *dirname, int len)
 691{
 692        int pos;
 693
 694        if (ignore_case)
 695                return directory_exists_in_index_icase(dirname, len);
 696
 697        pos = cache_name_pos(dirname, len);
 698        if (pos < 0)
 699                pos = -pos-1;
 700        while (pos < active_nr) {
 701                struct cache_entry *ce = active_cache[pos++];
 702                unsigned char endchar;
 703
 704                if (strncmp(ce->name, dirname, len))
 705                        break;
 706                endchar = ce->name[len];
 707                if (endchar > '/')
 708                        break;
 709                if (endchar == '/')
 710                        return index_directory;
 711                if (!endchar && S_ISGITLINK(ce->ce_mode))
 712                        return index_gitdir;
 713        }
 714        return index_nonexistent;
 715}
 716
 717/*
 718 * When we find a directory when traversing the filesystem, we
 719 * have three distinct cases:
 720 *
 721 *  - ignore it
 722 *  - see it as a directory
 723 *  - recurse into it
 724 *
 725 * and which one we choose depends on a combination of existing
 726 * git index contents and the flags passed into the directory
 727 * traversal routine.
 728 *
 729 * Case 1: If we *already* have entries in the index under that
 730 * directory name, we always recurse into the directory to see
 731 * all the files.
 732 *
 733 * Case 2: If we *already* have that directory name as a gitlink,
 734 * we always continue to see it as a gitlink, regardless of whether
 735 * there is an actual git directory there or not (it might not
 736 * be checked out as a subproject!)
 737 *
 738 * Case 3: if we didn't have it in the index previously, we
 739 * have a few sub-cases:
 740 *
 741 *  (a) if "show_other_directories" is true, we show it as
 742 *      just a directory, unless "hide_empty_directories" is
 743 *      also true and the directory is empty, in which case
 744 *      we just ignore it entirely.
 745 *  (b) if it looks like a git directory, and we don't have
 746 *      'no_gitlinks' set we treat it as a gitlink, and show it
 747 *      as a directory.
 748 *  (c) otherwise, we recurse into it.
 749 */
 750enum directory_treatment {
 751        show_directory,
 752        ignore_directory,
 753        recurse_into_directory
 754};
 755
 756static enum directory_treatment treat_directory(struct dir_struct *dir,
 757        const char *dirname, int len,
 758        const struct path_simplify *simplify)
 759{
 760        /* The "len-1" is to strip the final '/' */
 761        switch (directory_exists_in_index(dirname, len-1)) {
 762        case index_directory:
 763                return recurse_into_directory;
 764
 765        case index_gitdir:
 766                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 767                        return ignore_directory;
 768                return show_directory;
 769
 770        case index_nonexistent:
 771                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 772                        break;
 773                if (!(dir->flags & DIR_NO_GITLINKS)) {
 774                        unsigned char sha1[20];
 775                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 776                                return show_directory;
 777                }
 778                return recurse_into_directory;
 779        }
 780
 781        /* This is the "show_other_directories" case */
 782        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 783                return show_directory;
 784        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 785                return ignore_directory;
 786        return show_directory;
 787}
 788
 789/*
 790 * This is an inexact early pruning of any recursive directory
 791 * reading - if the path cannot possibly be in the pathspec,
 792 * return true, and we'll skip it early.
 793 */
 794static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 795{
 796        if (simplify) {
 797                for (;;) {
 798                        const char *match = simplify->path;
 799                        int len = simplify->len;
 800
 801                        if (!match)
 802                                break;
 803                        if (len > pathlen)
 804                                len = pathlen;
 805                        if (!memcmp(path, match, len))
 806                                return 0;
 807                        simplify++;
 808                }
 809                return 1;
 810        }
 811        return 0;
 812}
 813
 814/*
 815 * This function tells us whether an excluded path matches a
 816 * list of "interesting" pathspecs. That is, whether a path matched
 817 * by any of the pathspecs could possibly be ignored by excluding
 818 * the specified path. This can happen if:
 819 *
 820 *   1. the path is mentioned explicitly in the pathspec
 821 *
 822 *   2. the path is a directory prefix of some element in the
 823 *      pathspec
 824 */
 825static int exclude_matches_pathspec(const char *path, int len,
 826                const struct path_simplify *simplify)
 827{
 828        if (simplify) {
 829                for (; simplify->path; simplify++) {
 830                        if (len == simplify->len
 831                            && !memcmp(path, simplify->path, len))
 832                                return 1;
 833                        if (len < simplify->len
 834                            && simplify->path[len] == '/'
 835                            && !memcmp(path, simplify->path, len))
 836                                return 1;
 837                }
 838        }
 839        return 0;
 840}
 841
 842static int get_index_dtype(const char *path, int len)
 843{
 844        int pos;
 845        struct cache_entry *ce;
 846
 847        ce = cache_name_exists(path, len, 0);
 848        if (ce) {
 849                if (!ce_uptodate(ce))
 850                        return DT_UNKNOWN;
 851                if (S_ISGITLINK(ce->ce_mode))
 852                        return DT_DIR;
 853                /*
 854                 * Nobody actually cares about the
 855                 * difference between DT_LNK and DT_REG
 856                 */
 857                return DT_REG;
 858        }
 859
 860        /* Try to look it up as a directory */
 861        pos = cache_name_pos(path, len);
 862        if (pos >= 0)
 863                return DT_UNKNOWN;
 864        pos = -pos-1;
 865        while (pos < active_nr) {
 866                ce = active_cache[pos++];
 867                if (strncmp(ce->name, path, len))
 868                        break;
 869                if (ce->name[len] > '/')
 870                        break;
 871                if (ce->name[len] < '/')
 872                        continue;
 873                if (!ce_uptodate(ce))
 874                        break;  /* continue? */
 875                return DT_DIR;
 876        }
 877        return DT_UNKNOWN;
 878}
 879
 880static int get_dtype(struct dirent *de, const char *path, int len)
 881{
 882        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 883        struct stat st;
 884
 885        if (dtype != DT_UNKNOWN)
 886                return dtype;
 887        dtype = get_index_dtype(path, len);
 888        if (dtype != DT_UNKNOWN)
 889                return dtype;
 890        if (lstat(path, &st))
 891                return dtype;
 892        if (S_ISREG(st.st_mode))
 893                return DT_REG;
 894        if (S_ISDIR(st.st_mode))
 895                return DT_DIR;
 896        if (S_ISLNK(st.st_mode))
 897                return DT_LNK;
 898        return dtype;
 899}
 900
 901enum path_treatment {
 902        path_ignored,
 903        path_handled,
 904        path_recurse
 905};
 906
 907static enum path_treatment treat_one_path(struct dir_struct *dir,
 908                                          char *path, int *len,
 909                                          const struct path_simplify *simplify,
 910                                          int dtype, struct dirent *de)
 911{
 912        int exclude = excluded(dir, path, &dtype);
 913        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 914            && exclude_matches_pathspec(path, *len, simplify))
 915                dir_add_ignored(dir, path, *len);
 916
 917        /*
 918         * Excluded? If we don't explicitly want to show
 919         * ignored files, ignore it
 920         */
 921        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
 922                return path_ignored;
 923
 924        if (dtype == DT_UNKNOWN)
 925                dtype = get_dtype(de, path, *len);
 926
 927        /*
 928         * Do we want to see just the ignored files?
 929         * We still need to recurse into directories,
 930         * even if we don't ignore them, since the
 931         * directory may contain files that we do..
 932         */
 933        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
 934                if (dtype != DT_DIR)
 935                        return path_ignored;
 936        }
 937
 938        switch (dtype) {
 939        default:
 940                return path_ignored;
 941        case DT_DIR:
 942                memcpy(path + *len, "/", 2);
 943                (*len)++;
 944                switch (treat_directory(dir, path, *len, simplify)) {
 945                case show_directory:
 946                        if (exclude != !!(dir->flags
 947                                          & DIR_SHOW_IGNORED))
 948                                return path_ignored;
 949                        break;
 950                case recurse_into_directory:
 951                        return path_recurse;
 952                case ignore_directory:
 953                        return path_ignored;
 954                }
 955                break;
 956        case DT_REG:
 957        case DT_LNK:
 958                break;
 959        }
 960        return path_handled;
 961}
 962
 963static enum path_treatment treat_path(struct dir_struct *dir,
 964                                      struct dirent *de,
 965                                      char *path, int path_max,
 966                                      int baselen,
 967                                      const struct path_simplify *simplify,
 968                                      int *len)
 969{
 970        int dtype;
 971
 972        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 973                return path_ignored;
 974        *len = strlen(de->d_name);
 975        /* Ignore overly long pathnames! */
 976        if (*len + baselen + 8 > path_max)
 977                return path_ignored;
 978        memcpy(path + baselen, de->d_name, *len + 1);
 979        *len += baselen;
 980        if (simplify_away(path, *len, simplify))
 981                return path_ignored;
 982
 983        dtype = DTYPE(de);
 984        return treat_one_path(dir, path, len, simplify, dtype, de);
 985}
 986
 987/*
 988 * Read a directory tree. We currently ignore anything but
 989 * directories, regular files and symlinks. That's because git
 990 * doesn't handle them at all yet. Maybe that will change some
 991 * day.
 992 *
 993 * Also, we ignore the name ".git" (even if it is not a directory).
 994 * That likely will not change.
 995 */
 996static int read_directory_recursive(struct dir_struct *dir,
 997                                    const char *base, int baselen,
 998                                    int check_only,
 999                                    const struct path_simplify *simplify)
1000{
1001        DIR *fdir = opendir(*base ? base : ".");
1002        int contents = 0;
1003        struct dirent *de;
1004        char path[PATH_MAX + 1];
1005
1006        if (!fdir)
1007                return 0;
1008
1009        memcpy(path, base, baselen);
1010
1011        while ((de = readdir(fdir)) != NULL) {
1012                int len;
1013                switch (treat_path(dir, de, path, sizeof(path),
1014                                   baselen, simplify, &len)) {
1015                case path_recurse:
1016                        contents += read_directory_recursive(dir, path, len, 0, simplify);
1017                        continue;
1018                case path_ignored:
1019                        continue;
1020                case path_handled:
1021                        break;
1022                }
1023                contents++;
1024                if (check_only)
1025                        goto exit_early;
1026                else
1027                        dir_add_name(dir, path, len);
1028        }
1029exit_early:
1030        closedir(fdir);
1031
1032        return contents;
1033}
1034
1035static int cmp_name(const void *p1, const void *p2)
1036{
1037        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1038        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1039
1040        return cache_name_compare(e1->name, e1->len,
1041                                  e2->name, e2->len);
1042}
1043
/*
 * Return the length of the "simple" part of a path match limiter,
 * i.e. the number of leading bytes before the first glob special
 * character (or the whole length when there is none).
 */
static int simple_length(const char *match)
{
        int len = 0;

        for (;;) {
                unsigned char c = match[len];

                if (c == '\0' || is_glob_special(c))
                        break;
                len++;
        }
        return len;
}
1058
1059static struct path_simplify *create_simplify(const char **pathspec)
1060{
1061        int nr, alloc = 0;
1062        struct path_simplify *simplify = NULL;
1063
1064        if (!pathspec)
1065                return NULL;
1066
1067        for (nr = 0 ; ; nr++) {
1068                const char *match;
1069                if (nr >= alloc) {
1070                        alloc = alloc_nr(alloc);
1071                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1072                }
1073                match = *pathspec++;
1074                if (!match)
1075                        break;
1076                simplify[nr].path = match;
1077                simplify[nr].len = simple_length(match);
1078        }
1079        simplify[nr].path = NULL;
1080        simplify[nr].len = 0;
1081        return simplify;
1082}
1083
/*
 * Release an array obtained from create_simplify().  Only the array
 * is freed; the path strings it points at are left alone.  A NULL
 * argument is fine.
 */
static void free_simplify(struct path_simplify *simplify)
{
        free(simplify);
}
1088
1089static int treat_leading_path(struct dir_struct *dir,
1090                              const char *path, int len,
1091                              const struct path_simplify *simplify)
1092{
1093        char pathbuf[PATH_MAX];
1094        int baselen, blen;
1095        const char *cp;
1096
1097        while (len && path[len - 1] == '/')
1098                len--;
1099        if (!len)
1100                return 1;
1101        baselen = 0;
1102        while (1) {
1103                cp = path + baselen + !!baselen;
1104                cp = memchr(cp, '/', path + len - cp);
1105                if (!cp)
1106                        baselen = len;
1107                else
1108                        baselen = cp - path;
1109                memcpy(pathbuf, path, baselen);
1110                pathbuf[baselen] = '\0';
1111                if (!is_directory(pathbuf))
1112                        return 0;
1113                if (simplify_away(pathbuf, baselen, simplify))
1114                        return 0;
1115                blen = baselen;
1116                if (treat_one_path(dir, pathbuf, &blen, simplify,
1117                                   DT_DIR, NULL) == path_ignored)
1118                        return 0; /* do not recurse into it */
1119                if (len <= baselen)
1120                        return 1; /* finished checking */
1121        }
1122}
1123
1124int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1125{
1126        struct path_simplify *simplify;
1127
1128        if (has_symlink_leading_path(path, len))
1129                return dir->nr;
1130
1131        simplify = create_simplify(pathspec);
1132        if (!len || treat_leading_path(dir, path, len, simplify))
1133                read_directory_recursive(dir, path, len, 0, simplify);
1134        free_simplify(simplify);
1135        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1136        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1137        return dir->nr;
1138}
1139
/*
 * Return 1 when lstat() succeeds on "f", 0 otherwise.  Symbolic links
 * are not followed, so a dangling symlink counts as existing.
 */
int file_exists(const char *f)
{
        struct stat st;

        if (lstat(f, &st) != 0)
                return 0;
        return 1;
}
1145
/*
 * Given two normalized paths (a trailing slash is ok), if subdir is
 * outside dir, return -1.  Otherwise return the offset in subdir that
 * can be used as relative path to dir.
 *
 * Both arguments must be non-NULL and non-empty.
 */
int dir_inside_of(const char *subdir, const char *dir)
{
        int offset = 0;

        assert(dir && subdir && *dir && *subdir);

        /* Measure the common prefix */
        while (dir[offset] && subdir[offset] && dir[offset] == subdir[offset])
                offset++;

        /* hel[p]/me vs hel[l]/yeah */
        if (dir[offset] && subdir[offset])
                return -1;

        /* subdir ran out: fine only if dir ran out too (same dir) */
        if (!subdir[offset])
                return dir[offset] ? -1 : offset;

        /* foo/[b]ar vs foo/[] */
        if (is_dir_sep(dir[offset - 1]))
                return is_dir_sep(subdir[offset - 1]) ? offset : -1;

        /* foo[/]bar vs foo[] */
        if (is_dir_sep(subdir[offset]))
                return offset + 1;
        return -1;
}
1177
1178int is_inside_dir(const char *dir)
1179{
1180        char cwd[PATH_MAX];
1181        if (!dir)
1182                return 0;
1183        if (!getcwd(cwd, sizeof(cwd)))
1184                die_errno("can't find the current directory");
1185        return dir_inside_of(cwd, dir) >= 0;
1186}
1187
1188int is_empty_dir(const char *path)
1189{
1190        DIR *dir = opendir(path);
1191        struct dirent *e;
1192        int ret = 1;
1193
1194        if (!dir)
1195                return 0;
1196
1197        while ((e = readdir(dir)) != NULL)
1198                if (!is_dot_or_dotdot(e->d_name)) {
1199                        ret = 0;
1200                        break;
1201                }
1202
1203        closedir(dir);
1204        return ret;
1205}
1206
1207int remove_dir_recursively(struct strbuf *path, int flag)
1208{
1209        DIR *dir;
1210        struct dirent *e;
1211        int ret = 0, original_len = path->len, len;
1212        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1213        unsigned char submodule_head[20];
1214
1215        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1216            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head))
1217                /* Do not descend and nuke a nested git work tree. */
1218                return 0;
1219
1220        dir = opendir(path->buf);
1221        if (!dir)
1222                return rmdir(path->buf);
1223        if (path->buf[original_len - 1] != '/')
1224                strbuf_addch(path, '/');
1225
1226        len = path->len;
1227        while ((e = readdir(dir)) != NULL) {
1228                struct stat st;
1229                if (is_dot_or_dotdot(e->d_name))
1230                        continue;
1231
1232                strbuf_setlen(path, len);
1233                strbuf_addstr(path, e->d_name);
1234                if (lstat(path->buf, &st))
1235                        ; /* fall thru */
1236                else if (S_ISDIR(st.st_mode)) {
1237                        if (!remove_dir_recursively(path, only_empty))
1238                                continue; /* happy */
1239                } else if (!only_empty && !unlink(path->buf))
1240                        continue; /* happy, too */
1241
1242                /* path too long, stat fails, or non-directory still exists */
1243                ret = -1;
1244                break;
1245        }
1246        closedir(dir);
1247
1248        strbuf_setlen(path, original_len);
1249        if (!ret)
1250                ret = rmdir(path->buf);
1251        return ret;
1252}
1253
1254void setup_standard_excludes(struct dir_struct *dir)
1255{
1256        const char *path;
1257
1258        dir->exclude_per_dir = ".gitignore";
1259        path = git_path("info/exclude");
1260        if (!access(path, R_OK))
1261                add_excludes_from_file(dir, path);
1262        if (excludes_file && !access(excludes_file, R_OK))
1263                add_excludes_from_file(dir, excludes_file);
1264}
1265
/*
 * Unlink "name", then remove its leading directories from the
 * innermost outwards until one cannot be removed.
 *
 * Returns -1 when unlink() fails for a reason other than the file not
 * existing, and 0 otherwise; failing to remove a directory is not an
 * error.
 */
int remove_path(const char *name)
{
        const char *last_slash;
        char *dirs, *cut;

        if (unlink(name) != 0 && errno != ENOENT)
                return -1;

        last_slash = strrchr(name, '/');
        if (!last_slash)
                return 0;

        /* Work on a copy that is shortened by one component at a time */
        dirs = xstrdup(name);
        for (cut = dirs + (last_slash - name); cut; cut = strrchr(dirs, '/')) {
                *cut = '\0';
                if (rmdir(dirs) != 0)
                        break;
        }
        free(dirs);
        return 0;
}
1284
1285static int pathspec_item_cmp(const void *a_, const void *b_)
1286{
1287        struct pathspec_item *a, *b;
1288
1289        a = (struct pathspec_item *)a_;
1290        b = (struct pathspec_item *)b_;
1291        return strcmp(a->match, b->match);
1292}
1293
1294int init_pathspec(struct pathspec *pathspec, const char **paths)
1295{
1296        const char **p = paths;
1297        int i;
1298
1299        memset(pathspec, 0, sizeof(*pathspec));
1300        if (!p)
1301                return 0;
1302        while (*p)
1303                p++;
1304        pathspec->raw = paths;
1305        pathspec->nr = p - paths;
1306        if (!pathspec->nr)
1307                return 0;
1308
1309        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1310        for (i = 0; i < pathspec->nr; i++) {
1311                struct pathspec_item *item = pathspec->items+i;
1312                const char *path = paths[i];
1313
1314                item->match = path;
1315                item->len = strlen(path);
1316                item->use_wildcard = !no_wildcard(path);
1317                if (item->use_wildcard)
1318                        pathspec->has_wildcard = 1;
1319        }
1320
1321        qsort(pathspec->items, pathspec->nr,
1322              sizeof(struct pathspec_item), pathspec_item_cmp);
1323
1324        return 0;
1325}
1326
1327void free_pathspec(struct pathspec *pathspec)
1328{
1329        free(pathspec->items);
1330        pathspec->items = NULL;
1331}