dir.con commit commit: make default of "cleanup" option configurable (51fb3a3)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37inline int git_fnmatch(const char *pattern, const char *string,
  38                       int flags, int prefix)
  39{
  40        int fnm_flags = 0;
  41        if (flags & GFNM_PATHNAME)
  42                fnm_flags |= FNM_PATHNAME;
  43        if (prefix > 0) {
  44                if (strncmp(pattern, string, prefix))
  45                        return FNM_NOMATCH;
  46                pattern += prefix;
  47                string += prefix;
  48        }
  49        if (flags & GFNM_ONESTAR) {
  50                int pattern_len = strlen(++pattern);
  51                int string_len = strlen(string);
  52                return string_len < pattern_len ||
  53                       strcmp(pattern,
  54                              string + string_len - pattern_len);
  55        }
  56        return fnmatch(pattern, string, fnm_flags);
  57}
  58
  59static size_t common_prefix_len(const char **pathspec)
  60{
  61        const char *n, *first;
  62        size_t max = 0;
  63        int literal = limit_pathspec_to_literal();
  64
  65        if (!pathspec)
  66                return max;
  67
  68        first = *pathspec;
  69        while ((n = *pathspec++)) {
  70                size_t i, len = 0;
  71                for (i = 0; first == n || i < max; i++) {
  72                        char c = n[i];
  73                        if (!c || c != first[i] || (!literal && is_glob_special(c)))
  74                                break;
  75                        if (c == '/')
  76                                len = i + 1;
  77                }
  78                if (first == n || len < max) {
  79                        max = len;
  80                        if (!max)
  81                                break;
  82                }
  83        }
  84        return max;
  85}
  86
  87/*
  88 * Returns a copy of the longest leading path common among all
  89 * pathspecs.
  90 */
  91char *common_prefix(const char **pathspec)
  92{
  93        unsigned long len = common_prefix_len(pathspec);
  94
  95        return len ? xmemdupz(*pathspec, len) : NULL;
  96}
  97
  98int fill_directory(struct dir_struct *dir, const char **pathspec)
  99{
 100        size_t len;
 101
 102        /*
 103         * Calculate common prefix for the pathspec, and
 104         * use that to optimize the directory walk
 105         */
 106        len = common_prefix_len(pathspec);
 107
 108        /* Read the directory and prune it */
 109        read_directory(dir, pathspec ? *pathspec : "", len, pathspec);
 110        return len;
 111}
 112
 113int within_depth(const char *name, int namelen,
 114                        int depth, int max_depth)
 115{
 116        const char *cp = name, *cpe = name + namelen;
 117
 118        while (cp < cpe) {
 119                if (*cp++ != '/')
 120                        continue;
 121                depth++;
 122                if (depth > max_depth)
 123                        return 0;
 124        }
 125        return 1;
 126}
 127
 128/*
 129 * Does 'match' match the given name?
 130 * A match is found if
 131 *
 132 * (1) the 'match' string is leading directory of 'name', or
 133 * (2) the 'match' string is a wildcard and matches 'name', or
 134 * (3) the 'match' string is exactly the same as 'name'.
 135 *
 136 * and the return value tells which case it was.
 137 *
 138 * It returns 0 when there is no match.
 139 */
 140static int match_one(const char *match, const char *name, int namelen)
 141{
 142        int matchlen;
 143        int literal = limit_pathspec_to_literal();
 144
 145        /* If the match was just the prefix, we matched */
 146        if (!*match)
 147                return MATCHED_RECURSIVELY;
 148
 149        if (ignore_case) {
 150                for (;;) {
 151                        unsigned char c1 = tolower(*match);
 152                        unsigned char c2 = tolower(*name);
 153                        if (c1 == '\0' || (!literal && is_glob_special(c1)))
 154                                break;
 155                        if (c1 != c2)
 156                                return 0;
 157                        match++;
 158                        name++;
 159                        namelen--;
 160                }
 161        } else {
 162                for (;;) {
 163                        unsigned char c1 = *match;
 164                        unsigned char c2 = *name;
 165                        if (c1 == '\0' || (!literal && is_glob_special(c1)))
 166                                break;
 167                        if (c1 != c2)
 168                                return 0;
 169                        match++;
 170                        name++;
 171                        namelen--;
 172                }
 173        }
 174
 175        /*
 176         * If we don't match the matchstring exactly,
 177         * we need to match by fnmatch
 178         */
 179        matchlen = strlen(match);
 180        if (strncmp_icase(match, name, matchlen)) {
 181                if (literal)
 182                        return 0;
 183                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 184        }
 185
 186        if (namelen == matchlen)
 187                return MATCHED_EXACTLY;
 188        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 189                return MATCHED_RECURSIVELY;
 190        return 0;
 191}
 192
 193/*
 194 * Given a name and a list of pathspecs, see if the name matches
 195 * any of the pathspecs.  The caller is also interested in seeing
 196 * all pathspec matches some names it calls this function with
 197 * (otherwise the user could have mistyped the unmatched pathspec),
 198 * and a mark is left in seen[] array for pathspec element that
 199 * actually matched anything.
 200 */
 201int match_pathspec(const char **pathspec, const char *name, int namelen,
 202                int prefix, char *seen)
 203{
 204        int i, retval = 0;
 205
 206        if (!pathspec)
 207                return 1;
 208
 209        name += prefix;
 210        namelen -= prefix;
 211
 212        for (i = 0; pathspec[i] != NULL; i++) {
 213                int how;
 214                const char *match = pathspec[i] + prefix;
 215                if (seen && seen[i] == MATCHED_EXACTLY)
 216                        continue;
 217                how = match_one(match, name, namelen);
 218                if (how) {
 219                        if (retval < how)
 220                                retval = how;
 221                        if (seen && seen[i] < how)
 222                                seen[i] = how;
 223                }
 224        }
 225        return retval;
 226}
 227
 228/*
 229 * Does 'match' match the given name?
 230 * A match is found if
 231 *
 232 * (1) the 'match' string is leading directory of 'name', or
 233 * (2) the 'match' string is a wildcard and matches 'name', or
 234 * (3) the 'match' string is exactly the same as 'name'.
 235 *
 236 * and the return value tells which case it was.
 237 *
 238 * It returns 0 when there is no match.
 239 */
 240static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 241                               const char *name, int namelen)
 242{
 243        /* name/namelen has prefix cut off by caller */
 244        const char *match = item->match + prefix;
 245        int matchlen = item->len - prefix;
 246
 247        /* If the match was just the prefix, we matched */
 248        if (!*match)
 249                return MATCHED_RECURSIVELY;
 250
 251        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 252                if (matchlen == namelen)
 253                        return MATCHED_EXACTLY;
 254
 255                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 256                        return MATCHED_RECURSIVELY;
 257        }
 258
 259        if (item->nowildcard_len < item->len &&
 260            !git_fnmatch(match, name,
 261                         item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 262                         item->nowildcard_len - prefix))
 263                return MATCHED_FNMATCH;
 264
 265        return 0;
 266}
 267
 268/*
 269 * Given a name and a list of pathspecs, see if the name matches
 270 * any of the pathspecs.  The caller is also interested in seeing
 271 * all pathspec matches some names it calls this function with
 272 * (otherwise the user could have mistyped the unmatched pathspec),
 273 * and a mark is left in seen[] array for pathspec element that
 274 * actually matched anything.
 275 */
 276int match_pathspec_depth(const struct pathspec *ps,
 277                         const char *name, int namelen,
 278                         int prefix, char *seen)
 279{
 280        int i, retval = 0;
 281
 282        if (!ps->nr) {
 283                if (!ps->recursive || ps->max_depth == -1)
 284                        return MATCHED_RECURSIVELY;
 285
 286                if (within_depth(name, namelen, 0, ps->max_depth))
 287                        return MATCHED_EXACTLY;
 288                else
 289                        return 0;
 290        }
 291
 292        name += prefix;
 293        namelen -= prefix;
 294
 295        for (i = ps->nr - 1; i >= 0; i--) {
 296                int how;
 297                if (seen && seen[i] == MATCHED_EXACTLY)
 298                        continue;
 299                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 300                if (ps->recursive && ps->max_depth != -1 &&
 301                    how && how != MATCHED_FNMATCH) {
 302                        int len = ps->items[i].len;
 303                        if (name[len] == '/')
 304                                len++;
 305                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 306                                how = MATCHED_EXACTLY;
 307                        else
 308                                how = 0;
 309                }
 310                if (how) {
 311                        if (retval < how)
 312                                retval = how;
 313                        if (seen && seen[i] < how)
 314                                seen[i] = how;
 315                }
 316        }
 317        return retval;
 318}
 319
 320/*
 321 * Return the length of the "simple" part of a path match limiter.
 322 */
 323static int simple_length(const char *match)
 324{
 325        int len = -1;
 326
 327        for (;;) {
 328                unsigned char c = *match++;
 329                len++;
 330                if (c == '\0' || is_glob_special(c))
 331                        return len;
 332        }
 333}
 334
 335static int no_wildcard(const char *string)
 336{
 337        return string[simple_length(string)] == '\0';
 338}
 339
 340void parse_exclude_pattern(const char **pattern,
 341                           int *patternlen,
 342                           int *flags,
 343                           int *nowildcardlen)
 344{
 345        const char *p = *pattern;
 346        size_t i, len;
 347
 348        *flags = 0;
 349        if (*p == '!') {
 350                *flags |= EXC_FLAG_NEGATIVE;
 351                p++;
 352        }
 353        len = strlen(p);
 354        if (len && p[len - 1] == '/') {
 355                len--;
 356                *flags |= EXC_FLAG_MUSTBEDIR;
 357        }
 358        for (i = 0; i < len; i++) {
 359                if (p[i] == '/')
 360                        break;
 361        }
 362        if (i == len)
 363                *flags |= EXC_FLAG_NODIR;
 364        *nowildcardlen = simple_length(p);
 365        /*
 366         * we should have excluded the trailing slash from 'p' too,
 367         * but that's one more allocation. Instead just make sure
 368         * nowildcardlen does not exceed real patternlen
 369         */
 370        if (*nowildcardlen > len)
 371                *nowildcardlen = len;
 372        if (*p == '*' && no_wildcard(p + 1))
 373                *flags |= EXC_FLAG_ENDSWITH;
 374        *pattern = p;
 375        *patternlen = len;
 376}
 377
 378void add_exclude(const char *string, const char *base,
 379                 int baselen, struct exclude_list *which)
 380{
 381        struct exclude *x;
 382        int patternlen;
 383        int flags;
 384        int nowildcardlen;
 385
 386        parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
 387        if (flags & EXC_FLAG_MUSTBEDIR) {
 388                char *s;
 389                x = xmalloc(sizeof(*x) + patternlen + 1);
 390                s = (char *)(x+1);
 391                memcpy(s, string, patternlen);
 392                s[patternlen] = '\0';
 393                x->pattern = s;
 394        } else {
 395                x = xmalloc(sizeof(*x));
 396                x->pattern = string;
 397        }
 398        x->patternlen = patternlen;
 399        x->nowildcardlen = nowildcardlen;
 400        x->base = base;
 401        x->baselen = baselen;
 402        x->flags = flags;
 403        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 404        which->excludes[which->nr++] = x;
 405}
 406
 407static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 408{
 409        int pos, len;
 410        unsigned long sz;
 411        enum object_type type;
 412        void *data;
 413        struct index_state *istate = &the_index;
 414
 415        len = strlen(path);
 416        pos = index_name_pos(istate, path, len);
 417        if (pos < 0)
 418                return NULL;
 419        if (!ce_skip_worktree(istate->cache[pos]))
 420                return NULL;
 421        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 422        if (!data || type != OBJ_BLOB) {
 423                free(data);
 424                return NULL;
 425        }
 426        *size = xsize_t(sz);
 427        return data;
 428}
 429
 430void free_excludes(struct exclude_list *el)
 431{
 432        int i;
 433
 434        for (i = 0; i < el->nr; i++)
 435                free(el->excludes[i]);
 436        free(el->excludes);
 437
 438        el->nr = 0;
 439        el->excludes = NULL;
 440}
 441
 442int add_excludes_from_file_to_list(const char *fname,
 443                                   const char *base,
 444                                   int baselen,
 445                                   char **buf_p,
 446                                   struct exclude_list *which,
 447                                   int check_index)
 448{
 449        struct stat st;
 450        int fd, i;
 451        size_t size = 0;
 452        char *buf, *entry;
 453
 454        fd = open(fname, O_RDONLY);
 455        if (fd < 0 || fstat(fd, &st) < 0) {
 456                if (errno != ENOENT)
 457                        warn_on_inaccessible(fname);
 458                if (0 <= fd)
 459                        close(fd);
 460                if (!check_index ||
 461                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 462                        return -1;
 463                if (size == 0) {
 464                        free(buf);
 465                        return 0;
 466                }
 467                if (buf[size-1] != '\n') {
 468                        buf = xrealloc(buf, size+1);
 469                        buf[size++] = '\n';
 470                }
 471        }
 472        else {
 473                size = xsize_t(st.st_size);
 474                if (size == 0) {
 475                        close(fd);
 476                        return 0;
 477                }
 478                buf = xmalloc(size+1);
 479                if (read_in_full(fd, buf, size) != size) {
 480                        free(buf);
 481                        close(fd);
 482                        return -1;
 483                }
 484                buf[size++] = '\n';
 485                close(fd);
 486        }
 487
 488        if (buf_p)
 489                *buf_p = buf;
 490        entry = buf;
 491        for (i = 0; i < size; i++) {
 492                if (buf[i] == '\n') {
 493                        if (entry != buf + i && entry[0] != '#') {
 494                                buf[i - (i && buf[i-1] == '\r')] = 0;
 495                                add_exclude(entry, base, baselen, which);
 496                        }
 497                        entry = buf + i + 1;
 498                }
 499        }
 500        return 0;
 501}
 502
 503void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 504{
 505        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 506                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 507                die("cannot use %s as an exclude file", fname);
 508}
 509
 510static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 511{
 512        struct exclude_list *el;
 513        struct exclude_stack *stk = NULL;
 514        int current;
 515
 516        if ((!dir->exclude_per_dir) ||
 517            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 518                return; /* too long a path -- ignore */
 519
 520        /* Pop the ones that are not the prefix of the path being checked. */
 521        el = &dir->exclude_list[EXC_DIRS];
 522        while ((stk = dir->exclude_stack) != NULL) {
 523                if (stk->baselen <= baselen &&
 524                    !strncmp(dir->basebuf, base, stk->baselen))
 525                        break;
 526                dir->exclude_stack = stk->prev;
 527                while (stk->exclude_ix < el->nr)
 528                        free(el->excludes[--el->nr]);
 529                free(stk->filebuf);
 530                free(stk);
 531        }
 532
 533        /* Read from the parent directories and push them down. */
 534        current = stk ? stk->baselen : -1;
 535        while (current < baselen) {
 536                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 537                const char *cp;
 538
 539                if (current < 0) {
 540                        cp = base;
 541                        current = 0;
 542                }
 543                else {
 544                        cp = strchr(base + current + 1, '/');
 545                        if (!cp)
 546                                die("oops in prep_exclude");
 547                        cp++;
 548                }
 549                stk->prev = dir->exclude_stack;
 550                stk->baselen = cp - base;
 551                stk->exclude_ix = el->nr;
 552                memcpy(dir->basebuf + current, base + current,
 553                       stk->baselen - current);
 554                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 555                add_excludes_from_file_to_list(dir->basebuf,
 556                                               dir->basebuf, stk->baselen,
 557                                               &stk->filebuf, el, 1);
 558                dir->exclude_stack = stk;
 559                current = stk->baselen;
 560        }
 561        dir->basebuf[baselen] = '\0';
 562}
 563
 564int match_basename(const char *basename, int basenamelen,
 565                   const char *pattern, int prefix, int patternlen,
 566                   int flags)
 567{
 568        if (prefix == patternlen) {
 569                if (!strcmp_icase(pattern, basename))
 570                        return 1;
 571        } else if (flags & EXC_FLAG_ENDSWITH) {
 572                if (patternlen - 1 <= basenamelen &&
 573                    !strcmp_icase(pattern + 1,
 574                                  basename + basenamelen - patternlen + 1))
 575                        return 1;
 576        } else {
 577                if (fnmatch_icase(pattern, basename, 0) == 0)
 578                        return 1;
 579        }
 580        return 0;
 581}
 582
 583int match_pathname(const char *pathname, int pathlen,
 584                   const char *base, int baselen,
 585                   const char *pattern, int prefix, int patternlen,
 586                   int flags)
 587{
 588        const char *name;
 589        int namelen;
 590
 591        /*
 592         * match with FNM_PATHNAME; the pattern has base implicitly
 593         * in front of it.
 594         */
 595        if (*pattern == '/') {
 596                pattern++;
 597                prefix--;
 598        }
 599
 600        /*
 601         * baselen does not count the trailing slash. base[] may or
 602         * may not end with a trailing slash though.
 603         */
 604        if (pathlen < baselen + 1 ||
 605            (baselen && pathname[baselen] != '/') ||
 606            strncmp_icase(pathname, base, baselen))
 607                return 0;
 608
 609        namelen = baselen ? pathlen - baselen - 1 : pathlen;
 610        name = pathname + pathlen - namelen;
 611
 612        if (prefix) {
 613                /*
 614                 * if the non-wildcard part is longer than the
 615                 * remaining pathname, surely it cannot match.
 616                 */
 617                if (prefix > namelen)
 618                        return 0;
 619
 620                if (strncmp_icase(pattern, name, prefix))
 621                        return 0;
 622                pattern += prefix;
 623                name    += prefix;
 624                namelen -= prefix;
 625        }
 626
 627        return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
 628}
 629
 630/* Scan the list and let the last match determine the fate.
 631 * Return 1 for exclude, 0 for include and -1 for undecided.
 632 */
 633int excluded_from_list(const char *pathname,
 634                       int pathlen, const char *basename, int *dtype,
 635                       struct exclude_list *el)
 636{
 637        int i;
 638
 639        if (!el->nr)
 640                return -1;      /* undefined */
 641
 642        for (i = el->nr - 1; 0 <= i; i--) {
 643                struct exclude *x = el->excludes[i];
 644                const char *exclude = x->pattern;
 645                int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 646                int prefix = x->nowildcardlen;
 647
 648                if (x->flags & EXC_FLAG_MUSTBEDIR) {
 649                        if (*dtype == DT_UNKNOWN)
 650                                *dtype = get_dtype(NULL, pathname, pathlen);
 651                        if (*dtype != DT_DIR)
 652                                continue;
 653                }
 654
 655                if (x->flags & EXC_FLAG_NODIR) {
 656                        if (match_basename(basename,
 657                                           pathlen - (basename - pathname),
 658                                           exclude, prefix, x->patternlen,
 659                                           x->flags))
 660                                return to_exclude;
 661                        continue;
 662                }
 663
 664                assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
 665                if (match_pathname(pathname, pathlen,
 666                                   x->base, x->baselen ? x->baselen - 1 : 0,
 667                                   exclude, prefix, x->patternlen, x->flags))
 668                        return to_exclude;
 669        }
 670        return -1; /* undecided */
 671}
 672
 673static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 674{
 675        int pathlen = strlen(pathname);
 676        int st;
 677        const char *basename = strrchr(pathname, '/');
 678        basename = (basename) ? basename+1 : pathname;
 679
 680        prep_exclude(dir, pathname, basename-pathname);
 681        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 682                switch (excluded_from_list(pathname, pathlen, basename,
 683                                           dtype_p, &dir->exclude_list[st])) {
 684                case 0:
 685                        return 0;
 686                case 1:
 687                        return 1;
 688                }
 689        }
 690        return 0;
 691}
 692
 693void path_exclude_check_init(struct path_exclude_check *check,
 694                             struct dir_struct *dir)
 695{
 696        check->dir = dir;
 697        strbuf_init(&check->path, 256);
 698}
 699
 700void path_exclude_check_clear(struct path_exclude_check *check)
 701{
 702        strbuf_release(&check->path);
 703}
 704
 705/*
 706 * Is this name excluded?  This is for a caller like show_files() that
 707 * do not honor directory hierarchy and iterate through paths that are
 708 * possibly in an ignored directory.
 709 *
 710 * A path to a directory known to be excluded is left in check->path to
 711 * optimize for repeated checks for files in the same excluded directory.
 712 */
 713int path_excluded(struct path_exclude_check *check,
 714                  const char *name, int namelen, int *dtype)
 715{
 716        int i;
 717        struct strbuf *path = &check->path;
 718
 719        /*
 720         * we allow the caller to pass namelen as an optimization; it
 721         * must match the length of the name, as we eventually call
 722         * excluded() on the whole name string.
 723         */
 724        if (namelen < 0)
 725                namelen = strlen(name);
 726
 727        if (path->len &&
 728            path->len <= namelen &&
 729            !memcmp(name, path->buf, path->len) &&
 730            (!name[path->len] || name[path->len] == '/'))
 731                return 1;
 732
 733        strbuf_setlen(path, 0);
 734        for (i = 0; name[i]; i++) {
 735                int ch = name[i];
 736
 737                if (ch == '/') {
 738                        int dt = DT_DIR;
 739                        if (excluded(check->dir, path->buf, &dt))
 740                                return 1;
 741                }
 742                strbuf_addch(path, ch);
 743        }
 744
 745        /* An entry in the index; cannot be a directory with subentries */
 746        strbuf_setlen(path, 0);
 747
 748        return excluded(check->dir, name, dtype);
 749}
 750
 751static struct dir_entry *dir_entry_new(const char *pathname, int len)
 752{
 753        struct dir_entry *ent;
 754
 755        ent = xmalloc(sizeof(*ent) + len + 1);
 756        ent->len = len;
 757        memcpy(ent->name, pathname, len);
 758        ent->name[len] = 0;
 759        return ent;
 760}
 761
 762static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 763{
 764        if (cache_name_exists(pathname, len, ignore_case))
 765                return NULL;
 766
 767        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 768        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 769}
 770
 771struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 772{
 773        if (!cache_name_is_other(pathname, len))
 774                return NULL;
 775
 776        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 777        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 778}
 779
 780enum exist_status {
 781        index_nonexistent = 0,
 782        index_directory,
 783        index_gitdir
 784};
 785
 786/*
 787 * Do not use the alphabetically stored index to look up
 788 * the directory name; instead, use the case insensitive
 789 * name hash.
 790 */
 791static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 792{
 793        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 794        unsigned char endchar;
 795
 796        if (!ce)
 797                return index_nonexistent;
 798        endchar = ce->name[len];
 799
 800        /*
 801         * The cache_entry structure returned will contain this dirname
 802         * and possibly additional path components.
 803         */
 804        if (endchar == '/')
 805                return index_directory;
 806
 807        /*
 808         * If there are no additional path components, then this cache_entry
 809         * represents a submodule.  Submodules, despite being directories,
 810         * are stored in the cache without a closing slash.
 811         */
 812        if (!endchar && S_ISGITLINK(ce->ce_mode))
 813                return index_gitdir;
 814
 815        /* This should never be hit, but it exists just in case. */
 816        return index_nonexistent;
 817}
 818
 819/*
 820 * The index sorts alphabetically by entry name, which
 821 * means that a gitlink sorts as '\0' at the end, while
 822 * a directory (which is defined not as an entry, but as
 823 * the files it contains) will sort with the '/' at the
 824 * end.
 825 */
 826static enum exist_status directory_exists_in_index(const char *dirname, int len)
 827{
 828        int pos;
 829
 830        if (ignore_case)
 831                return directory_exists_in_index_icase(dirname, len);
 832
 833        pos = cache_name_pos(dirname, len);
 834        if (pos < 0)
 835                pos = -pos-1;
 836        while (pos < active_nr) {
 837                struct cache_entry *ce = active_cache[pos++];
 838                unsigned char endchar;
 839
 840                if (strncmp(ce->name, dirname, len))
 841                        break;
 842                endchar = ce->name[len];
 843                if (endchar > '/')
 844                        break;
 845                if (endchar == '/')
 846                        return index_directory;
 847                if (!endchar && S_ISGITLINK(ce->ce_mode))
 848                        return index_gitdir;
 849        }
 850        return index_nonexistent;
 851}
 852
 853/*
 854 * When we find a directory when traversing the filesystem, we
 855 * have three distinct cases:
 856 *
 857 *  - ignore it
 858 *  - see it as a directory
 859 *  - recurse into it
 860 *
 861 * and which one we choose depends on a combination of existing
 862 * git index contents and the flags passed into the directory
 863 * traversal routine.
 864 *
 865 * Case 1: If we *already* have entries in the index under that
 866 * directory name, we always recurse into the directory to see
 867 * all the files.
 868 *
 869 * Case 2: If we *already* have that directory name as a gitlink,
 870 * we always continue to see it as a gitlink, regardless of whether
 871 * there is an actual git directory there or not (it might not
 872 * be checked out as a subproject!)
 873 *
 874 * Case 3: if we didn't have it in the index previously, we
 875 * have a few sub-cases:
 876 *
 877 *  (a) if "show_other_directories" is true, we show it as
 878 *      just a directory, unless "hide_empty_directories" is
 879 *      also true and the directory is empty, in which case
 880 *      we just ignore it entirely.
 881 *  (b) if it looks like a git directory, and we don't have
 882 *      'no_gitlinks' set we treat it as a gitlink, and show it
 883 *      as a directory.
 884 *  (c) otherwise, we recurse into it.
 885 */
 886enum directory_treatment {
 887        show_directory,
 888        ignore_directory,
 889        recurse_into_directory
 890};
 891
 892static enum directory_treatment treat_directory(struct dir_struct *dir,
 893        const char *dirname, int len,
 894        const struct path_simplify *simplify)
 895{
 896        /* The "len-1" is to strip the final '/' */
 897        switch (directory_exists_in_index(dirname, len-1)) {
 898        case index_directory:
 899                return recurse_into_directory;
 900
 901        case index_gitdir:
 902                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 903                        return ignore_directory;
 904                return show_directory;
 905
 906        case index_nonexistent:
 907                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 908                        break;
 909                if (!(dir->flags & DIR_NO_GITLINKS)) {
 910                        unsigned char sha1[20];
 911                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 912                                return show_directory;
 913                }
 914                return recurse_into_directory;
 915        }
 916
 917        /* This is the "show_other_directories" case */
 918        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 919                return show_directory;
 920        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 921                return ignore_directory;
 922        return show_directory;
 923}
 924
 925/*
 926 * This is an inexact early pruning of any recursive directory
 927 * reading - if the path cannot possibly be in the pathspec,
 928 * return true, and we'll skip it early.
 929 */
 930static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 931{
 932        if (simplify) {
 933                for (;;) {
 934                        const char *match = simplify->path;
 935                        int len = simplify->len;
 936
 937                        if (!match)
 938                                break;
 939                        if (len > pathlen)
 940                                len = pathlen;
 941                        if (!memcmp(path, match, len))
 942                                return 0;
 943                        simplify++;
 944                }
 945                return 1;
 946        }
 947        return 0;
 948}
 949
 950/*
 951 * This function tells us whether an excluded path matches a
 952 * list of "interesting" pathspecs. That is, whether a path matched
 953 * by any of the pathspecs could possibly be ignored by excluding
 954 * the specified path. This can happen if:
 955 *
 956 *   1. the path is mentioned explicitly in the pathspec
 957 *
 958 *   2. the path is a directory prefix of some element in the
 959 *      pathspec
 960 */
 961static int exclude_matches_pathspec(const char *path, int len,
 962                const struct path_simplify *simplify)
 963{
 964        if (simplify) {
 965                for (; simplify->path; simplify++) {
 966                        if (len == simplify->len
 967                            && !memcmp(path, simplify->path, len))
 968                                return 1;
 969                        if (len < simplify->len
 970                            && simplify->path[len] == '/'
 971                            && !memcmp(path, simplify->path, len))
 972                                return 1;
 973                }
 974        }
 975        return 0;
 976}
 977
 978static int get_index_dtype(const char *path, int len)
 979{
 980        int pos;
 981        struct cache_entry *ce;
 982
 983        ce = cache_name_exists(path, len, 0);
 984        if (ce) {
 985                if (!ce_uptodate(ce))
 986                        return DT_UNKNOWN;
 987                if (S_ISGITLINK(ce->ce_mode))
 988                        return DT_DIR;
 989                /*
 990                 * Nobody actually cares about the
 991                 * difference between DT_LNK and DT_REG
 992                 */
 993                return DT_REG;
 994        }
 995
 996        /* Try to look it up as a directory */
 997        pos = cache_name_pos(path, len);
 998        if (pos >= 0)
 999                return DT_UNKNOWN;
1000        pos = -pos-1;
1001        while (pos < active_nr) {
1002                ce = active_cache[pos++];
1003                if (strncmp(ce->name, path, len))
1004                        break;
1005                if (ce->name[len] > '/')
1006                        break;
1007                if (ce->name[len] < '/')
1008                        continue;
1009                if (!ce_uptodate(ce))
1010                        break;  /* continue? */
1011                return DT_DIR;
1012        }
1013        return DT_UNKNOWN;
1014}
1015
1016static int get_dtype(struct dirent *de, const char *path, int len)
1017{
1018        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
1019        struct stat st;
1020
1021        if (dtype != DT_UNKNOWN)
1022                return dtype;
1023        dtype = get_index_dtype(path, len);
1024        if (dtype != DT_UNKNOWN)
1025                return dtype;
1026        if (lstat(path, &st))
1027                return dtype;
1028        if (S_ISREG(st.st_mode))
1029                return DT_REG;
1030        if (S_ISDIR(st.st_mode))
1031                return DT_DIR;
1032        if (S_ISLNK(st.st_mode))
1033                return DT_LNK;
1034        return dtype;
1035}
1036
/* Verdict returned by treat_path() / treat_one_path() for one path. */
enum path_treatment {
	path_ignored = 0,	/* caller skips the path */
	path_handled = 1,	/* caller counts (and normally records) the path */
	path_recurse = 2	/* caller descends into the directory */
};
1042
1043static enum path_treatment treat_one_path(struct dir_struct *dir,
1044                                          struct strbuf *path,
1045                                          const struct path_simplify *simplify,
1046                                          int dtype, struct dirent *de)
1047{
1048        int exclude = excluded(dir, path->buf, &dtype);
1049        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
1050            && exclude_matches_pathspec(path->buf, path->len, simplify))
1051                dir_add_ignored(dir, path->buf, path->len);
1052
1053        /*
1054         * Excluded? If we don't explicitly want to show
1055         * ignored files, ignore it
1056         */
1057        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
1058                return path_ignored;
1059
1060        if (dtype == DT_UNKNOWN)
1061                dtype = get_dtype(de, path->buf, path->len);
1062
1063        /*
1064         * Do we want to see just the ignored files?
1065         * We still need to recurse into directories,
1066         * even if we don't ignore them, since the
1067         * directory may contain files that we do..
1068         */
1069        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
1070                if (dtype != DT_DIR)
1071                        return path_ignored;
1072        }
1073
1074        switch (dtype) {
1075        default:
1076                return path_ignored;
1077        case DT_DIR:
1078                strbuf_addch(path, '/');
1079                switch (treat_directory(dir, path->buf, path->len, simplify)) {
1080                case show_directory:
1081                        if (exclude != !!(dir->flags
1082                                          & DIR_SHOW_IGNORED))
1083                                return path_ignored;
1084                        break;
1085                case recurse_into_directory:
1086                        return path_recurse;
1087                case ignore_directory:
1088                        return path_ignored;
1089                }
1090                break;
1091        case DT_REG:
1092        case DT_LNK:
1093                break;
1094        }
1095        return path_handled;
1096}
1097
1098static enum path_treatment treat_path(struct dir_struct *dir,
1099                                      struct dirent *de,
1100                                      struct strbuf *path,
1101                                      int baselen,
1102                                      const struct path_simplify *simplify)
1103{
1104        int dtype;
1105
1106        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
1107                return path_ignored;
1108        strbuf_setlen(path, baselen);
1109        strbuf_addstr(path, de->d_name);
1110        if (simplify_away(path->buf, path->len, simplify))
1111                return path_ignored;
1112
1113        dtype = DTYPE(de);
1114        return treat_one_path(dir, path, simplify, dtype, de);
1115}
1116
1117/*
1118 * Read a directory tree. We currently ignore anything but
1119 * directories, regular files and symlinks. That's because git
1120 * doesn't handle them at all yet. Maybe that will change some
1121 * day.
1122 *
1123 * Also, we ignore the name ".git" (even if it is not a directory).
1124 * That likely will not change.
1125 */
1126static int read_directory_recursive(struct dir_struct *dir,
1127                                    const char *base, int baselen,
1128                                    int check_only,
1129                                    const struct path_simplify *simplify)
1130{
1131        DIR *fdir;
1132        int contents = 0;
1133        struct dirent *de;
1134        struct strbuf path = STRBUF_INIT;
1135
1136        strbuf_add(&path, base, baselen);
1137
1138        fdir = opendir(path.len ? path.buf : ".");
1139        if (!fdir)
1140                goto out;
1141
1142        while ((de = readdir(fdir)) != NULL) {
1143                switch (treat_path(dir, de, &path, baselen, simplify)) {
1144                case path_recurse:
1145                        contents += read_directory_recursive(dir, path.buf,
1146                                                             path.len, 0,
1147                                                             simplify);
1148                        continue;
1149                case path_ignored:
1150                        continue;
1151                case path_handled:
1152                        break;
1153                }
1154                contents++;
1155                if (check_only)
1156                        break;
1157                dir_add_name(dir, path.buf, path.len);
1158        }
1159        closedir(fdir);
1160 out:
1161        strbuf_release(&path);
1162
1163        return contents;
1164}
1165
1166static int cmp_name(const void *p1, const void *p2)
1167{
1168        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1169        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1170
1171        return cache_name_compare(e1->name, e1->len,
1172                                  e2->name, e2->len);
1173}
1174
1175static struct path_simplify *create_simplify(const char **pathspec)
1176{
1177        int nr, alloc = 0;
1178        struct path_simplify *simplify = NULL;
1179
1180        if (!pathspec)
1181                return NULL;
1182
1183        for (nr = 0 ; ; nr++) {
1184                const char *match;
1185                if (nr >= alloc) {
1186                        alloc = alloc_nr(alloc);
1187                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1188                }
1189                match = *pathspec++;
1190                if (!match)
1191                        break;
1192                simplify[nr].path = match;
1193                simplify[nr].len = simple_length(match);
1194        }
1195        simplify[nr].path = NULL;
1196        simplify[nr].len = 0;
1197        return simplify;
1198}
1199
/* Release an array obtained from create_simplify(); NULL is accepted. */
static void free_simplify(struct path_simplify *simplify)
{
	free(simplify);
}
1204
1205static int treat_leading_path(struct dir_struct *dir,
1206                              const char *path, int len,
1207                              const struct path_simplify *simplify)
1208{
1209        struct strbuf sb = STRBUF_INIT;
1210        int baselen, rc = 0;
1211        const char *cp;
1212
1213        while (len && path[len - 1] == '/')
1214                len--;
1215        if (!len)
1216                return 1;
1217        baselen = 0;
1218        while (1) {
1219                cp = path + baselen + !!baselen;
1220                cp = memchr(cp, '/', path + len - cp);
1221                if (!cp)
1222                        baselen = len;
1223                else
1224                        baselen = cp - path;
1225                strbuf_setlen(&sb, 0);
1226                strbuf_add(&sb, path, baselen);
1227                if (!is_directory(sb.buf))
1228                        break;
1229                if (simplify_away(sb.buf, sb.len, simplify))
1230                        break;
1231                if (treat_one_path(dir, &sb, simplify,
1232                                   DT_DIR, NULL) == path_ignored)
1233                        break; /* do not recurse into it */
1234                if (len <= baselen) {
1235                        rc = 1;
1236                        break; /* finished checking */
1237                }
1238        }
1239        strbuf_release(&sb);
1240        return rc;
1241}
1242
1243int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1244{
1245        struct path_simplify *simplify;
1246
1247        if (has_symlink_leading_path(path, len))
1248                return dir->nr;
1249
1250        simplify = create_simplify(pathspec);
1251        if (!len || treat_leading_path(dir, path, len, simplify))
1252                read_directory_recursive(dir, path, len, 0, simplify);
1253        free_simplify(simplify);
1254        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1255        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1256        return dir->nr;
1257}
1258
/*
 * Return 1 if lstat() succeeds on "f", 0 otherwise.  Because lstat()
 * is used, a symlink counts as existing whatever it points at.
 */
int file_exists(const char *f)
{
	struct stat st;

	if (lstat(f, &st) != 0)
		return 0;
	return 1;
}
1264
1265/*
1266 * Given two normalized paths (a trailing slash is ok), if subdir is
1267 * outside dir, return -1.  Otherwise return the offset in subdir that
1268 * can be used as relative path to dir.
1269 */
/*
 * Return the offset into "subdir" at which the part relative to
 * "dir" begins, or -1 if "subdir" is not "dir" itself or something
 * below it.  Both arguments must be non-NULL and non-empty.
 */
int dir_inside_of(const char *subdir, const char *dir)
{
	int i = 0;

	assert(dir && subdir && *dir && *subdir);

	/* Length of the common leading part. */
	while (dir[i] && subdir[i] && dir[i] == subdir[i])
		i++;

	/* hel[p]/me vs hel[l]/yeah */
	if (dir[i] && subdir[i])
		return -1;

	/* subdir ran out: only an exact match counts (same dir) */
	if (!subdir[i])
		return dir[i] ? -1 : i;

	/* foo/[b]ar vs foo/[] */
	if (is_dir_sep(dir[i - 1]))
		return is_dir_sep(subdir[i - 1]) ? i : -1;

	/* foo[/]bar vs foo[] */
	return is_dir_sep(subdir[i]) ? i + 1 : -1;
}
1296
1297int is_inside_dir(const char *dir)
1298{
1299        char cwd[PATH_MAX];
1300        if (!dir)
1301                return 0;
1302        if (!getcwd(cwd, sizeof(cwd)))
1303                die_errno("can't find the current directory");
1304        return dir_inside_of(cwd, dir) >= 0;
1305}
1306
1307int is_empty_dir(const char *path)
1308{
1309        DIR *dir = opendir(path);
1310        struct dirent *e;
1311        int ret = 1;
1312
1313        if (!dir)
1314                return 0;
1315
1316        while ((e = readdir(dir)) != NULL)
1317                if (!is_dot_or_dotdot(e->d_name)) {
1318                        ret = 0;
1319                        break;
1320                }
1321
1322        closedir(dir);
1323        return ret;
1324}
1325
1326static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
1327{
1328        DIR *dir;
1329        struct dirent *e;
1330        int ret = 0, original_len = path->len, len, kept_down = 0;
1331        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1332        int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL);
1333        unsigned char submodule_head[20];
1334
1335        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1336            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
1337                /* Do not descend and nuke a nested git work tree. */
1338                if (kept_up)
1339                        *kept_up = 1;
1340                return 0;
1341        }
1342
1343        flag &= ~REMOVE_DIR_KEEP_TOPLEVEL;
1344        dir = opendir(path->buf);
1345        if (!dir) {
1346                /* an empty dir could be removed even if it is unreadble */
1347                if (!keep_toplevel)
1348                        return rmdir(path->buf);
1349                else
1350                        return -1;
1351        }
1352        if (path->buf[original_len - 1] != '/')
1353                strbuf_addch(path, '/');
1354
1355        len = path->len;
1356        while ((e = readdir(dir)) != NULL) {
1357                struct stat st;
1358                if (is_dot_or_dotdot(e->d_name))
1359                        continue;
1360
1361                strbuf_setlen(path, len);
1362                strbuf_addstr(path, e->d_name);
1363                if (lstat(path->buf, &st))
1364                        ; /* fall thru */
1365                else if (S_ISDIR(st.st_mode)) {
1366                        if (!remove_dir_recurse(path, flag, &kept_down))
1367                                continue; /* happy */
1368                } else if (!only_empty && !unlink(path->buf))
1369                        continue; /* happy, too */
1370
1371                /* path too long, stat fails, or non-directory still exists */
1372                ret = -1;
1373                break;
1374        }
1375        closedir(dir);
1376
1377        strbuf_setlen(path, original_len);
1378        if (!ret && !keep_toplevel && !kept_down)
1379                ret = rmdir(path->buf);
1380        else if (kept_up)
1381                /*
1382                 * report the uplevel that it is not an error that we
1383                 * did not rmdir() our directory.
1384                 */
1385                *kept_up = !ret;
1386        return ret;
1387}
1388
1389int remove_dir_recursively(struct strbuf *path, int flag)
1390{
1391        return remove_dir_recurse(path, flag, NULL);
1392}
1393
1394void setup_standard_excludes(struct dir_struct *dir)
1395{
1396        const char *path;
1397        char *xdg_path;
1398
1399        dir->exclude_per_dir = ".gitignore";
1400        path = git_path("info/exclude");
1401        if (!excludes_file) {
1402                home_config_paths(NULL, &xdg_path, "ignore");
1403                excludes_file = xdg_path;
1404        }
1405        if (!access_or_warn(path, R_OK))
1406                add_excludes_from_file(dir, path);
1407        if (excludes_file && !access_or_warn(excludes_file, R_OK))
1408                add_excludes_from_file(dir, excludes_file);
1409}
1410
/*
 * Unlink "name" and then remove its parent directories, innermost
 * first, for as long as rmdir() succeeds.
 *
 * Returns -1 if unlink() fails for a reason other than ENOENT, and 0
 * otherwise.  Failure to remove a parent directory is not reported.
 */
int remove_path(const char *name)
{
	const char *last;
	char *parents, *cut;

	if (unlink(name) && errno != ENOENT)
		return -1;

	last = strrchr(name, '/');
	if (!last)
		return 0;

	/* Work on a private copy that can be truncated in place. */
	parents = xstrdup(name);
	cut = parents + (last - name);
	while (cut) {
		*cut = '\0';
		if (rmdir(parents) != 0)
			break;
		cut = strrchr(parents, '/');
	}
	free(parents);
	return 0;
}
1429
1430static int pathspec_item_cmp(const void *a_, const void *b_)
1431{
1432        struct pathspec_item *a, *b;
1433
1434        a = (struct pathspec_item *)a_;
1435        b = (struct pathspec_item *)b_;
1436        return strcmp(a->match, b->match);
1437}
1438
1439int init_pathspec(struct pathspec *pathspec, const char **paths)
1440{
1441        const char **p = paths;
1442        int i;
1443
1444        memset(pathspec, 0, sizeof(*pathspec));
1445        if (!p)
1446                return 0;
1447        while (*p)
1448                p++;
1449        pathspec->raw = paths;
1450        pathspec->nr = p - paths;
1451        if (!pathspec->nr)
1452                return 0;
1453
1454        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1455        for (i = 0; i < pathspec->nr; i++) {
1456                struct pathspec_item *item = pathspec->items+i;
1457                const char *path = paths[i];
1458
1459                item->match = path;
1460                item->len = strlen(path);
1461                item->flags = 0;
1462                if (limit_pathspec_to_literal()) {
1463                        item->nowildcard_len = item->len;
1464                } else {
1465                        item->nowildcard_len = simple_length(path);
1466                        if (item->nowildcard_len < item->len) {
1467                                pathspec->has_wildcard = 1;
1468                                if (path[item->nowildcard_len] == '*' &&
1469                                    no_wildcard(path + item->nowildcard_len + 1))
1470                                        item->flags |= PATHSPEC_ONESTAR;
1471                        }
1472                }
1473        }
1474
1475        qsort(pathspec->items, pathspec->nr,
1476              sizeof(struct pathspec_item), pathspec_item_cmp);
1477
1478        return 0;
1479}
1480
1481void free_pathspec(struct pathspec *pathspec)
1482{
1483        free(pathspec->items);
1484        pathspec->items = NULL;
1485}
1486
1487int limit_pathspec_to_literal(void)
1488{
1489        static int flag = -1;
1490        if (flag < 0)
1491                flag = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
1492        return flag;
1493}