dir.con commit Merge branch 'jk/complete-commit-c' (d2638e1)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37inline int git_fnmatch(const char *pattern, const char *string,
  38                       int flags, int prefix)
  39{
  40        int fnm_flags = 0;
  41        if (flags & GFNM_PATHNAME)
  42                fnm_flags |= FNM_PATHNAME;
  43        if (prefix > 0) {
  44                if (strncmp(pattern, string, prefix))
  45                        return FNM_NOMATCH;
  46                pattern += prefix;
  47                string += prefix;
  48        }
  49        if (flags & GFNM_ONESTAR) {
  50                int pattern_len = strlen(++pattern);
  51                int string_len = strlen(string);
  52                return string_len < pattern_len ||
  53                       strcmp(pattern,
  54                              string + string_len - pattern_len);
  55        }
  56        return fnmatch(pattern, string, fnm_flags);
  57}
  58
  59static size_t common_prefix_len(const char **pathspec)
  60{
  61        const char *n, *first;
  62        size_t max = 0;
  63
  64        if (!pathspec)
  65                return max;
  66
  67        first = *pathspec;
  68        while ((n = *pathspec++)) {
  69                size_t i, len = 0;
  70                for (i = 0; first == n || i < max; i++) {
  71                        char c = n[i];
  72                        if (!c || c != first[i] || is_glob_special(c))
  73                                break;
  74                        if (c == '/')
  75                                len = i + 1;
  76                }
  77                if (first == n || len < max) {
  78                        max = len;
  79                        if (!max)
  80                                break;
  81                }
  82        }
  83        return max;
  84}
  85
  86/*
  87 * Returns a copy of the longest leading path common among all
  88 * pathspecs.
  89 */
  90char *common_prefix(const char **pathspec)
  91{
  92        unsigned long len = common_prefix_len(pathspec);
  93
  94        return len ? xmemdupz(*pathspec, len) : NULL;
  95}
  96
  97int fill_directory(struct dir_struct *dir, const char **pathspec)
  98{
  99        size_t len;
 100
 101        /*
 102         * Calculate common prefix for the pathspec, and
 103         * use that to optimize the directory walk
 104         */
 105        len = common_prefix_len(pathspec);
 106
 107        /* Read the directory and prune it */
 108        read_directory(dir, pathspec ? *pathspec : "", len, pathspec);
 109        return len;
 110}
 111
 112int within_depth(const char *name, int namelen,
 113                        int depth, int max_depth)
 114{
 115        const char *cp = name, *cpe = name + namelen;
 116
 117        while (cp < cpe) {
 118                if (*cp++ != '/')
 119                        continue;
 120                depth++;
 121                if (depth > max_depth)
 122                        return 0;
 123        }
 124        return 1;
 125}
 126
 127/*
 128 * Does 'match' match the given name?
 129 * A match is found if
 130 *
 131 * (1) the 'match' string is leading directory of 'name', or
 132 * (2) the 'match' string is a wildcard and matches 'name', or
 133 * (3) the 'match' string is exactly the same as 'name'.
 134 *
 135 * and the return value tells which case it was.
 136 *
 137 * It returns 0 when there is no match.
 138 */
 139static int match_one(const char *match, const char *name, int namelen)
 140{
 141        int matchlen;
 142
 143        /* If the match was just the prefix, we matched */
 144        if (!*match)
 145                return MATCHED_RECURSIVELY;
 146
 147        if (ignore_case) {
 148                for (;;) {
 149                        unsigned char c1 = tolower(*match);
 150                        unsigned char c2 = tolower(*name);
 151                        if (c1 == '\0' || is_glob_special(c1))
 152                                break;
 153                        if (c1 != c2)
 154                                return 0;
 155                        match++;
 156                        name++;
 157                        namelen--;
 158                }
 159        } else {
 160                for (;;) {
 161                        unsigned char c1 = *match;
 162                        unsigned char c2 = *name;
 163                        if (c1 == '\0' || is_glob_special(c1))
 164                                break;
 165                        if (c1 != c2)
 166                                return 0;
 167                        match++;
 168                        name++;
 169                        namelen--;
 170                }
 171        }
 172
 173
 174        /*
 175         * If we don't match the matchstring exactly,
 176         * we need to match by fnmatch
 177         */
 178        matchlen = strlen(match);
 179        if (strncmp_icase(match, name, matchlen))
 180                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 181
 182        if (namelen == matchlen)
 183                return MATCHED_EXACTLY;
 184        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 185                return MATCHED_RECURSIVELY;
 186        return 0;
 187}
 188
 189/*
 190 * Given a name and a list of pathspecs, see if the name matches
 191 * any of the pathspecs.  The caller is also interested in seeing
 192 * all pathspec matches some names it calls this function with
 193 * (otherwise the user could have mistyped the unmatched pathspec),
 194 * and a mark is left in seen[] array for pathspec element that
 195 * actually matched anything.
 196 */
 197int match_pathspec(const char **pathspec, const char *name, int namelen,
 198                int prefix, char *seen)
 199{
 200        int i, retval = 0;
 201
 202        if (!pathspec)
 203                return 1;
 204
 205        name += prefix;
 206        namelen -= prefix;
 207
 208        for (i = 0; pathspec[i] != NULL; i++) {
 209                int how;
 210                const char *match = pathspec[i] + prefix;
 211                if (seen && seen[i] == MATCHED_EXACTLY)
 212                        continue;
 213                how = match_one(match, name, namelen);
 214                if (how) {
 215                        if (retval < how)
 216                                retval = how;
 217                        if (seen && seen[i] < how)
 218                                seen[i] = how;
 219                }
 220        }
 221        return retval;
 222}
 223
 224/*
 225 * Does 'match' match the given name?
 226 * A match is found if
 227 *
 228 * (1) the 'match' string is leading directory of 'name', or
 229 * (2) the 'match' string is a wildcard and matches 'name', or
 230 * (3) the 'match' string is exactly the same as 'name'.
 231 *
 232 * and the return value tells which case it was.
 233 *
 234 * It returns 0 when there is no match.
 235 */
 236static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 237                               const char *name, int namelen)
 238{
 239        /* name/namelen has prefix cut off by caller */
 240        const char *match = item->match + prefix;
 241        int matchlen = item->len - prefix;
 242
 243        /* If the match was just the prefix, we matched */
 244        if (!*match)
 245                return MATCHED_RECURSIVELY;
 246
 247        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 248                if (matchlen == namelen)
 249                        return MATCHED_EXACTLY;
 250
 251                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 252                        return MATCHED_RECURSIVELY;
 253        }
 254
 255        if (item->nowildcard_len < item->len &&
 256            !git_fnmatch(match, name,
 257                         item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 258                         item->nowildcard_len - prefix))
 259                return MATCHED_FNMATCH;
 260
 261        return 0;
 262}
 263
 264/*
 265 * Given a name and a list of pathspecs, see if the name matches
 266 * any of the pathspecs.  The caller is also interested in seeing
 267 * all pathspec matches some names it calls this function with
 268 * (otherwise the user could have mistyped the unmatched pathspec),
 269 * and a mark is left in seen[] array for pathspec element that
 270 * actually matched anything.
 271 */
 272int match_pathspec_depth(const struct pathspec *ps,
 273                         const char *name, int namelen,
 274                         int prefix, char *seen)
 275{
 276        int i, retval = 0;
 277
 278        if (!ps->nr) {
 279                if (!ps->recursive || ps->max_depth == -1)
 280                        return MATCHED_RECURSIVELY;
 281
 282                if (within_depth(name, namelen, 0, ps->max_depth))
 283                        return MATCHED_EXACTLY;
 284                else
 285                        return 0;
 286        }
 287
 288        name += prefix;
 289        namelen -= prefix;
 290
 291        for (i = ps->nr - 1; i >= 0; i--) {
 292                int how;
 293                if (seen && seen[i] == MATCHED_EXACTLY)
 294                        continue;
 295                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 296                if (ps->recursive && ps->max_depth != -1 &&
 297                    how && how != MATCHED_FNMATCH) {
 298                        int len = ps->items[i].len;
 299                        if (name[len] == '/')
 300                                len++;
 301                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 302                                how = MATCHED_EXACTLY;
 303                        else
 304                                how = 0;
 305                }
 306                if (how) {
 307                        if (retval < how)
 308                                retval = how;
 309                        if (seen && seen[i] < how)
 310                                seen[i] = how;
 311                }
 312        }
 313        return retval;
 314}
 315
 316/*
 317 * Return the length of the "simple" part of a path match limiter.
 318 */
 319static int simple_length(const char *match)
 320{
 321        int len = -1;
 322
 323        for (;;) {
 324                unsigned char c = *match++;
 325                len++;
 326                if (c == '\0' || is_glob_special(c))
 327                        return len;
 328        }
 329}
 330
 331static int no_wildcard(const char *string)
 332{
 333        return string[simple_length(string)] == '\0';
 334}
 335
 336void parse_exclude_pattern(const char **pattern,
 337                           int *patternlen,
 338                           int *flags,
 339                           int *nowildcardlen)
 340{
 341        const char *p = *pattern;
 342        size_t i, len;
 343
 344        *flags = 0;
 345        if (*p == '!') {
 346                *flags |= EXC_FLAG_NEGATIVE;
 347                p++;
 348        }
 349        len = strlen(p);
 350        if (len && p[len - 1] == '/') {
 351                len--;
 352                *flags |= EXC_FLAG_MUSTBEDIR;
 353        }
 354        for (i = 0; i < len; i++) {
 355                if (p[i] == '/')
 356                        break;
 357        }
 358        if (i == len)
 359                *flags |= EXC_FLAG_NODIR;
 360        *nowildcardlen = simple_length(p);
 361        /*
 362         * we should have excluded the trailing slash from 'p' too,
 363         * but that's one more allocation. Instead just make sure
 364         * nowildcardlen does not exceed real patternlen
 365         */
 366        if (*nowildcardlen > len)
 367                *nowildcardlen = len;
 368        if (*p == '*' && no_wildcard(p + 1))
 369                *flags |= EXC_FLAG_ENDSWITH;
 370        *pattern = p;
 371        *patternlen = len;
 372}
 373
 374void add_exclude(const char *string, const char *base,
 375                 int baselen, struct exclude_list *which)
 376{
 377        struct exclude *x;
 378        int patternlen;
 379        int flags;
 380        int nowildcardlen;
 381
 382        parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
 383        if (flags & EXC_FLAG_MUSTBEDIR) {
 384                char *s;
 385                x = xmalloc(sizeof(*x) + patternlen + 1);
 386                s = (char *)(x+1);
 387                memcpy(s, string, patternlen);
 388                s[patternlen] = '\0';
 389                x->pattern = s;
 390        } else {
 391                x = xmalloc(sizeof(*x));
 392                x->pattern = string;
 393        }
 394        x->patternlen = patternlen;
 395        x->nowildcardlen = nowildcardlen;
 396        x->base = base;
 397        x->baselen = baselen;
 398        x->flags = flags;
 399        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 400        which->excludes[which->nr++] = x;
 401}
 402
 403static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 404{
 405        int pos, len;
 406        unsigned long sz;
 407        enum object_type type;
 408        void *data;
 409        struct index_state *istate = &the_index;
 410
 411        len = strlen(path);
 412        pos = index_name_pos(istate, path, len);
 413        if (pos < 0)
 414                return NULL;
 415        if (!ce_skip_worktree(istate->cache[pos]))
 416                return NULL;
 417        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 418        if (!data || type != OBJ_BLOB) {
 419                free(data);
 420                return NULL;
 421        }
 422        *size = xsize_t(sz);
 423        return data;
 424}
 425
 426void free_excludes(struct exclude_list *el)
 427{
 428        int i;
 429
 430        for (i = 0; i < el->nr; i++)
 431                free(el->excludes[i]);
 432        free(el->excludes);
 433
 434        el->nr = 0;
 435        el->excludes = NULL;
 436}
 437
 438int add_excludes_from_file_to_list(const char *fname,
 439                                   const char *base,
 440                                   int baselen,
 441                                   char **buf_p,
 442                                   struct exclude_list *which,
 443                                   int check_index)
 444{
 445        struct stat st;
 446        int fd, i;
 447        size_t size = 0;
 448        char *buf, *entry;
 449
 450        fd = open(fname, O_RDONLY);
 451        if (fd < 0 || fstat(fd, &st) < 0) {
 452                if (errno != ENOENT)
 453                        warn_on_inaccessible(fname);
 454                if (0 <= fd)
 455                        close(fd);
 456                if (!check_index ||
 457                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 458                        return -1;
 459                if (size == 0) {
 460                        free(buf);
 461                        return 0;
 462                }
 463                if (buf[size-1] != '\n') {
 464                        buf = xrealloc(buf, size+1);
 465                        buf[size++] = '\n';
 466                }
 467        }
 468        else {
 469                size = xsize_t(st.st_size);
 470                if (size == 0) {
 471                        close(fd);
 472                        return 0;
 473                }
 474                buf = xmalloc(size+1);
 475                if (read_in_full(fd, buf, size) != size) {
 476                        free(buf);
 477                        close(fd);
 478                        return -1;
 479                }
 480                buf[size++] = '\n';
 481                close(fd);
 482        }
 483
 484        if (buf_p)
 485                *buf_p = buf;
 486        entry = buf;
 487        for (i = 0; i < size; i++) {
 488                if (buf[i] == '\n') {
 489                        if (entry != buf + i && entry[0] != '#') {
 490                                buf[i - (i && buf[i-1] == '\r')] = 0;
 491                                add_exclude(entry, base, baselen, which);
 492                        }
 493                        entry = buf + i + 1;
 494                }
 495        }
 496        return 0;
 497}
 498
 499void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 500{
 501        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 502                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 503                die("cannot use %s as an exclude file", fname);
 504}
 505
 506static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 507{
 508        struct exclude_list *el;
 509        struct exclude_stack *stk = NULL;
 510        int current;
 511
 512        if ((!dir->exclude_per_dir) ||
 513            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 514                return; /* too long a path -- ignore */
 515
 516        /* Pop the ones that are not the prefix of the path being checked. */
 517        el = &dir->exclude_list[EXC_DIRS];
 518        while ((stk = dir->exclude_stack) != NULL) {
 519                if (stk->baselen <= baselen &&
 520                    !strncmp(dir->basebuf, base, stk->baselen))
 521                        break;
 522                dir->exclude_stack = stk->prev;
 523                while (stk->exclude_ix < el->nr)
 524                        free(el->excludes[--el->nr]);
 525                free(stk->filebuf);
 526                free(stk);
 527        }
 528
 529        /* Read from the parent directories and push them down. */
 530        current = stk ? stk->baselen : -1;
 531        while (current < baselen) {
 532                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 533                const char *cp;
 534
 535                if (current < 0) {
 536                        cp = base;
 537                        current = 0;
 538                }
 539                else {
 540                        cp = strchr(base + current + 1, '/');
 541                        if (!cp)
 542                                die("oops in prep_exclude");
 543                        cp++;
 544                }
 545                stk->prev = dir->exclude_stack;
 546                stk->baselen = cp - base;
 547                stk->exclude_ix = el->nr;
 548                memcpy(dir->basebuf + current, base + current,
 549                       stk->baselen - current);
 550                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 551                add_excludes_from_file_to_list(dir->basebuf,
 552                                               dir->basebuf, stk->baselen,
 553                                               &stk->filebuf, el, 1);
 554                dir->exclude_stack = stk;
 555                current = stk->baselen;
 556        }
 557        dir->basebuf[baselen] = '\0';
 558}
 559
 560int match_basename(const char *basename, int basenamelen,
 561                   const char *pattern, int prefix, int patternlen,
 562                   int flags)
 563{
 564        if (prefix == patternlen) {
 565                if (!strcmp_icase(pattern, basename))
 566                        return 1;
 567        } else if (flags & EXC_FLAG_ENDSWITH) {
 568                if (patternlen - 1 <= basenamelen &&
 569                    !strcmp_icase(pattern + 1,
 570                                  basename + basenamelen - patternlen + 1))
 571                        return 1;
 572        } else {
 573                if (fnmatch_icase(pattern, basename, 0) == 0)
 574                        return 1;
 575        }
 576        return 0;
 577}
 578
 579int match_pathname(const char *pathname, int pathlen,
 580                   const char *base, int baselen,
 581                   const char *pattern, int prefix, int patternlen,
 582                   int flags)
 583{
 584        const char *name;
 585        int namelen;
 586
 587        /*
 588         * match with FNM_PATHNAME; the pattern has base implicitly
 589         * in front of it.
 590         */
 591        if (*pattern == '/') {
 592                pattern++;
 593                prefix--;
 594        }
 595
 596        /*
 597         * baselen does not count the trailing slash. base[] may or
 598         * may not end with a trailing slash though.
 599         */
 600        if (pathlen < baselen + 1 ||
 601            (baselen && pathname[baselen] != '/') ||
 602            strncmp_icase(pathname, base, baselen))
 603                return 0;
 604
 605        namelen = baselen ? pathlen - baselen - 1 : pathlen;
 606        name = pathname + pathlen - namelen;
 607
 608        if (prefix) {
 609                /*
 610                 * if the non-wildcard part is longer than the
 611                 * remaining pathname, surely it cannot match.
 612                 */
 613                if (prefix > namelen)
 614                        return 0;
 615
 616                if (strncmp_icase(pattern, name, prefix))
 617                        return 0;
 618                pattern += prefix;
 619                name    += prefix;
 620                namelen -= prefix;
 621        }
 622
 623        return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
 624}
 625
 626/* Scan the list and let the last match determine the fate.
 627 * Return 1 for exclude, 0 for include and -1 for undecided.
 628 */
 629int excluded_from_list(const char *pathname,
 630                       int pathlen, const char *basename, int *dtype,
 631                       struct exclude_list *el)
 632{
 633        int i;
 634
 635        if (!el->nr)
 636                return -1;      /* undefined */
 637
 638        for (i = el->nr - 1; 0 <= i; i--) {
 639                struct exclude *x = el->excludes[i];
 640                const char *exclude = x->pattern;
 641                int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 642                int prefix = x->nowildcardlen;
 643
 644                if (x->flags & EXC_FLAG_MUSTBEDIR) {
 645                        if (*dtype == DT_UNKNOWN)
 646                                *dtype = get_dtype(NULL, pathname, pathlen);
 647                        if (*dtype != DT_DIR)
 648                                continue;
 649                }
 650
 651                if (x->flags & EXC_FLAG_NODIR) {
 652                        if (match_basename(basename,
 653                                           pathlen - (basename - pathname),
 654                                           exclude, prefix, x->patternlen,
 655                                           x->flags))
 656                                return to_exclude;
 657                        continue;
 658                }
 659
 660                assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
 661                if (match_pathname(pathname, pathlen,
 662                                   x->base, x->baselen ? x->baselen - 1 : 0,
 663                                   exclude, prefix, x->patternlen, x->flags))
 664                        return to_exclude;
 665        }
 666        return -1; /* undecided */
 667}
 668
 669static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 670{
 671        int pathlen = strlen(pathname);
 672        int st;
 673        const char *basename = strrchr(pathname, '/');
 674        basename = (basename) ? basename+1 : pathname;
 675
 676        prep_exclude(dir, pathname, basename-pathname);
 677        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 678                switch (excluded_from_list(pathname, pathlen, basename,
 679                                           dtype_p, &dir->exclude_list[st])) {
 680                case 0:
 681                        return 0;
 682                case 1:
 683                        return 1;
 684                }
 685        }
 686        return 0;
 687}
 688
 689void path_exclude_check_init(struct path_exclude_check *check,
 690                             struct dir_struct *dir)
 691{
 692        check->dir = dir;
 693        strbuf_init(&check->path, 256);
 694}
 695
 696void path_exclude_check_clear(struct path_exclude_check *check)
 697{
 698        strbuf_release(&check->path);
 699}
 700
 701/*
 702 * Is this name excluded?  This is for a caller like show_files() that
 703 * do not honor directory hierarchy and iterate through paths that are
 704 * possibly in an ignored directory.
 705 *
 706 * A path to a directory known to be excluded is left in check->path to
 707 * optimize for repeated checks for files in the same excluded directory.
 708 */
 709int path_excluded(struct path_exclude_check *check,
 710                  const char *name, int namelen, int *dtype)
 711{
 712        int i;
 713        struct strbuf *path = &check->path;
 714
 715        /*
 716         * we allow the caller to pass namelen as an optimization; it
 717         * must match the length of the name, as we eventually call
 718         * excluded() on the whole name string.
 719         */
 720        if (namelen < 0)
 721                namelen = strlen(name);
 722
 723        if (path->len &&
 724            path->len <= namelen &&
 725            !memcmp(name, path->buf, path->len) &&
 726            (!name[path->len] || name[path->len] == '/'))
 727                return 1;
 728
 729        strbuf_setlen(path, 0);
 730        for (i = 0; name[i]; i++) {
 731                int ch = name[i];
 732
 733                if (ch == '/') {
 734                        int dt = DT_DIR;
 735                        if (excluded(check->dir, path->buf, &dt))
 736                                return 1;
 737                }
 738                strbuf_addch(path, ch);
 739        }
 740
 741        /* An entry in the index; cannot be a directory with subentries */
 742        strbuf_setlen(path, 0);
 743
 744        return excluded(check->dir, name, dtype);
 745}
 746
 747static struct dir_entry *dir_entry_new(const char *pathname, int len)
 748{
 749        struct dir_entry *ent;
 750
 751        ent = xmalloc(sizeof(*ent) + len + 1);
 752        ent->len = len;
 753        memcpy(ent->name, pathname, len);
 754        ent->name[len] = 0;
 755        return ent;
 756}
 757
 758static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 759{
 760        if (cache_name_exists(pathname, len, ignore_case))
 761                return NULL;
 762
 763        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 764        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 765}
 766
 767struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 768{
 769        if (!cache_name_is_other(pathname, len))
 770                return NULL;
 771
 772        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 773        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 774}
 775
 776enum exist_status {
 777        index_nonexistent = 0,
 778        index_directory,
 779        index_gitdir
 780};
 781
 782/*
 783 * Do not use the alphabetically stored index to look up
 784 * the directory name; instead, use the case insensitive
 785 * name hash.
 786 */
 787static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 788{
 789        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 790        unsigned char endchar;
 791
 792        if (!ce)
 793                return index_nonexistent;
 794        endchar = ce->name[len];
 795
 796        /*
 797         * The cache_entry structure returned will contain this dirname
 798         * and possibly additional path components.
 799         */
 800        if (endchar == '/')
 801                return index_directory;
 802
 803        /*
 804         * If there are no additional path components, then this cache_entry
 805         * represents a submodule.  Submodules, despite being directories,
 806         * are stored in the cache without a closing slash.
 807         */
 808        if (!endchar && S_ISGITLINK(ce->ce_mode))
 809                return index_gitdir;
 810
 811        /* This should never be hit, but it exists just in case. */
 812        return index_nonexistent;
 813}
 814
 815/*
 816 * The index sorts alphabetically by entry name, which
 817 * means that a gitlink sorts as '\0' at the end, while
 818 * a directory (which is defined not as an entry, but as
 819 * the files it contains) will sort with the '/' at the
 820 * end.
 821 */
 822static enum exist_status directory_exists_in_index(const char *dirname, int len)
 823{
 824        int pos;
 825
 826        if (ignore_case)
 827                return directory_exists_in_index_icase(dirname, len);
 828
 829        pos = cache_name_pos(dirname, len);
 830        if (pos < 0)
 831                pos = -pos-1;
 832        while (pos < active_nr) {
 833                struct cache_entry *ce = active_cache[pos++];
 834                unsigned char endchar;
 835
 836                if (strncmp(ce->name, dirname, len))
 837                        break;
 838                endchar = ce->name[len];
 839                if (endchar > '/')
 840                        break;
 841                if (endchar == '/')
 842                        return index_directory;
 843                if (!endchar && S_ISGITLINK(ce->ce_mode))
 844                        return index_gitdir;
 845        }
 846        return index_nonexistent;
 847}
 848
 849/*
 850 * When we find a directory when traversing the filesystem, we
 851 * have three distinct cases:
 852 *
 853 *  - ignore it
 854 *  - see it as a directory
 855 *  - recurse into it
 856 *
 857 * and which one we choose depends on a combination of existing
 858 * git index contents and the flags passed into the directory
 859 * traversal routine.
 860 *
 861 * Case 1: If we *already* have entries in the index under that
 862 * directory name, we always recurse into the directory to see
 863 * all the files.
 864 *
 865 * Case 2: If we *already* have that directory name as a gitlink,
 866 * we always continue to see it as a gitlink, regardless of whether
 867 * there is an actual git directory there or not (it might not
 868 * be checked out as a subproject!)
 869 *
 870 * Case 3: if we didn't have it in the index previously, we
 871 * have a few sub-cases:
 872 *
 873 *  (a) if "show_other_directories" is true, we show it as
 874 *      just a directory, unless "hide_empty_directories" is
 875 *      also true and the directory is empty, in which case
 876 *      we just ignore it entirely.
 877 *  (b) if it looks like a git directory, and we don't have
 878 *      'no_gitlinks' set we treat it as a gitlink, and show it
 879 *      as a directory.
 880 *  (c) otherwise, we recurse into it.
 881 */
 882enum directory_treatment {
 883        show_directory,
 884        ignore_directory,
 885        recurse_into_directory
 886};
 887
 888static enum directory_treatment treat_directory(struct dir_struct *dir,
 889        const char *dirname, int len,
 890        const struct path_simplify *simplify)
 891{
 892        /* The "len-1" is to strip the final '/' */
 893        switch (directory_exists_in_index(dirname, len-1)) {
 894        case index_directory:
 895                return recurse_into_directory;
 896
 897        case index_gitdir:
 898                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 899                        return ignore_directory;
 900                return show_directory;
 901
 902        case index_nonexistent:
 903                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 904                        break;
 905                if (!(dir->flags & DIR_NO_GITLINKS)) {
 906                        unsigned char sha1[20];
 907                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 908                                return show_directory;
 909                }
 910                return recurse_into_directory;
 911        }
 912
 913        /* This is the "show_other_directories" case */
 914        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 915                return show_directory;
 916        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 917                return ignore_directory;
 918        return show_directory;
 919}
 920
 921/*
 922 * This is an inexact early pruning of any recursive directory
 923 * reading - if the path cannot possibly be in the pathspec,
 924 * return true, and we'll skip it early.
 925 */
 926static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 927{
 928        if (simplify) {
 929                for (;;) {
 930                        const char *match = simplify->path;
 931                        int len = simplify->len;
 932
 933                        if (!match)
 934                                break;
 935                        if (len > pathlen)
 936                                len = pathlen;
 937                        if (!memcmp(path, match, len))
 938                                return 0;
 939                        simplify++;
 940                }
 941                return 1;
 942        }
 943        return 0;
 944}
 945
 946/*
 947 * This function tells us whether an excluded path matches a
 948 * list of "interesting" pathspecs. That is, whether a path matched
 949 * by any of the pathspecs could possibly be ignored by excluding
 950 * the specified path. This can happen if:
 951 *
 952 *   1. the path is mentioned explicitly in the pathspec
 953 *
 954 *   2. the path is a directory prefix of some element in the
 955 *      pathspec
 956 */
 957static int exclude_matches_pathspec(const char *path, int len,
 958                const struct path_simplify *simplify)
 959{
 960        if (simplify) {
 961                for (; simplify->path; simplify++) {
 962                        if (len == simplify->len
 963                            && !memcmp(path, simplify->path, len))
 964                                return 1;
 965                        if (len < simplify->len
 966                            && simplify->path[len] == '/'
 967                            && !memcmp(path, simplify->path, len))
 968                                return 1;
 969                }
 970        }
 971        return 0;
 972}
 973
 974static int get_index_dtype(const char *path, int len)
 975{
 976        int pos;
 977        struct cache_entry *ce;
 978
 979        ce = cache_name_exists(path, len, 0);
 980        if (ce) {
 981                if (!ce_uptodate(ce))
 982                        return DT_UNKNOWN;
 983                if (S_ISGITLINK(ce->ce_mode))
 984                        return DT_DIR;
 985                /*
 986                 * Nobody actually cares about the
 987                 * difference between DT_LNK and DT_REG
 988                 */
 989                return DT_REG;
 990        }
 991
 992        /* Try to look it up as a directory */
 993        pos = cache_name_pos(path, len);
 994        if (pos >= 0)
 995                return DT_UNKNOWN;
 996        pos = -pos-1;
 997        while (pos < active_nr) {
 998                ce = active_cache[pos++];
 999                if (strncmp(ce->name, path, len))
1000                        break;
1001                if (ce->name[len] > '/')
1002                        break;
1003                if (ce->name[len] < '/')
1004                        continue;
1005                if (!ce_uptodate(ce))
1006                        break;  /* continue? */
1007                return DT_DIR;
1008        }
1009        return DT_UNKNOWN;
1010}
1011
1012static int get_dtype(struct dirent *de, const char *path, int len)
1013{
1014        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
1015        struct stat st;
1016
1017        if (dtype != DT_UNKNOWN)
1018                return dtype;
1019        dtype = get_index_dtype(path, len);
1020        if (dtype != DT_UNKNOWN)
1021                return dtype;
1022        if (lstat(path, &st))
1023                return dtype;
1024        if (S_ISREG(st.st_mode))
1025                return DT_REG;
1026        if (S_ISDIR(st.st_mode))
1027                return DT_DIR;
1028        if (S_ISLNK(st.st_mode))
1029                return DT_LNK;
1030        return dtype;
1031}
1032
1033enum path_treatment {
1034        path_ignored,
1035        path_handled,
1036        path_recurse
1037};
1038
1039static enum path_treatment treat_one_path(struct dir_struct *dir,
1040                                          struct strbuf *path,
1041                                          const struct path_simplify *simplify,
1042                                          int dtype, struct dirent *de)
1043{
1044        int exclude = excluded(dir, path->buf, &dtype);
1045        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
1046            && exclude_matches_pathspec(path->buf, path->len, simplify))
1047                dir_add_ignored(dir, path->buf, path->len);
1048
1049        /*
1050         * Excluded? If we don't explicitly want to show
1051         * ignored files, ignore it
1052         */
1053        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
1054                return path_ignored;
1055
1056        if (dtype == DT_UNKNOWN)
1057                dtype = get_dtype(de, path->buf, path->len);
1058
1059        /*
1060         * Do we want to see just the ignored files?
1061         * We still need to recurse into directories,
1062         * even if we don't ignore them, since the
1063         * directory may contain files that we do..
1064         */
1065        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
1066                if (dtype != DT_DIR)
1067                        return path_ignored;
1068        }
1069
1070        switch (dtype) {
1071        default:
1072                return path_ignored;
1073        case DT_DIR:
1074                strbuf_addch(path, '/');
1075                switch (treat_directory(dir, path->buf, path->len, simplify)) {
1076                case show_directory:
1077                        if (exclude != !!(dir->flags
1078                                          & DIR_SHOW_IGNORED))
1079                                return path_ignored;
1080                        break;
1081                case recurse_into_directory:
1082                        return path_recurse;
1083                case ignore_directory:
1084                        return path_ignored;
1085                }
1086                break;
1087        case DT_REG:
1088        case DT_LNK:
1089                break;
1090        }
1091        return path_handled;
1092}
1093
1094static enum path_treatment treat_path(struct dir_struct *dir,
1095                                      struct dirent *de,
1096                                      struct strbuf *path,
1097                                      int baselen,
1098                                      const struct path_simplify *simplify)
1099{
1100        int dtype;
1101
1102        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
1103                return path_ignored;
1104        strbuf_setlen(path, baselen);
1105        strbuf_addstr(path, de->d_name);
1106        if (simplify_away(path->buf, path->len, simplify))
1107                return path_ignored;
1108
1109        dtype = DTYPE(de);
1110        return treat_one_path(dir, path, simplify, dtype, de);
1111}
1112
1113/*
1114 * Read a directory tree. We currently ignore anything but
1115 * directories, regular files and symlinks. That's because git
1116 * doesn't handle them at all yet. Maybe that will change some
1117 * day.
1118 *
1119 * Also, we ignore the name ".git" (even if it is not a directory).
1120 * That likely will not change.
1121 */
1122static int read_directory_recursive(struct dir_struct *dir,
1123                                    const char *base, int baselen,
1124                                    int check_only,
1125                                    const struct path_simplify *simplify)
1126{
1127        DIR *fdir;
1128        int contents = 0;
1129        struct dirent *de;
1130        struct strbuf path = STRBUF_INIT;
1131
1132        strbuf_add(&path, base, baselen);
1133
1134        fdir = opendir(path.len ? path.buf : ".");
1135        if (!fdir)
1136                goto out;
1137
1138        while ((de = readdir(fdir)) != NULL) {
1139                switch (treat_path(dir, de, &path, baselen, simplify)) {
1140                case path_recurse:
1141                        contents += read_directory_recursive(dir, path.buf,
1142                                                             path.len, 0,
1143                                                             simplify);
1144                        continue;
1145                case path_ignored:
1146                        continue;
1147                case path_handled:
1148                        break;
1149                }
1150                contents++;
1151                if (check_only)
1152                        break;
1153                dir_add_name(dir, path.buf, path.len);
1154        }
1155        closedir(fdir);
1156 out:
1157        strbuf_release(&path);
1158
1159        return contents;
1160}
1161
1162static int cmp_name(const void *p1, const void *p2)
1163{
1164        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1165        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1166
1167        return cache_name_compare(e1->name, e1->len,
1168                                  e2->name, e2->len);
1169}
1170
1171static struct path_simplify *create_simplify(const char **pathspec)
1172{
1173        int nr, alloc = 0;
1174        struct path_simplify *simplify = NULL;
1175
1176        if (!pathspec)
1177                return NULL;
1178
1179        for (nr = 0 ; ; nr++) {
1180                const char *match;
1181                if (nr >= alloc) {
1182                        alloc = alloc_nr(alloc);
1183                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1184                }
1185                match = *pathspec++;
1186                if (!match)
1187                        break;
1188                simplify[nr].path = match;
1189                simplify[nr].len = simple_length(match);
1190        }
1191        simplify[nr].path = NULL;
1192        simplify[nr].len = 0;
1193        return simplify;
1194}
1195
1196static void free_simplify(struct path_simplify *simplify)
1197{
1198        free(simplify);
1199}
1200
1201static int treat_leading_path(struct dir_struct *dir,
1202                              const char *path, int len,
1203                              const struct path_simplify *simplify)
1204{
1205        struct strbuf sb = STRBUF_INIT;
1206        int baselen, rc = 0;
1207        const char *cp;
1208
1209        while (len && path[len - 1] == '/')
1210                len--;
1211        if (!len)
1212                return 1;
1213        baselen = 0;
1214        while (1) {
1215                cp = path + baselen + !!baselen;
1216                cp = memchr(cp, '/', path + len - cp);
1217                if (!cp)
1218                        baselen = len;
1219                else
1220                        baselen = cp - path;
1221                strbuf_setlen(&sb, 0);
1222                strbuf_add(&sb, path, baselen);
1223                if (!is_directory(sb.buf))
1224                        break;
1225                if (simplify_away(sb.buf, sb.len, simplify))
1226                        break;
1227                if (treat_one_path(dir, &sb, simplify,
1228                                   DT_DIR, NULL) == path_ignored)
1229                        break; /* do not recurse into it */
1230                if (len <= baselen) {
1231                        rc = 1;
1232                        break; /* finished checking */
1233                }
1234        }
1235        strbuf_release(&sb);
1236        return rc;
1237}
1238
1239int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1240{
1241        struct path_simplify *simplify;
1242
1243        if (has_symlink_leading_path(path, len))
1244                return dir->nr;
1245
1246        simplify = create_simplify(pathspec);
1247        if (!len || treat_leading_path(dir, path, len, simplify))
1248                read_directory_recursive(dir, path, len, 0, simplify);
1249        free_simplify(simplify);
1250        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1251        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1252        return dir->nr;
1253}
1254
1255int file_exists(const char *f)
1256{
1257        struct stat sb;
1258        return lstat(f, &sb) == 0;
1259}
1260
1261/*
1262 * Given two normalized paths (a trailing slash is ok), if subdir is
1263 * outside dir, return -1.  Otherwise return the offset in subdir that
1264 * can be used as relative path to dir.
1265 */
1266int dir_inside_of(const char *subdir, const char *dir)
1267{
1268        int offset = 0;
1269
1270        assert(dir && subdir && *dir && *subdir);
1271
1272        while (*dir && *subdir && *dir == *subdir) {
1273                dir++;
1274                subdir++;
1275                offset++;
1276        }
1277
1278        /* hel[p]/me vs hel[l]/yeah */
1279        if (*dir && *subdir)
1280                return -1;
1281
1282        if (!*subdir)
1283                return !*dir ? offset : -1; /* same dir */
1284
1285        /* foo/[b]ar vs foo/[] */
1286        if (is_dir_sep(dir[-1]))
1287                return is_dir_sep(subdir[-1]) ? offset : -1;
1288
1289        /* foo[/]bar vs foo[] */
1290        return is_dir_sep(*subdir) ? offset + 1 : -1;
1291}
1292
1293int is_inside_dir(const char *dir)
1294{
1295        char cwd[PATH_MAX];
1296        if (!dir)
1297                return 0;
1298        if (!getcwd(cwd, sizeof(cwd)))
1299                die_errno("can't find the current directory");
1300        return dir_inside_of(cwd, dir) >= 0;
1301}
1302
1303int is_empty_dir(const char *path)
1304{
1305        DIR *dir = opendir(path);
1306        struct dirent *e;
1307        int ret = 1;
1308
1309        if (!dir)
1310                return 0;
1311
1312        while ((e = readdir(dir)) != NULL)
1313                if (!is_dot_or_dotdot(e->d_name)) {
1314                        ret = 0;
1315                        break;
1316                }
1317
1318        closedir(dir);
1319        return ret;
1320}
1321
1322static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
1323{
1324        DIR *dir;
1325        struct dirent *e;
1326        int ret = 0, original_len = path->len, len, kept_down = 0;
1327        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1328        int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL);
1329        unsigned char submodule_head[20];
1330
1331        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1332            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
1333                /* Do not descend and nuke a nested git work tree. */
1334                if (kept_up)
1335                        *kept_up = 1;
1336                return 0;
1337        }
1338
1339        flag &= ~REMOVE_DIR_KEEP_TOPLEVEL;
1340        dir = opendir(path->buf);
1341        if (!dir) {
1342                /* an empty dir could be removed even if it is unreadble */
1343                if (!keep_toplevel)
1344                        return rmdir(path->buf);
1345                else
1346                        return -1;
1347        }
1348        if (path->buf[original_len - 1] != '/')
1349                strbuf_addch(path, '/');
1350
1351        len = path->len;
1352        while ((e = readdir(dir)) != NULL) {
1353                struct stat st;
1354                if (is_dot_or_dotdot(e->d_name))
1355                        continue;
1356
1357                strbuf_setlen(path, len);
1358                strbuf_addstr(path, e->d_name);
1359                if (lstat(path->buf, &st))
1360                        ; /* fall thru */
1361                else if (S_ISDIR(st.st_mode)) {
1362                        if (!remove_dir_recurse(path, flag, &kept_down))
1363                                continue; /* happy */
1364                } else if (!only_empty && !unlink(path->buf))
1365                        continue; /* happy, too */
1366
1367                /* path too long, stat fails, or non-directory still exists */
1368                ret = -1;
1369                break;
1370        }
1371        closedir(dir);
1372
1373        strbuf_setlen(path, original_len);
1374        if (!ret && !keep_toplevel && !kept_down)
1375                ret = rmdir(path->buf);
1376        else if (kept_up)
1377                /*
1378                 * report the uplevel that it is not an error that we
1379                 * did not rmdir() our directory.
1380                 */
1381                *kept_up = !ret;
1382        return ret;
1383}
1384
1385int remove_dir_recursively(struct strbuf *path, int flag)
1386{
1387        return remove_dir_recurse(path, flag, NULL);
1388}
1389
1390void setup_standard_excludes(struct dir_struct *dir)
1391{
1392        const char *path;
1393        char *xdg_path;
1394
1395        dir->exclude_per_dir = ".gitignore";
1396        path = git_path("info/exclude");
1397        if (!excludes_file) {
1398                home_config_paths(NULL, &xdg_path, "ignore");
1399                excludes_file = xdg_path;
1400        }
1401        if (!access_or_warn(path, R_OK))
1402                add_excludes_from_file(dir, path);
1403        if (excludes_file && !access_or_warn(excludes_file, R_OK))
1404                add_excludes_from_file(dir, excludes_file);
1405}
1406
1407int remove_path(const char *name)
1408{
1409        char *slash;
1410
1411        if (unlink(name) && errno != ENOENT)
1412                return -1;
1413
1414        slash = strrchr(name, '/');
1415        if (slash) {
1416                char *dirs = xstrdup(name);
1417                slash = dirs + (slash - name);
1418                do {
1419                        *slash = '\0';
1420                } while (rmdir(dirs) == 0 && (slash = strrchr(dirs, '/')));
1421                free(dirs);
1422        }
1423        return 0;
1424}
1425
1426static int pathspec_item_cmp(const void *a_, const void *b_)
1427{
1428        struct pathspec_item *a, *b;
1429
1430        a = (struct pathspec_item *)a_;
1431        b = (struct pathspec_item *)b_;
1432        return strcmp(a->match, b->match);
1433}
1434
1435int init_pathspec(struct pathspec *pathspec, const char **paths)
1436{
1437        const char **p = paths;
1438        int i;
1439
1440        memset(pathspec, 0, sizeof(*pathspec));
1441        if (!p)
1442                return 0;
1443        while (*p)
1444                p++;
1445        pathspec->raw = paths;
1446        pathspec->nr = p - paths;
1447        if (!pathspec->nr)
1448                return 0;
1449
1450        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1451        for (i = 0; i < pathspec->nr; i++) {
1452                struct pathspec_item *item = pathspec->items+i;
1453                const char *path = paths[i];
1454
1455                item->match = path;
1456                item->len = strlen(path);
1457                item->nowildcard_len = simple_length(path);
1458                item->flags = 0;
1459                if (item->nowildcard_len < item->len) {
1460                        pathspec->has_wildcard = 1;
1461                        if (path[item->nowildcard_len] == '*' &&
1462                            no_wildcard(path + item->nowildcard_len + 1))
1463                                item->flags |= PATHSPEC_ONESTAR;
1464                }
1465        }
1466
1467        qsort(pathspec->items, pathspec->nr,
1468              sizeof(struct pathspec_item), pathspec_item_cmp);
1469
1470        return 0;
1471}
1472
1473void free_pathspec(struct pathspec *pathspec)
1474{
1475        free(pathspec->items);
1476        pathspec->items = NULL;
1477}