dir.con commit strbuf: add strbuf_add*_urlencode (c505116)
   1/*
   2 * This handles recursive filename detection with exclude
   3 * files, index knowledge etc..
   4 *
   5 * Copyright (C) Linus Torvalds, 2005-2006
   6 *               Junio Hamano, 2005-2006
   7 */
   8#include "cache.h"
   9#include "dir.h"
  10#include "refs.h"
  11
  12struct path_simplify {
  13        int len;
  14        const char *path;
  15};
  16
  17static int read_directory_recursive(struct dir_struct *dir, const char *path, int len,
  18        int check_only, const struct path_simplify *simplify);
  19static int get_dtype(struct dirent *de, const char *path, int len);
  20
  21/* helper string functions with support for the ignore_case flag */
  22int strcmp_icase(const char *a, const char *b)
  23{
  24        return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
  25}
  26
  27int strncmp_icase(const char *a, const char *b, size_t count)
  28{
  29        return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
  30}
  31
  32int fnmatch_icase(const char *pattern, const char *string, int flags)
  33{
  34        return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
  35}
  36
  37static size_t common_prefix_len(const char **pathspec)
  38{
  39        const char *n, *first;
  40        size_t max = 0;
  41
  42        if (!pathspec)
  43                return max;
  44
  45        first = *pathspec;
  46        while ((n = *pathspec++)) {
  47                size_t i, len = 0;
  48                for (i = 0; first == n || i < max; i++) {
  49                        char c = n[i];
  50                        if (!c || c != first[i] || is_glob_special(c))
  51                                break;
  52                        if (c == '/')
  53                                len = i + 1;
  54                }
  55                if (first == n || len < max) {
  56                        max = len;
  57                        if (!max)
  58                                break;
  59                }
  60        }
  61        return max;
  62}
  63
  64/*
  65 * Returns a copy of the longest leading path common among all
  66 * pathspecs.
  67 */
  68char *common_prefix(const char **pathspec)
  69{
  70        unsigned long len = common_prefix_len(pathspec);
  71
  72        return len ? xmemdupz(*pathspec, len) : NULL;
  73}
  74
  75int fill_directory(struct dir_struct *dir, const char **pathspec)
  76{
  77        const char *path;
  78        size_t len;
  79
  80        /*
  81         * Calculate common prefix for the pathspec, and
  82         * use that to optimize the directory walk
  83         */
  84        len = common_prefix_len(pathspec);
  85        path = "";
  86
  87        if (len)
  88                path = xmemdupz(*pathspec, len);
  89
  90        /* Read the directory and prune it */
  91        read_directory(dir, path, len, pathspec);
  92        if (*path)
  93                free((char *)path);
  94        return len;
  95}
  96
  97int within_depth(const char *name, int namelen,
  98                        int depth, int max_depth)
  99{
 100        const char *cp = name, *cpe = name + namelen;
 101
 102        while (cp < cpe) {
 103                if (*cp++ != '/')
 104                        continue;
 105                depth++;
 106                if (depth > max_depth)
 107                        return 0;
 108        }
 109        return 1;
 110}
 111
 112/*
 113 * Does 'match' match the given name?
 114 * A match is found if
 115 *
 116 * (1) the 'match' string is leading directory of 'name', or
 117 * (2) the 'match' string is a wildcard and matches 'name', or
 118 * (3) the 'match' string is exactly the same as 'name'.
 119 *
 120 * and the return value tells which case it was.
 121 *
 122 * It returns 0 when there is no match.
 123 */
 124static int match_one(const char *match, const char *name, int namelen)
 125{
 126        int matchlen;
 127
 128        /* If the match was just the prefix, we matched */
 129        if (!*match)
 130                return MATCHED_RECURSIVELY;
 131
 132        if (ignore_case) {
 133                for (;;) {
 134                        unsigned char c1 = tolower(*match);
 135                        unsigned char c2 = tolower(*name);
 136                        if (c1 == '\0' || is_glob_special(c1))
 137                                break;
 138                        if (c1 != c2)
 139                                return 0;
 140                        match++;
 141                        name++;
 142                        namelen--;
 143                }
 144        } else {
 145                for (;;) {
 146                        unsigned char c1 = *match;
 147                        unsigned char c2 = *name;
 148                        if (c1 == '\0' || is_glob_special(c1))
 149                                break;
 150                        if (c1 != c2)
 151                                return 0;
 152                        match++;
 153                        name++;
 154                        namelen--;
 155                }
 156        }
 157
 158
 159        /*
 160         * If we don't match the matchstring exactly,
 161         * we need to match by fnmatch
 162         */
 163        matchlen = strlen(match);
 164        if (strncmp_icase(match, name, matchlen))
 165                return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 166
 167        if (namelen == matchlen)
 168                return MATCHED_EXACTLY;
 169        if (match[matchlen-1] == '/' || name[matchlen] == '/')
 170                return MATCHED_RECURSIVELY;
 171        return 0;
 172}
 173
 174/*
 175 * Given a name and a list of pathspecs, see if the name matches
 176 * any of the pathspecs.  The caller is also interested in seeing
 177 * all pathspec matches some names it calls this function with
 178 * (otherwise the user could have mistyped the unmatched pathspec),
 179 * and a mark is left in seen[] array for pathspec element that
 180 * actually matched anything.
 181 */
 182int match_pathspec(const char **pathspec, const char *name, int namelen,
 183                int prefix, char *seen)
 184{
 185        int i, retval = 0;
 186
 187        if (!pathspec)
 188                return 1;
 189
 190        name += prefix;
 191        namelen -= prefix;
 192
 193        for (i = 0; pathspec[i] != NULL; i++) {
 194                int how;
 195                const char *match = pathspec[i] + prefix;
 196                if (seen && seen[i] == MATCHED_EXACTLY)
 197                        continue;
 198                how = match_one(match, name, namelen);
 199                if (how) {
 200                        if (retval < how)
 201                                retval = how;
 202                        if (seen && seen[i] < how)
 203                                seen[i] = how;
 204                }
 205        }
 206        return retval;
 207}
 208
 209/*
 210 * Does 'match' match the given name?
 211 * A match is found if
 212 *
 213 * (1) the 'match' string is leading directory of 'name', or
 214 * (2) the 'match' string is a wildcard and matches 'name', or
 215 * (3) the 'match' string is exactly the same as 'name'.
 216 *
 217 * and the return value tells which case it was.
 218 *
 219 * It returns 0 when there is no match.
 220 */
 221static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 222                               const char *name, int namelen)
 223{
 224        /* name/namelen has prefix cut off by caller */
 225        const char *match = item->match + prefix;
 226        int matchlen = item->len - prefix;
 227
 228        /* If the match was just the prefix, we matched */
 229        if (!*match)
 230                return MATCHED_RECURSIVELY;
 231
 232        if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
 233                if (matchlen == namelen)
 234                        return MATCHED_EXACTLY;
 235
 236                if (match[matchlen-1] == '/' || name[matchlen] == '/')
 237                        return MATCHED_RECURSIVELY;
 238        }
 239
 240        if (item->use_wildcard && !fnmatch(match, name, 0))
 241                return MATCHED_FNMATCH;
 242
 243        return 0;
 244}
 245
 246/*
 247 * Given a name and a list of pathspecs, see if the name matches
 248 * any of the pathspecs.  The caller is also interested in seeing
 249 * all pathspec matches some names it calls this function with
 250 * (otherwise the user could have mistyped the unmatched pathspec),
 251 * and a mark is left in seen[] array for pathspec element that
 252 * actually matched anything.
 253 */
 254int match_pathspec_depth(const struct pathspec *ps,
 255                         const char *name, int namelen,
 256                         int prefix, char *seen)
 257{
 258        int i, retval = 0;
 259
 260        if (!ps->nr) {
 261                if (!ps->recursive || ps->max_depth == -1)
 262                        return MATCHED_RECURSIVELY;
 263
 264                if (within_depth(name, namelen, 0, ps->max_depth))
 265                        return MATCHED_EXACTLY;
 266                else
 267                        return 0;
 268        }
 269
 270        name += prefix;
 271        namelen -= prefix;
 272
 273        for (i = ps->nr - 1; i >= 0; i--) {
 274                int how;
 275                if (seen && seen[i] == MATCHED_EXACTLY)
 276                        continue;
 277                how = match_pathspec_item(ps->items+i, prefix, name, namelen);
 278                if (ps->recursive && ps->max_depth != -1 &&
 279                    how && how != MATCHED_FNMATCH) {
 280                        int len = ps->items[i].len;
 281                        if (name[len] == '/')
 282                                len++;
 283                        if (within_depth(name+len, namelen-len, 0, ps->max_depth))
 284                                how = MATCHED_EXACTLY;
 285                        else
 286                                how = 0;
 287                }
 288                if (how) {
 289                        if (retval < how)
 290                                retval = how;
 291                        if (seen && seen[i] < how)
 292                                seen[i] = how;
 293                }
 294        }
 295        return retval;
 296}
 297
 298static int no_wildcard(const char *string)
 299{
 300        return string[strcspn(string, "*?[{\\")] == '\0';
 301}
 302
 303void add_exclude(const char *string, const char *base,
 304                 int baselen, struct exclude_list *which)
 305{
 306        struct exclude *x;
 307        size_t len;
 308        int to_exclude = 1;
 309        int flags = 0;
 310
 311        if (*string == '!') {
 312                to_exclude = 0;
 313                string++;
 314        }
 315        len = strlen(string);
 316        if (len && string[len - 1] == '/') {
 317                char *s;
 318                x = xmalloc(sizeof(*x) + len);
 319                s = (char *)(x+1);
 320                memcpy(s, string, len - 1);
 321                s[len - 1] = '\0';
 322                string = s;
 323                x->pattern = s;
 324                flags = EXC_FLAG_MUSTBEDIR;
 325        } else {
 326                x = xmalloc(sizeof(*x));
 327                x->pattern = string;
 328        }
 329        x->to_exclude = to_exclude;
 330        x->patternlen = strlen(string);
 331        x->base = base;
 332        x->baselen = baselen;
 333        x->flags = flags;
 334        if (!strchr(string, '/'))
 335                x->flags |= EXC_FLAG_NODIR;
 336        if (no_wildcard(string))
 337                x->flags |= EXC_FLAG_NOWILDCARD;
 338        if (*string == '*' && no_wildcard(string+1))
 339                x->flags |= EXC_FLAG_ENDSWITH;
 340        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 341        which->excludes[which->nr++] = x;
 342}
 343
 344static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 345{
 346        int pos, len;
 347        unsigned long sz;
 348        enum object_type type;
 349        void *data;
 350        struct index_state *istate = &the_index;
 351
 352        len = strlen(path);
 353        pos = index_name_pos(istate, path, len);
 354        if (pos < 0)
 355                return NULL;
 356        if (!ce_skip_worktree(istate->cache[pos]))
 357                return NULL;
 358        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 359        if (!data || type != OBJ_BLOB) {
 360                free(data);
 361                return NULL;
 362        }
 363        *size = xsize_t(sz);
 364        return data;
 365}
 366
 367void free_excludes(struct exclude_list *el)
 368{
 369        int i;
 370
 371        for (i = 0; i < el->nr; i++)
 372                free(el->excludes[i]);
 373        free(el->excludes);
 374
 375        el->nr = 0;
 376        el->excludes = NULL;
 377}
 378
 379int add_excludes_from_file_to_list(const char *fname,
 380                                   const char *base,
 381                                   int baselen,
 382                                   char **buf_p,
 383                                   struct exclude_list *which,
 384                                   int check_index)
 385{
 386        struct stat st;
 387        int fd, i;
 388        size_t size = 0;
 389        char *buf, *entry;
 390
 391        fd = open(fname, O_RDONLY);
 392        if (fd < 0 || fstat(fd, &st) < 0) {
 393                if (0 <= fd)
 394                        close(fd);
 395                if (!check_index ||
 396                    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
 397                        return -1;
 398                if (size == 0) {
 399                        free(buf);
 400                        return 0;
 401                }
 402                if (buf[size-1] != '\n') {
 403                        buf = xrealloc(buf, size+1);
 404                        buf[size++] = '\n';
 405                }
 406        }
 407        else {
 408                size = xsize_t(st.st_size);
 409                if (size == 0) {
 410                        close(fd);
 411                        return 0;
 412                }
 413                buf = xmalloc(size+1);
 414                if (read_in_full(fd, buf, size) != size) {
 415                        free(buf);
 416                        close(fd);
 417                        return -1;
 418                }
 419                buf[size++] = '\n';
 420                close(fd);
 421        }
 422
 423        if (buf_p)
 424                *buf_p = buf;
 425        entry = buf;
 426        for (i = 0; i < size; i++) {
 427                if (buf[i] == '\n') {
 428                        if (entry != buf + i && entry[0] != '#') {
 429                                buf[i - (i && buf[i-1] == '\r')] = 0;
 430                                add_exclude(entry, base, baselen, which);
 431                        }
 432                        entry = buf + i + 1;
 433                }
 434        }
 435        return 0;
 436}
 437
 438void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 439{
 440        if (add_excludes_from_file_to_list(fname, "", 0, NULL,
 441                                           &dir->exclude_list[EXC_FILE], 0) < 0)
 442                die("cannot use %s as an exclude file", fname);
 443}
 444
 445static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 446{
 447        struct exclude_list *el;
 448        struct exclude_stack *stk = NULL;
 449        int current;
 450
 451        if ((!dir->exclude_per_dir) ||
 452            (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
 453                return; /* too long a path -- ignore */
 454
 455        /* Pop the ones that are not the prefix of the path being checked. */
 456        el = &dir->exclude_list[EXC_DIRS];
 457        while ((stk = dir->exclude_stack) != NULL) {
 458                if (stk->baselen <= baselen &&
 459                    !strncmp(dir->basebuf, base, stk->baselen))
 460                        break;
 461                dir->exclude_stack = stk->prev;
 462                while (stk->exclude_ix < el->nr)
 463                        free(el->excludes[--el->nr]);
 464                free(stk->filebuf);
 465                free(stk);
 466        }
 467
 468        /* Read from the parent directories and push them down. */
 469        current = stk ? stk->baselen : -1;
 470        while (current < baselen) {
 471                struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 472                const char *cp;
 473
 474                if (current < 0) {
 475                        cp = base;
 476                        current = 0;
 477                }
 478                else {
 479                        cp = strchr(base + current + 1, '/');
 480                        if (!cp)
 481                                die("oops in prep_exclude");
 482                        cp++;
 483                }
 484                stk->prev = dir->exclude_stack;
 485                stk->baselen = cp - base;
 486                stk->exclude_ix = el->nr;
 487                memcpy(dir->basebuf + current, base + current,
 488                       stk->baselen - current);
 489                strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
 490                add_excludes_from_file_to_list(dir->basebuf,
 491                                               dir->basebuf, stk->baselen,
 492                                               &stk->filebuf, el, 1);
 493                dir->exclude_stack = stk;
 494                current = stk->baselen;
 495        }
 496        dir->basebuf[baselen] = '\0';
 497}
 498
 499/* Scan the list and let the last match determine the fate.
 500 * Return 1 for exclude, 0 for include and -1 for undecided.
 501 */
 502int excluded_from_list(const char *pathname,
 503                       int pathlen, const char *basename, int *dtype,
 504                       struct exclude_list *el)
 505{
 506        int i;
 507
 508        if (el->nr) {
 509                for (i = el->nr - 1; 0 <= i; i--) {
 510                        struct exclude *x = el->excludes[i];
 511                        const char *exclude = x->pattern;
 512                        int to_exclude = x->to_exclude;
 513
 514                        if (x->flags & EXC_FLAG_MUSTBEDIR) {
 515                                if (*dtype == DT_UNKNOWN)
 516                                        *dtype = get_dtype(NULL, pathname, pathlen);
 517                                if (*dtype != DT_DIR)
 518                                        continue;
 519                        }
 520
 521                        if (x->flags & EXC_FLAG_NODIR) {
 522                                /* match basename */
 523                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 524                                        if (!strcmp_icase(exclude, basename))
 525                                                return to_exclude;
 526                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
 527                                        if (x->patternlen - 1 <= pathlen &&
 528                                            !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
 529                                                return to_exclude;
 530                                } else {
 531                                        if (fnmatch_icase(exclude, basename, 0) == 0)
 532                                                return to_exclude;
 533                                }
 534                        }
 535                        else {
 536                                /* match with FNM_PATHNAME:
 537                                 * exclude has base (baselen long) implicitly
 538                                 * in front of it.
 539                                 */
 540                                int baselen = x->baselen;
 541                                if (*exclude == '/')
 542                                        exclude++;
 543
 544                                if (pathlen < baselen ||
 545                                    (baselen && pathname[baselen-1] != '/') ||
 546                                    strncmp_icase(pathname, x->base, baselen))
 547                                    continue;
 548
 549                                if (x->flags & EXC_FLAG_NOWILDCARD) {
 550                                        if (!strcmp_icase(exclude, pathname + baselen))
 551                                                return to_exclude;
 552                                } else {
 553                                        if (fnmatch_icase(exclude, pathname+baselen,
 554                                                    FNM_PATHNAME) == 0)
 555                                            return to_exclude;
 556                                }
 557                        }
 558                }
 559        }
 560        return -1; /* undecided */
 561}
 562
 563int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 564{
 565        int pathlen = strlen(pathname);
 566        int st;
 567        const char *basename = strrchr(pathname, '/');
 568        basename = (basename) ? basename+1 : pathname;
 569
 570        prep_exclude(dir, pathname, basename-pathname);
 571        for (st = EXC_CMDL; st <= EXC_FILE; st++) {
 572                switch (excluded_from_list(pathname, pathlen, basename,
 573                                           dtype_p, &dir->exclude_list[st])) {
 574                case 0:
 575                        return 0;
 576                case 1:
 577                        return 1;
 578                }
 579        }
 580        return 0;
 581}
 582
 583static struct dir_entry *dir_entry_new(const char *pathname, int len)
 584{
 585        struct dir_entry *ent;
 586
 587        ent = xmalloc(sizeof(*ent) + len + 1);
 588        ent->len = len;
 589        memcpy(ent->name, pathname, len);
 590        ent->name[len] = 0;
 591        return ent;
 592}
 593
 594static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 595{
 596        if (cache_name_exists(pathname, len, ignore_case))
 597                return NULL;
 598
 599        ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
 600        return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
 601}
 602
 603struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len)
 604{
 605        if (!cache_name_is_other(pathname, len))
 606                return NULL;
 607
 608        ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc);
 609        return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len);
 610}
 611
 612enum exist_status {
 613        index_nonexistent = 0,
 614        index_directory,
 615        index_gitdir
 616};
 617
 618/*
 619 * Do not use the alphabetically stored index to look up
 620 * the directory name; instead, use the case insensitive
 621 * name hash.
 622 */
 623static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 624{
 625        struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
 626        unsigned char endchar;
 627
 628        if (!ce)
 629                return index_nonexistent;
 630        endchar = ce->name[len];
 631
 632        /*
 633         * The cache_entry structure returned will contain this dirname
 634         * and possibly additional path components.
 635         */
 636        if (endchar == '/')
 637                return index_directory;
 638
 639        /*
 640         * If there are no additional path components, then this cache_entry
 641         * represents a submodule.  Submodules, despite being directories,
 642         * are stored in the cache without a closing slash.
 643         */
 644        if (!endchar && S_ISGITLINK(ce->ce_mode))
 645                return index_gitdir;
 646
 647        /* This should never be hit, but it exists just in case. */
 648        return index_nonexistent;
 649}
 650
 651/*
 652 * The index sorts alphabetically by entry name, which
 653 * means that a gitlink sorts as '\0' at the end, while
 654 * a directory (which is defined not as an entry, but as
 655 * the files it contains) will sort with the '/' at the
 656 * end.
 657 */
 658static enum exist_status directory_exists_in_index(const char *dirname, int len)
 659{
 660        int pos;
 661
 662        if (ignore_case)
 663                return directory_exists_in_index_icase(dirname, len);
 664
 665        pos = cache_name_pos(dirname, len);
 666        if (pos < 0)
 667                pos = -pos-1;
 668        while (pos < active_nr) {
 669                struct cache_entry *ce = active_cache[pos++];
 670                unsigned char endchar;
 671
 672                if (strncmp(ce->name, dirname, len))
 673                        break;
 674                endchar = ce->name[len];
 675                if (endchar > '/')
 676                        break;
 677                if (endchar == '/')
 678                        return index_directory;
 679                if (!endchar && S_ISGITLINK(ce->ce_mode))
 680                        return index_gitdir;
 681        }
 682        return index_nonexistent;
 683}
 684
 685/*
 686 * When we find a directory when traversing the filesystem, we
 687 * have three distinct cases:
 688 *
 689 *  - ignore it
 690 *  - see it as a directory
 691 *  - recurse into it
 692 *
 693 * and which one we choose depends on a combination of existing
 694 * git index contents and the flags passed into the directory
 695 * traversal routine.
 696 *
 697 * Case 1: If we *already* have entries in the index under that
 698 * directory name, we always recurse into the directory to see
 699 * all the files.
 700 *
 701 * Case 2: If we *already* have that directory name as a gitlink,
 702 * we always continue to see it as a gitlink, regardless of whether
 703 * there is an actual git directory there or not (it might not
 704 * be checked out as a subproject!)
 705 *
 706 * Case 3: if we didn't have it in the index previously, we
 707 * have a few sub-cases:
 708 *
 709 *  (a) if "show_other_directories" is true, we show it as
 710 *      just a directory, unless "hide_empty_directories" is
 711 *      also true and the directory is empty, in which case
 712 *      we just ignore it entirely.
 713 *  (b) if it looks like a git directory, and we don't have
 714 *      'no_gitlinks' set we treat it as a gitlink, and show it
 715 *      as a directory.
 716 *  (c) otherwise, we recurse into it.
 717 */
 718enum directory_treatment {
 719        show_directory,
 720        ignore_directory,
 721        recurse_into_directory
 722};
 723
 724static enum directory_treatment treat_directory(struct dir_struct *dir,
 725        const char *dirname, int len,
 726        const struct path_simplify *simplify)
 727{
 728        /* The "len-1" is to strip the final '/' */
 729        switch (directory_exists_in_index(dirname, len-1)) {
 730        case index_directory:
 731                return recurse_into_directory;
 732
 733        case index_gitdir:
 734                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 735                        return ignore_directory;
 736                return show_directory;
 737
 738        case index_nonexistent:
 739                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
 740                        break;
 741                if (!(dir->flags & DIR_NO_GITLINKS)) {
 742                        unsigned char sha1[20];
 743                        if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
 744                                return show_directory;
 745                }
 746                return recurse_into_directory;
 747        }
 748
 749        /* This is the "show_other_directories" case */
 750        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 751                return show_directory;
 752        if (!read_directory_recursive(dir, dirname, len, 1, simplify))
 753                return ignore_directory;
 754        return show_directory;
 755}
 756
 757/*
 758 * This is an inexact early pruning of any recursive directory
 759 * reading - if the path cannot possibly be in the pathspec,
 760 * return true, and we'll skip it early.
 761 */
 762static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
 763{
 764        if (simplify) {
 765                for (;;) {
 766                        const char *match = simplify->path;
 767                        int len = simplify->len;
 768
 769                        if (!match)
 770                                break;
 771                        if (len > pathlen)
 772                                len = pathlen;
 773                        if (!memcmp(path, match, len))
 774                                return 0;
 775                        simplify++;
 776                }
 777                return 1;
 778        }
 779        return 0;
 780}
 781
 782/*
 783 * This function tells us whether an excluded path matches a
 784 * list of "interesting" pathspecs. That is, whether a path matched
 785 * by any of the pathspecs could possibly be ignored by excluding
 786 * the specified path. This can happen if:
 787 *
 788 *   1. the path is mentioned explicitly in the pathspec
 789 *
 790 *   2. the path is a directory prefix of some element in the
 791 *      pathspec
 792 */
 793static int exclude_matches_pathspec(const char *path, int len,
 794                const struct path_simplify *simplify)
 795{
 796        if (simplify) {
 797                for (; simplify->path; simplify++) {
 798                        if (len == simplify->len
 799                            && !memcmp(path, simplify->path, len))
 800                                return 1;
 801                        if (len < simplify->len
 802                            && simplify->path[len] == '/'
 803                            && !memcmp(path, simplify->path, len))
 804                                return 1;
 805                }
 806        }
 807        return 0;
 808}
 809
 810static int get_index_dtype(const char *path, int len)
 811{
 812        int pos;
 813        struct cache_entry *ce;
 814
 815        ce = cache_name_exists(path, len, 0);
 816        if (ce) {
 817                if (!ce_uptodate(ce))
 818                        return DT_UNKNOWN;
 819                if (S_ISGITLINK(ce->ce_mode))
 820                        return DT_DIR;
 821                /*
 822                 * Nobody actually cares about the
 823                 * difference between DT_LNK and DT_REG
 824                 */
 825                return DT_REG;
 826        }
 827
 828        /* Try to look it up as a directory */
 829        pos = cache_name_pos(path, len);
 830        if (pos >= 0)
 831                return DT_UNKNOWN;
 832        pos = -pos-1;
 833        while (pos < active_nr) {
 834                ce = active_cache[pos++];
 835                if (strncmp(ce->name, path, len))
 836                        break;
 837                if (ce->name[len] > '/')
 838                        break;
 839                if (ce->name[len] < '/')
 840                        continue;
 841                if (!ce_uptodate(ce))
 842                        break;  /* continue? */
 843                return DT_DIR;
 844        }
 845        return DT_UNKNOWN;
 846}
 847
 848static int get_dtype(struct dirent *de, const char *path, int len)
 849{
 850        int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 851        struct stat st;
 852
 853        if (dtype != DT_UNKNOWN)
 854                return dtype;
 855        dtype = get_index_dtype(path, len);
 856        if (dtype != DT_UNKNOWN)
 857                return dtype;
 858        if (lstat(path, &st))
 859                return dtype;
 860        if (S_ISREG(st.st_mode))
 861                return DT_REG;
 862        if (S_ISDIR(st.st_mode))
 863                return DT_DIR;
 864        if (S_ISLNK(st.st_mode))
 865                return DT_LNK;
 866        return dtype;
 867}
 868
 869enum path_treatment {
 870        path_ignored,
 871        path_handled,
 872        path_recurse
 873};
 874
 875static enum path_treatment treat_one_path(struct dir_struct *dir,
 876                                          char *path, int *len,
 877                                          const struct path_simplify *simplify,
 878                                          int dtype, struct dirent *de)
 879{
 880        int exclude = excluded(dir, path, &dtype);
 881        if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 882            && exclude_matches_pathspec(path, *len, simplify))
 883                dir_add_ignored(dir, path, *len);
 884
 885        /*
 886         * Excluded? If we don't explicitly want to show
 887         * ignored files, ignore it
 888         */
 889        if (exclude && !(dir->flags & DIR_SHOW_IGNORED))
 890                return path_ignored;
 891
 892        if (dtype == DT_UNKNOWN)
 893                dtype = get_dtype(de, path, *len);
 894
 895        /*
 896         * Do we want to see just the ignored files?
 897         * We still need to recurse into directories,
 898         * even if we don't ignore them, since the
 899         * directory may contain files that we do..
 900         */
 901        if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {
 902                if (dtype != DT_DIR)
 903                        return path_ignored;
 904        }
 905
 906        switch (dtype) {
 907        default:
 908                return path_ignored;
 909        case DT_DIR:
 910                memcpy(path + *len, "/", 2);
 911                (*len)++;
 912                switch (treat_directory(dir, path, *len, simplify)) {
 913                case show_directory:
 914                        if (exclude != !!(dir->flags
 915                                          & DIR_SHOW_IGNORED))
 916                                return path_ignored;
 917                        break;
 918                case recurse_into_directory:
 919                        return path_recurse;
 920                case ignore_directory:
 921                        return path_ignored;
 922                }
 923                break;
 924        case DT_REG:
 925        case DT_LNK:
 926                break;
 927        }
 928        return path_handled;
 929}
 930
 931static enum path_treatment treat_path(struct dir_struct *dir,
 932                                      struct dirent *de,
 933                                      char *path, int path_max,
 934                                      int baselen,
 935                                      const struct path_simplify *simplify,
 936                                      int *len)
 937{
 938        int dtype;
 939
 940        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 941                return path_ignored;
 942        *len = strlen(de->d_name);
 943        /* Ignore overly long pathnames! */
 944        if (*len + baselen + 8 > path_max)
 945                return path_ignored;
 946        memcpy(path + baselen, de->d_name, *len + 1);
 947        *len += baselen;
 948        if (simplify_away(path, *len, simplify))
 949                return path_ignored;
 950
 951        dtype = DTYPE(de);
 952        return treat_one_path(dir, path, len, simplify, dtype, de);
 953}
 954
 955/*
 956 * Read a directory tree. We currently ignore anything but
 957 * directories, regular files and symlinks. That's because git
 958 * doesn't handle them at all yet. Maybe that will change some
 959 * day.
 960 *
 961 * Also, we ignore the name ".git" (even if it is not a directory).
 962 * That likely will not change.
 963 */
 964static int read_directory_recursive(struct dir_struct *dir,
 965                                    const char *base, int baselen,
 966                                    int check_only,
 967                                    const struct path_simplify *simplify)
 968{
 969        DIR *fdir = opendir(*base ? base : ".");
 970        int contents = 0;
 971
 972        if (fdir) {
 973                struct dirent *de;
 974                char path[PATH_MAX + 1];
 975                memcpy(path, base, baselen);
 976
 977                while ((de = readdir(fdir)) != NULL) {
 978                        int len;
 979                        switch (treat_path(dir, de, path, sizeof(path),
 980                                           baselen, simplify, &len)) {
 981                        case path_recurse:
 982                                contents += read_directory_recursive
 983                                        (dir, path, len, 0, simplify);
 984                                continue;
 985                        case path_ignored:
 986                                continue;
 987                        case path_handled:
 988                                break;
 989                        }
 990                        contents++;
 991                        if (check_only)
 992                                goto exit_early;
 993                        else
 994                                dir_add_name(dir, path, len);
 995                }
 996exit_early:
 997                closedir(fdir);
 998        }
 999
1000        return contents;
1001}
1002
1003static int cmp_name(const void *p1, const void *p2)
1004{
1005        const struct dir_entry *e1 = *(const struct dir_entry **)p1;
1006        const struct dir_entry *e2 = *(const struct dir_entry **)p2;
1007
1008        return cache_name_compare(e1->name, e1->len,
1009                                  e2->name, e2->len);
1010}
1011
/*
 * Return the length of the "simple" part of a path match limiter,
 * i.e. the number of leading bytes before the first glob-special
 * character or the terminating NUL, whichever comes first.
 */
static int simple_length(const char *match)
{
	int len;

	for (len = 0; ; len++) {
		unsigned char c = match[len];

		if (c == '\0' || is_glob_special(c))
			return len;
	}
}
1026
1027static struct path_simplify *create_simplify(const char **pathspec)
1028{
1029        int nr, alloc = 0;
1030        struct path_simplify *simplify = NULL;
1031
1032        if (!pathspec)
1033                return NULL;
1034
1035        for (nr = 0 ; ; nr++) {
1036                const char *match;
1037                if (nr >= alloc) {
1038                        alloc = alloc_nr(alloc);
1039                        simplify = xrealloc(simplify, alloc * sizeof(*simplify));
1040                }
1041                match = *pathspec++;
1042                if (!match)
1043                        break;
1044                simplify[nr].path = match;
1045                simplify[nr].len = simple_length(match);
1046        }
1047        simplify[nr].path = NULL;
1048        simplify[nr].len = 0;
1049        return simplify;
1050}
1051
/*
 * Release a table obtained from create_simplify().  Only the array
 * itself is freed; the path strings it points at are left alone.
 * A NULL table is fine.
 */
static void free_simplify(struct path_simplify *simplify)
{
	free(simplify);
}
1056
1057static int treat_leading_path(struct dir_struct *dir,
1058                              const char *path, int len,
1059                              const struct path_simplify *simplify)
1060{
1061        char pathbuf[PATH_MAX];
1062        int baselen, blen;
1063        const char *cp;
1064
1065        while (len && path[len - 1] == '/')
1066                len--;
1067        if (!len)
1068                return 1;
1069        baselen = 0;
1070        while (1) {
1071                cp = path + baselen + !!baselen;
1072                cp = memchr(cp, '/', path + len - cp);
1073                if (!cp)
1074                        baselen = len;
1075                else
1076                        baselen = cp - path;
1077                memcpy(pathbuf, path, baselen);
1078                pathbuf[baselen] = '\0';
1079                if (!is_directory(pathbuf))
1080                        return 0;
1081                if (simplify_away(pathbuf, baselen, simplify))
1082                        return 0;
1083                blen = baselen;
1084                if (treat_one_path(dir, pathbuf, &blen, simplify,
1085                                   DT_DIR, NULL) == path_ignored)
1086                        return 0; /* do not recurse into it */
1087                if (len <= baselen)
1088                        return 1; /* finished checking */
1089        }
1090}
1091
1092int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)
1093{
1094        struct path_simplify *simplify;
1095
1096        if (has_symlink_leading_path(path, len))
1097                return dir->nr;
1098
1099        simplify = create_simplify(pathspec);
1100        if (!len || treat_leading_path(dir, path, len, simplify))
1101                read_directory_recursive(dir, path, len, 0, simplify);
1102        free_simplify(simplify);
1103        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
1104        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
1105        return dir->nr;
1106}
1107
/*
 * Return 1 if lstat() succeeds on "f", 0 otherwise.  Because lstat()
 * is used, a symbolic link counts as existing even when its target
 * does not.
 */
int file_exists(const char *f)
{
	struct stat sb;

	if (lstat(f, &sb))
		return 0;
	return 1;
}
1113
/*
 * Given two normalized paths (a trailing slash is ok), if subdir is
 * outside dir, return -1.  Otherwise return the offset in subdir that
 * can be used as relative path to dir.
 *
 * Both arguments must be non-NULL and non-empty (asserted).
 */
int dir_inside_of(const char *subdir, const char *dir)
{
	int offset = 0;

	assert(dir && subdir && *dir && *subdir);

	/* measure the common prefix */
	while (dir[offset] && subdir[offset] &&
	       dir[offset] == subdir[offset])
		offset++;

	/*
	 * dir has unmatched characters left: either the two diverge
	 * (hel[p]/me vs hel[l]/yeah) or subdir ran out first.
	 */
	if (dir[offset])
		return -1;

	/* both exhausted: same dir */
	if (!subdir[offset])
		return offset;

	/* foo/[b]ar vs foo/[] */
	if (is_dir_sep(dir[offset - 1]))
		return is_dir_sep(subdir[offset - 1]) ? offset : -1;

	/* foo[/]bar vs foo[] */
	return is_dir_sep(subdir[offset]) ? offset + 1 : -1;
}
1145
1146int is_inside_dir(const char *dir)
1147{
1148        char cwd[PATH_MAX];
1149        if (!dir)
1150                return 0;
1151        if (!getcwd(cwd, sizeof(cwd)))
1152                die_errno("can't find the current directory");
1153        return dir_inside_of(cwd, dir) >= 0;
1154}
1155
1156int is_empty_dir(const char *path)
1157{
1158        DIR *dir = opendir(path);
1159        struct dirent *e;
1160        int ret = 1;
1161
1162        if (!dir)
1163                return 0;
1164
1165        while ((e = readdir(dir)) != NULL)
1166                if (!is_dot_or_dotdot(e->d_name)) {
1167                        ret = 0;
1168                        break;
1169                }
1170
1171        closedir(dir);
1172        return ret;
1173}
1174
1175int remove_dir_recursively(struct strbuf *path, int flag)
1176{
1177        DIR *dir;
1178        struct dirent *e;
1179        int ret = 0, original_len = path->len, len;
1180        int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);
1181        unsigned char submodule_head[20];
1182
1183        if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
1184            !resolve_gitlink_ref(path->buf, "HEAD", submodule_head))
1185                /* Do not descend and nuke a nested git work tree. */
1186                return 0;
1187
1188        dir = opendir(path->buf);
1189        if (!dir)
1190                return rmdir(path->buf);
1191        if (path->buf[original_len - 1] != '/')
1192                strbuf_addch(path, '/');
1193
1194        len = path->len;
1195        while ((e = readdir(dir)) != NULL) {
1196                struct stat st;
1197                if (is_dot_or_dotdot(e->d_name))
1198                        continue;
1199
1200                strbuf_setlen(path, len);
1201                strbuf_addstr(path, e->d_name);
1202                if (lstat(path->buf, &st))
1203                        ; /* fall thru */
1204                else if (S_ISDIR(st.st_mode)) {
1205                        if (!remove_dir_recursively(path, only_empty))
1206                                continue; /* happy */
1207                } else if (!only_empty && !unlink(path->buf))
1208                        continue; /* happy, too */
1209
1210                /* path too long, stat fails, or non-directory still exists */
1211                ret = -1;
1212                break;
1213        }
1214        closedir(dir);
1215
1216        strbuf_setlen(path, original_len);
1217        if (!ret)
1218                ret = rmdir(path->buf);
1219        return ret;
1220}
1221
1222void setup_standard_excludes(struct dir_struct *dir)
1223{
1224        const char *path;
1225
1226        dir->exclude_per_dir = ".gitignore";
1227        path = git_path("info/exclude");
1228        if (!access(path, R_OK))
1229                add_excludes_from_file(dir, path);
1230        if (excludes_file && !access(excludes_file, R_OK))
1231                add_excludes_from_file(dir, excludes_file);
1232}
1233
/*
 * Unlink "name" and then try to remove each of its leading
 * directories, innermost first, stopping at the first rmdir() that
 * fails.
 *
 * Returns -1 if the unlink fails for a reason other than ENOENT,
 * 0 otherwise; failures to remove the directories are not reported.
 */
int remove_path(const char *name)
{
	const char *last;
	char *dirs, *cut;

	if (unlink(name) && errno != ENOENT)
		return -1;

	last = strrchr(name, '/');
	if (!last)
		return 0;

	/* work on a private copy so components can be chopped off */
	dirs = xstrdup(name);
	cut = dirs + (last - name);
	while (cut) {
		*cut = '\0';
		if (rmdir(dirs) != 0)
			break;
		cut = strrchr(dirs, '/');
	}
	free(dirs);
	return 0;
}
1252
1253static int pathspec_item_cmp(const void *a_, const void *b_)
1254{
1255        struct pathspec_item *a, *b;
1256
1257        a = (struct pathspec_item *)a_;
1258        b = (struct pathspec_item *)b_;
1259        return strcmp(a->match, b->match);
1260}
1261
1262int init_pathspec(struct pathspec *pathspec, const char **paths)
1263{
1264        const char **p = paths;
1265        int i;
1266
1267        memset(pathspec, 0, sizeof(*pathspec));
1268        if (!p)
1269                return 0;
1270        while (*p)
1271                p++;
1272        pathspec->raw = paths;
1273        pathspec->nr = p - paths;
1274        if (!pathspec->nr)
1275                return 0;
1276
1277        pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);
1278        for (i = 0; i < pathspec->nr; i++) {
1279                struct pathspec_item *item = pathspec->items+i;
1280                const char *path = paths[i];
1281
1282                item->match = path;
1283                item->len = strlen(path);
1284                item->use_wildcard = !no_wildcard(path);
1285                if (item->use_wildcard)
1286                        pathspec->has_wildcard = 1;
1287        }
1288
1289        qsort(pathspec->items, pathspec->nr,
1290              sizeof(struct pathspec_item), pathspec_item_cmp);
1291
1292        return 0;
1293}
1294
1295void free_pathspec(struct pathspec *pathspec)
1296{
1297        free(pathspec->items);
1298        pathspec->items = NULL;
1299}