1/* 2 * This handles recursive filename detection with exclude 3 * files, index knowledge etc.. 4 * 5 * See Documentation/technical/api-directory-listing.txt 6 * 7 * Copyright (C) Linus Torvalds, 2005-2006 8 * Junio Hamano, 2005-2006 9 */ 10#include "cache.h" 11#include "dir.h" 12#include "refs.h" 13 14struct path_simplify { 15 int len; 16 const char *path; 17}; 18 19static int read_directory_recursive(struct dir_struct *dir, const char *path, int len, 20 int check_only, const struct path_simplify *simplify); 21static int get_dtype(struct dirent *de, const char *path, int len); 22 23/* helper string functions with support for the ignore_case flag */ 24int strcmp_icase(const char *a, const char *b) 25{ 26 return ignore_case ? strcasecmp(a, b) : strcmp(a, b); 27} 28 29int strncmp_icase(const char *a, const char *b, size_t count) 30{ 31 return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); 32} 33 34int fnmatch_icase(const char *pattern, const char *string, int flags) 35{ 36 return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0)); 37} 38 39static size_t common_prefix_len(const char **pathspec) 40{ 41 const char *n, *first; 42 size_t max = 0; 43 44 if (!pathspec) 45 return max; 46 47 first = *pathspec; 48 while ((n = *pathspec++)) { 49 size_t i, len = 0; 50 for (i = 0; first == n || i < max; i++) { 51 char c = n[i]; 52 if (!c || c != first[i] || is_glob_special(c)) 53 break; 54 if (c == '/') 55 len = i + 1; 56 } 57 if (first == n || len < max) { 58 max = len; 59 if (!max) 60 break; 61 } 62 } 63 return max; 64} 65 66/* 67 * Returns a copy of the longest leading path common among all 68 * pathspecs. 69 */ 70char *common_prefix(const char **pathspec) 71{ 72 unsigned long len = common_prefix_len(pathspec); 73 74 return len ? xmemdupz(*pathspec, len) : NULL; 75} 76 77int fill_directory(struct dir_struct *dir, const char **pathspec) 78{ 79 size_t len; 80 81 /* 82 * Calculate common prefix for the pathspec, and 83 * use that to optimize the directory walk 84 */ 85 len = common_prefix_len(pathspec); 86 87 /* Read the directory and prune it */ 88 read_directory(dir, pathspec ? *pathspec : "", len, pathspec); 89 return len; 90} 91 92int within_depth(const char *name, int namelen, 93 int depth, int max_depth) 94{ 95 const char *cp = name, *cpe = name + namelen; 96 97 while (cp < cpe) { 98 if (*cp++ != '/') 99 continue; 100 depth++; 101 if (depth > max_depth) 102 return 0; 103 } 104 return 1; 105} 106 107/* 108 * Does 'match' match the given name? 109 * A match is found if 110 * 111 * (1) the 'match' string is leading directory of 'name', or 112 * (2) the 'match' string is a wildcard and matches 'name', or 113 * (3) the 'match' string is exactly the same as 'name'. 114 * 115 * and the return value tells which case it was. 116 * 117 * It returns 0 when there is no match. 118 */ 119static int match_one(const char *match, const char *name, int namelen) 120{ 121 int matchlen; 122 123 /* If the match was just the prefix, we matched */ 124 if (!*match) 125 return MATCHED_RECURSIVELY; 126 127 if (ignore_case) { 128 for (;;) { 129 unsigned char c1 = tolower(*match); 130 unsigned char c2 = tolower(*name); 131 if (c1 == '\0' || is_glob_special(c1)) 132 break; 133 if (c1 != c2) 134 return 0; 135 match++; 136 name++; 137 namelen--; 138 } 139 } else { 140 for (;;) { 141 unsigned char c1 = *match; 142 unsigned char c2 = *name; 143 if (c1 == '\0' || is_glob_special(c1)) 144 break; 145 if (c1 != c2) 146 return 0; 147 match++; 148 name++; 149 namelen--; 150 } 151 } 152 153 154 /* 155 * If we don't match the matchstring exactly, 156 * we need to match by fnmatch 157 */ 158 matchlen = strlen(match); 159 if (strncmp_icase(match, name, matchlen)) 160 return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0; 161 162 if (namelen == matchlen) 163 return MATCHED_EXACTLY; 164 if (match[matchlen-1] == '/' || name[matchlen] == '/') 165 return MATCHED_RECURSIVELY; 166 return 0; 167} 168 169/* 170 * Given a name and a list of pathspecs, see if the name matches 171 * any of the pathspecs. The caller is also interested in seeing 172 * all pathspec matches some names it calls this function with 173 * (otherwise the user could have mistyped the unmatched pathspec), 174 * and a mark is left in seen[] array for pathspec element that 175 * actually matched anything. 176 */ 177int match_pathspec(const char **pathspec, const char *name, int namelen, 178 int prefix, char *seen) 179{ 180 int i, retval = 0; 181 182 if (!pathspec) 183 return 1; 184 185 name += prefix; 186 namelen -= prefix; 187 188 for (i = 0; pathspec[i] != NULL; i++) { 189 int how; 190 const char *match = pathspec[i] + prefix; 191 if (seen && seen[i] == MATCHED_EXACTLY) 192 continue; 193 how = match_one(match, name, namelen); 194 if (how) { 195 if (retval < how) 196 retval = how; 197 if (seen && seen[i] < how) 198 seen[i] = how; 199 } 200 } 201 return retval; 202} 203 204/* 205 * Does 'match' match the given name? 206 * A match is found if 207 * 208 * (1) the 'match' string is leading directory of 'name', or 209 * (2) the 'match' string is a wildcard and matches 'name', or 210 * (3) the 'match' string is exactly the same as 'name'. 211 * 212 * and the return value tells which case it was. 213 * 214 * It returns 0 when there is no match. 215 */ 216static int match_pathspec_item(const struct pathspec_item *item, int prefix, 217 const char *name, int namelen) 218{ 219 /* name/namelen has prefix cut off by caller */ 220 const char *match = item->match + prefix; 221 int matchlen = item->len - prefix; 222 223 /* If the match was just the prefix, we matched */ 224 if (!*match) 225 return MATCHED_RECURSIVELY; 226 227 if (matchlen <= namelen && !strncmp(match, name, matchlen)) { 228 if (matchlen == namelen) 229 return MATCHED_EXACTLY; 230 231 if (match[matchlen-1] == '/' || name[matchlen] == '/') 232 return MATCHED_RECURSIVELY; 233 } 234 235 if (item->use_wildcard && !fnmatch(match, name, 0)) 236 return MATCHED_FNMATCH; 237 238 return 0; 239} 240 241/* 242 * Given a name and a list of pathspecs, see if the name matches 243 * any of the pathspecs. The caller is also interested in seeing 244 * all pathspec matches some names it calls this function with 245 * (otherwise the user could have mistyped the unmatched pathspec), 246 * and a mark is left in seen[] array for pathspec element that 247 * actually matched anything. 248 */ 249int match_pathspec_depth(const struct pathspec *ps, 250 const char *name, int namelen, 251 int prefix, char *seen) 252{ 253 int i, retval = 0; 254 255 if (!ps->nr) { 256 if (!ps->recursive || ps->max_depth == -1) 257 return MATCHED_RECURSIVELY; 258 259 if (within_depth(name, namelen, 0, ps->max_depth)) 260 return MATCHED_EXACTLY; 261 else 262 return 0; 263 } 264 265 name += prefix; 266 namelen -= prefix; 267 268 for (i = ps->nr - 1; i >= 0; i--) { 269 int how; 270 if (seen && seen[i] == MATCHED_EXACTLY) 271 continue; 272 how = match_pathspec_item(ps->items+i, prefix, name, namelen); 273 if (ps->recursive && ps->max_depth != -1 && 274 how && how != MATCHED_FNMATCH) { 275 int len = ps->items[i].len; 276 if (name[len] == '/') 277 len++; 278 if (within_depth(name+len, namelen-len, 0, ps->max_depth)) 279 how = MATCHED_EXACTLY; 280 else 281 how = 0; 282 } 283 if (how) { 284 if (retval < how) 285 retval = how; 286 if (seen && seen[i] < how) 287 seen[i] = how; 288 } 289 } 290 return retval; 291} 292 293/* 294 * Return the length of the "simple" part of a path match limiter. 295 */ 296static int simple_length(const char *match) 297{ 298 int len = -1; 299 300 for (;;) { 301 unsigned char c = *match++; 302 len++; 303 if (c == '\0' || is_glob_special(c)) 304 return len; 305 } 306} 307 308static int no_wildcard(const char *string) 309{ 310 return string[simple_length(string)] == '\0'; 311} 312 313void parse_exclude_pattern(const char **pattern, 314 int *patternlen, 315 int *flags, 316 int *nowildcardlen) 317{ 318 const char *p = *pattern; 319 size_t i, len; 320 321 *flags = 0; 322 if (*p == '!') { 323 *flags |= EXC_FLAG_NEGATIVE; 324 p++; 325 } 326 len = strlen(p); 327 if (len && p[len - 1] == '/') { 328 len--; 329 *flags |= EXC_FLAG_MUSTBEDIR; 330 } 331 for (i = 0; i < len; i++) { 332 if (p[i] == '/') 333 break; 334 } 335 if (i == len) 336 *flags |= EXC_FLAG_NODIR; 337 *nowildcardlen = simple_length(p); 338 /* 339 * we should have excluded the trailing slash from 'p' too, 340 * but that's one more allocation. Instead just make sure 341 * nowildcardlen does not exceed real patternlen 342 */ 343 if (*nowildcardlen > len) 344 *nowildcardlen = len; 345 if (*p == '*' && no_wildcard(p + 1)) 346 *flags |= EXC_FLAG_ENDSWITH; 347 *pattern = p; 348 *patternlen = len; 349} 350 351void add_exclude(const char *string, const char *base, 352 int baselen, struct exclude_list *el) 353{ 354 struct exclude *x; 355 int patternlen; 356 int flags; 357 int nowildcardlen; 358 359 parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen); 360 if (flags & EXC_FLAG_MUSTBEDIR) { 361 char *s; 362 x = xmalloc(sizeof(*x) + patternlen + 1); 363 s = (char *)(x+1); 364 memcpy(s, string, patternlen); 365 s[patternlen] = '\0'; 366 x->pattern = s; 367 } else { 368 x = xmalloc(sizeof(*x)); 369 x->pattern = string; 370 } 371 x->patternlen = patternlen; 372 x->nowildcardlen = nowildcardlen; 373 x->base = base; 374 x->baselen = baselen; 375 x->flags = flags; 376 ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); 377 el->excludes[el->nr++] = x; 378} 379 380static void *read_skip_worktree_file_from_index(const char *path, size_t *size) 381{ 382 int pos, len; 383 unsigned long sz; 384 enum object_type type; 385 void *data; 386 struct index_state *istate = &the_index; 387 388 len = strlen(path); 389 pos = index_name_pos(istate, path, len); 390 if (pos < 0) 391 return NULL; 392 if (!ce_skip_worktree(istate->cache[pos])) 393 return NULL; 394 data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz); 395 if (!data || type != OBJ_BLOB) { 396 free(data); 397 return NULL; 398 } 399 *size = xsize_t(sz); 400 return data; 401} 402 403void free_excludes(struct exclude_list *el) 404{ 405 int i; 406 407 for (i = 0; i < el->nr; i++) 408 free(el->excludes[i]); 409 free(el->excludes); 410 411 el->nr = 0; 412 el->excludes = NULL; 413} 414 415int add_excludes_from_file_to_list(const char *fname, 416 const char *base, 417 int baselen, 418 char **buf_p, 419 struct exclude_list *el, 420 int check_index) 421{ 422 struct stat st; 423 int fd, i; 424 size_t size = 0; 425 char *buf, *entry; 426 427 fd = open(fname, O_RDONLY); 428 if (fd < 0 || fstat(fd, &st) < 0) { 429 if (0 <= fd) 430 close(fd); 431 if (!check_index || 432 (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL) 433 return -1; 434 if (size == 0) { 435 free(buf); 436 return 0; 437 } 438 if (buf[size-1] != '\n') { 439 buf = xrealloc(buf, size+1); 440 buf[size++] = '\n'; 441 } 442 } 443 else { 444 size = xsize_t(st.st_size); 445 if (size == 0) { 446 close(fd); 447 return 0; 448 } 449 buf = xmalloc(size+1); 450 if (read_in_full(fd, buf, size) != size) { 451 free(buf); 452 close(fd); 453 return -1; 454 } 455 buf[size++] = '\n'; 456 close(fd); 457 } 458 459 if (buf_p) 460 *buf_p = buf; 461 entry = buf; 462 for (i = 0; i < size; i++) { 463 if (buf[i] == '\n') { 464 if (entry != buf + i && entry[0] != '#') { 465 buf[i - (i && buf[i-1] == '\r')] = 0; 466 add_exclude(entry, base, baselen, el); 467 } 468 entry = buf + i + 1; 469 } 470 } 471 return 0; 472} 473 474void add_excludes_from_file(struct dir_struct *dir, const char *fname) 475{ 476 if (add_excludes_from_file_to_list(fname, "", 0, NULL, 477 &dir->exclude_list[EXC_FILE], 0) < 0) 478 die("cannot use %s as an exclude file", fname); 479} 480 481/* 482 * Loads the per-directory exclude list for the substring of base 483 * which has a char length of baselen. 484 */ 485static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) 486{ 487 struct exclude_list *el; 488 struct exclude_stack *stk = NULL; 489 int current; 490 491 if ((!dir->exclude_per_dir) || 492 (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) 493 return; /* too long a path -- ignore */ 494 495 /* Pop the directories that are not the prefix of the path being checked. */ 496 el = &dir->exclude_list[EXC_DIRS]; 497 while ((stk = dir->exclude_stack) != NULL) { 498 if (stk->baselen <= baselen && 499 !strncmp(dir->basebuf, base, stk->baselen)) 500 break; 501 dir->exclude_stack = stk->prev; 502 while (stk->exclude_ix < el->nr) 503 free(el->excludes[--el->nr]); 504 free(stk->filebuf); 505 free(stk); 506 } 507 508 /* Read from the parent directories and push them down. */ 509 current = stk ? stk->baselen : -1; 510 while (current < baselen) { 511 struct exclude_stack *stk = xcalloc(1, sizeof(*stk)); 512 const char *cp; 513 514 if (current < 0) { 515 cp = base; 516 current = 0; 517 } 518 else { 519 cp = strchr(base + current + 1, '/'); 520 if (!cp) 521 die("oops in prep_exclude"); 522 cp++; 523 } 524 stk->prev = dir->exclude_stack; 525 stk->baselen = cp - base; 526 stk->exclude_ix = el->nr; 527 memcpy(dir->basebuf + current, base + current, 528 stk->baselen - current); 529 strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); 530 add_excludes_from_file_to_list(dir->basebuf, 531 dir->basebuf, stk->baselen, 532 &stk->filebuf, el, 1); 533 dir->exclude_stack = stk; 534 current = stk->baselen; 535 } 536 dir->basebuf[baselen] = '\0'; 537} 538 539int match_basename(const char *basename, int basenamelen, 540 const char *pattern, int prefix, int patternlen, 541 int flags) 542{ 543 if (prefix == patternlen) { 544 if (!strcmp_icase(pattern, basename)) 545 return 1; 546 } else if (flags & EXC_FLAG_ENDSWITH) { 547 if (patternlen - 1 <= basenamelen && 548 !strcmp_icase(pattern + 1, 549 basename + basenamelen - patternlen + 1)) 550 return 1; 551 } else { 552 if (fnmatch_icase(pattern, basename, 0) == 0) 553 return 1; 554 } 555 return 0; 556} 557 558int match_pathname(const char *pathname, int pathlen, 559 const char *base, int baselen, 560 const char *pattern, int prefix, int patternlen, 561 int flags) 562{ 563 const char *name; 564 int namelen; 565 566 /* 567 * match with FNM_PATHNAME; the pattern has base implicitly 568 * in front of it. 569 */ 570 if (*pattern == '/') { 571 pattern++; 572 prefix--; 573 } 574 575 /* 576 * baselen does not count the trailing slash. base[] may or 577 * may not end with a trailing slash though. 578 */ 579 if (pathlen < baselen + 1 || 580 (baselen && pathname[baselen] != '/') || 581 strncmp_icase(pathname, base, baselen)) 582 return 0; 583 584 namelen = baselen ? pathlen - baselen - 1 : pathlen; 585 name = pathname + pathlen - namelen; 586 587 if (prefix) { 588 /* 589 * if the non-wildcard part is longer than the 590 * remaining pathname, surely it cannot match. 591 */ 592 if (prefix > namelen) 593 return 0; 594 595 if (strncmp_icase(pattern, name, prefix)) 596 return 0; 597 pattern += prefix; 598 name += prefix; 599 namelen -= prefix; 600 } 601 602 return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; 603} 604 605/* 606 * Scan the given exclude list in reverse to see whether pathname 607 * should be ignored. The first match (i.e. the last on the list), if 608 * any, determines the fate. Returns the exclude_list element which 609 * matched, or NULL for undecided. 610 */ 611static struct exclude *last_exclude_matching_from_list(const char *pathname, 612 int pathlen, 613 const char *basename, 614 int *dtype, 615 struct exclude_list *el) 616{ 617 int i; 618 619 if (!el->nr) 620 return NULL; /* undefined */ 621 622 for (i = el->nr - 1; 0 <= i; i--) { 623 struct exclude *x = el->excludes[i]; 624 const char *exclude = x->pattern; 625 int prefix = x->nowildcardlen; 626 627 if (x->flags & EXC_FLAG_MUSTBEDIR) { 628 if (*dtype == DT_UNKNOWN) 629 *dtype = get_dtype(NULL, pathname, pathlen); 630 if (*dtype != DT_DIR) 631 continue; 632 } 633 634 if (x->flags & EXC_FLAG_NODIR) { 635 if (match_basename(basename, 636 pathlen - (basename - pathname), 637 exclude, prefix, x->patternlen, 638 x->flags)) 639 return x; 640 continue; 641 } 642 643 assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); 644 if (match_pathname(pathname, pathlen, 645 x->base, x->baselen ? x->baselen - 1 : 0, 646 exclude, prefix, x->patternlen, x->flags)) 647 return x; 648 } 649 return NULL; /* undecided */ 650} 651 652/* 653 * Scan the list and let the last match determine the fate. 654 * Return 1 for exclude, 0 for include and -1 for undecided. 655 */ 656int is_excluded_from_list(const char *pathname, 657 int pathlen, const char *basename, int *dtype, 658 struct exclude_list *el) 659{ 660 struct exclude *exclude; 661 exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el); 662 if (exclude) 663 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 664 return -1; /* undecided */ 665} 666 667/* 668 * Loads the exclude lists for the directory containing pathname, then 669 * scans all exclude lists to determine whether pathname is excluded. 670 * Returns the exclude_list element which matched, or NULL for 671 * undecided. 672 */ 673static struct exclude *last_exclude_matching(struct dir_struct *dir, 674 const char *pathname, 675 int *dtype_p) 676{ 677 int pathlen = strlen(pathname); 678 int st; 679 struct exclude *exclude; 680 const char *basename = strrchr(pathname, '/'); 681 basename = (basename) ? basename+1 : pathname; 682 683 prep_exclude(dir, pathname, basename-pathname); 684 for (st = EXC_CMDL; st <= EXC_FILE; st++) { 685 exclude = last_exclude_matching_from_list( 686 pathname, pathlen, basename, dtype_p, 687 &dir->exclude_list[st]); 688 if (exclude) 689 return exclude; 690 } 691 return NULL; 692} 693 694/* 695 * Loads the exclude lists for the directory containing pathname, then 696 * scans all exclude lists to determine whether pathname is excluded. 697 * Returns 1 if true, otherwise 0. 698 */ 699static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) 700{ 701 struct exclude *exclude = 702 last_exclude_matching(dir, pathname, dtype_p); 703 if (exclude) 704 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 705 return 0; 706} 707 708void path_exclude_check_init(struct path_exclude_check *check, 709 struct dir_struct *dir) 710{ 711 check->dir = dir; 712 check->exclude = NULL; 713 strbuf_init(&check->path, 256); 714} 715 716void path_exclude_check_clear(struct path_exclude_check *check) 717{ 718 strbuf_release(&check->path); 719} 720 721/* 722 * For each subdirectory in name, starting with the top-most, checks 723 * to see if that subdirectory is excluded, and if so, returns the 724 * corresponding exclude structure. Otherwise, checks whether name 725 * itself (which is presumably a file) is excluded. 726 * 727 * A path to a directory known to be excluded is left in check->path to 728 * optimize for repeated checks for files in the same excluded directory. 729 */ 730struct exclude *last_exclude_matching_path(struct path_exclude_check *check, 731 const char *name, int namelen, 732 int *dtype) 733{ 734 int i; 735 struct strbuf *path = &check->path; 736 struct exclude *exclude; 737 738 /* 739 * we allow the caller to pass namelen as an optimization; it 740 * must match the length of the name, as we eventually call 741 * is_excluded() on the whole name string. 742 */ 743 if (namelen < 0) 744 namelen = strlen(name); 745 746 /* 747 * If path is non-empty, and name is equal to path or a 748 * subdirectory of path, name should be excluded, because 749 * it's inside a directory which is already known to be 750 * excluded and was previously left in check->path. 751 */ 752 if (path->len && 753 path->len <= namelen && 754 !memcmp(name, path->buf, path->len) && 755 (!name[path->len] || name[path->len] == '/')) 756 return check->exclude; 757 758 strbuf_setlen(path, 0); 759 for (i = 0; name[i]; i++) { 760 int ch = name[i]; 761 762 if (ch == '/') { 763 int dt = DT_DIR; 764 exclude = last_exclude_matching(check->dir, 765 path->buf, &dt); 766 if (exclude) { 767 check->exclude = exclude; 768 return exclude; 769 } 770 } 771 strbuf_addch(path, ch); 772 } 773 774 /* An entry in the index; cannot be a directory with subentries */ 775 strbuf_setlen(path, 0); 776 777 return last_exclude_matching(check->dir, name, dtype); 778} 779 780/* 781 * Is this name excluded? This is for a caller like show_files() that 782 * do not honor directory hierarchy and iterate through paths that are 783 * possibly in an ignored directory. 784 */ 785int is_path_excluded(struct path_exclude_check *check, 786 const char *name, int namelen, int *dtype) 787{ 788 struct exclude *exclude = 789 last_exclude_matching_path(check, name, namelen, dtype); 790 if (exclude) 791 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 792 return 0; 793} 794 795static struct dir_entry *dir_entry_new(const char *pathname, int len) 796{ 797 struct dir_entry *ent; 798 799 ent = xmalloc(sizeof(*ent) + len + 1); 800 ent->len = len; 801 memcpy(ent->name, pathname, len); 802 ent->name[len] = 0; 803 return ent; 804} 805 806static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) 807{ 808 if (cache_name_exists(pathname, len, ignore_case)) 809 return NULL; 810 811 ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); 812 return dir->entries[dir->nr++] = dir_entry_new(pathname, len); 813} 814 815struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len) 816{ 817 if (!cache_name_is_other(pathname, len)) 818 return NULL; 819 820 ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc); 821 return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len); 822} 823 824enum exist_status { 825 index_nonexistent = 0, 826 index_directory, 827 index_gitdir 828}; 829 830/* 831 * Do not use the alphabetically stored index to look up 832 * the directory name; instead, use the case insensitive 833 * name hash. 834 */ 835static enum exist_status directory_exists_in_index_icase(const char *dirname, int len) 836{ 837 struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case); 838 unsigned char endchar; 839 840 if (!ce) 841 return index_nonexistent; 842 endchar = ce->name[len]; 843 844 /* 845 * The cache_entry structure returned will contain this dirname 846 * and possibly additional path components. 847 */ 848 if (endchar == '/') 849 return index_directory; 850 851 /* 852 * If there are no additional path components, then this cache_entry 853 * represents a submodule. Submodules, despite being directories, 854 * are stored in the cache without a closing slash. 855 */ 856 if (!endchar && S_ISGITLINK(ce->ce_mode)) 857 return index_gitdir; 858 859 /* This should never be hit, but it exists just in case. */ 860 return index_nonexistent; 861} 862 863/* 864 * The index sorts alphabetically by entry name, which 865 * means that a gitlink sorts as '\0' at the end, while 866 * a directory (which is defined not as an entry, but as 867 * the files it contains) will sort with the '/' at the 868 * end. 869 */ 870static enum exist_status directory_exists_in_index(const char *dirname, int len) 871{ 872 int pos; 873 874 if (ignore_case) 875 return directory_exists_in_index_icase(dirname, len); 876 877 pos = cache_name_pos(dirname, len); 878 if (pos < 0) 879 pos = -pos-1; 880 while (pos < active_nr) { 881 struct cache_entry *ce = active_cache[pos++]; 882 unsigned char endchar; 883 884 if (strncmp(ce->name, dirname, len)) 885 break; 886 endchar = ce->name[len]; 887 if (endchar > '/') 888 break; 889 if (endchar == '/') 890 return index_directory; 891 if (!endchar && S_ISGITLINK(ce->ce_mode)) 892 return index_gitdir; 893 } 894 return index_nonexistent; 895} 896 897/* 898 * When we find a directory when traversing the filesystem, we 899 * have three distinct cases: 900 * 901 * - ignore it 902 * - see it as a directory 903 * - recurse into it 904 * 905 * and which one we choose depends on a combination of existing 906 * git index contents and the flags passed into the directory 907 * traversal routine. 908 * 909 * Case 1: If we *already* have entries in the index under that 910 * directory name, we always recurse into the directory to see 911 * all the files. 912 * 913 * Case 2: If we *already* have that directory name as a gitlink, 914 * we always continue to see it as a gitlink, regardless of whether 915 * there is an actual git directory there or not (it might not 916 * be checked out as a subproject!) 917 * 918 * Case 3: if we didn't have it in the index previously, we 919 * have a few sub-cases: 920 * 921 * (a) if "show_other_directories" is true, we show it as 922 * just a directory, unless "hide_empty_directories" is 923 * also true and the directory is empty, in which case 924 * we just ignore it entirely. 925 * (b) if it looks like a git directory, and we don't have 926 * 'no_gitlinks' set we treat it as a gitlink, and show it 927 * as a directory. 928 * (c) otherwise, we recurse into it. 929 */ 930enum directory_treatment { 931 show_directory, 932 ignore_directory, 933 recurse_into_directory 934}; 935 936static enum directory_treatment treat_directory(struct dir_struct *dir, 937 const char *dirname, int len, 938 const struct path_simplify *simplify) 939{ 940 /* The "len-1" is to strip the final '/' */ 941 switch (directory_exists_in_index(dirname, len-1)) { 942 case index_directory: 943 return recurse_into_directory; 944 945 case index_gitdir: 946 if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) 947 return ignore_directory; 948 return show_directory; 949 950 case index_nonexistent: 951 if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) 952 break; 953 if (!(dir->flags & DIR_NO_GITLINKS)) { 954 unsigned char sha1[20]; 955 if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0) 956 return show_directory; 957 } 958 return recurse_into_directory; 959 } 960 961 /* This is the "show_other_directories" case */ 962 if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) 963 return show_directory; 964 if (!read_directory_recursive(dir, dirname, len, 1, simplify)) 965 return ignore_directory; 966 return show_directory; 967} 968 969/* 970 * This is an inexact early pruning of any recursive directory 971 * reading - if the path cannot possibly be in the pathspec, 972 * return true, and we'll skip it early. 973 */ 974static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify) 975{ 976 if (simplify) { 977 for (;;) { 978 const char *match = simplify->path; 979 int len = simplify->len; 980 981 if (!match) 982 break; 983 if (len > pathlen) 984 len = pathlen; 985 if (!memcmp(path, match, len)) 986 return 0; 987 simplify++; 988 } 989 return 1; 990 } 991 return 0; 992} 993 994/* 995 * This function tells us whether an excluded path matches a 996 * list of "interesting" pathspecs. That is, whether a path matched 997 * by any of the pathspecs could possibly be ignored by excluding 998 * the specified path. This can happen if: 999 *1000 * 1. the path is mentioned explicitly in the pathspec1001 *1002 * 2. the path is a directory prefix of some element in the1003 * pathspec1004 */1005static int exclude_matches_pathspec(const char *path, int len,1006 const struct path_simplify *simplify)1007{1008 if (simplify) {1009 for (; simplify->path; simplify++) {1010 if (len == simplify->len1011 && !memcmp(path, simplify->path, len))1012 return 1;1013 if (len < simplify->len1014 && simplify->path[len] == '/'1015 && !memcmp(path, simplify->path, len))1016 return 1;1017 }1018 }1019 return 0;1020}10211022static int get_index_dtype(const char *path, int len)1023{1024 int pos;1025 struct cache_entry *ce;10261027 ce = cache_name_exists(path, len, 0);1028 if (ce) {1029 if (!ce_uptodate(ce))1030 return DT_UNKNOWN;1031 if (S_ISGITLINK(ce->ce_mode))1032 return DT_DIR;1033 /*1034 * Nobody actually cares about the1035 * difference between DT_LNK and DT_REG1036 */1037 return DT_REG;1038 }10391040 /* Try to look it up as a directory */1041 pos = cache_name_pos(path, len);1042 if (pos >= 0)1043 return DT_UNKNOWN;1044 pos = -pos-1;1045 while (pos < active_nr) {1046 ce = active_cache[pos++];1047 if (strncmp(ce->name, path, len))1048 break;1049 if (ce->name[len] > '/')1050 break;1051 if (ce->name[len] < '/')1052 continue;1053 if (!ce_uptodate(ce))1054 break; /* continue? */1055 return DT_DIR;1056 }1057 return DT_UNKNOWN;1058}10591060static int get_dtype(struct dirent *de, const char *path, int len)1061{1062 int dtype = de ? DTYPE(de) : DT_UNKNOWN;1063 struct stat st;10641065 if (dtype != DT_UNKNOWN)1066 return dtype;1067 dtype = get_index_dtype(path, len);1068 if (dtype != DT_UNKNOWN)1069 return dtype;1070 if (lstat(path, &st))1071 return dtype;1072 if (S_ISREG(st.st_mode))1073 return DT_REG;1074 if (S_ISDIR(st.st_mode))1075 return DT_DIR;1076 if (S_ISLNK(st.st_mode))1077 return DT_LNK;1078 return dtype;1079}10801081enum path_treatment {1082 path_ignored,1083 path_handled,1084 path_recurse1085};10861087static enum path_treatment treat_one_path(struct dir_struct *dir,1088 struct strbuf *path,1089 const struct path_simplify *simplify,1090 int dtype, struct dirent *de)1091{1092 int exclude = is_excluded(dir, path->buf, &dtype);1093 if (exclude && (dir->flags & DIR_COLLECT_IGNORED)1094 && exclude_matches_pathspec(path->buf, path->len, simplify))1095 dir_add_ignored(dir, path->buf, path->len);10961097 /*1098 * Excluded? If we don't explicitly want to show1099 * ignored files, ignore it1100 */1101 if (exclude && !(dir->flags & DIR_SHOW_IGNORED))1102 return path_ignored;11031104 if (dtype == DT_UNKNOWN)1105 dtype = get_dtype(de, path->buf, path->len);11061107 /*1108 * Do we want to see just the ignored files?1109 * We still need to recurse into directories,1110 * even if we don't ignore them, since the1111 * directory may contain files that we do..1112 */1113 if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {1114 if (dtype != DT_DIR)1115 return path_ignored;1116 }11171118 switch (dtype) {1119 default:1120 return path_ignored;1121 case DT_DIR:1122 strbuf_addch(path, '/');1123 switch (treat_directory(dir, path->buf, path->len, simplify)) {1124 case show_directory:1125 if (exclude != !!(dir->flags1126 & DIR_SHOW_IGNORED))1127 return path_ignored;1128 break;1129 case recurse_into_directory:1130 return path_recurse;1131 case ignore_directory:1132 return path_ignored;1133 }1134 break;1135 case DT_REG:1136 case DT_LNK:1137 break;1138 }1139 return path_handled;1140}11411142static enum path_treatment treat_path(struct dir_struct *dir,1143 struct dirent *de,1144 struct strbuf *path,1145 int baselen,1146 const struct path_simplify *simplify)1147{1148 int dtype;11491150 if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))1151 return path_ignored;1152 strbuf_setlen(path, baselen);1153 strbuf_addstr(path, de->d_name);1154 if (simplify_away(path->buf, path->len, simplify))1155 return path_ignored;11561157 dtype = DTYPE(de);1158 return treat_one_path(dir, path, simplify, dtype, de);1159}11601161/*1162 * Read a directory tree. We currently ignore anything but1163 * directories, regular files and symlinks. That's because git1164 * doesn't handle them at all yet. Maybe that will change some1165 * day.1166 *1167 * Also, we ignore the name ".git" (even if it is not a directory).1168 * That likely will not change.1169 */1170static int read_directory_recursive(struct dir_struct *dir,1171 const char *base, int baselen,1172 int check_only,1173 const struct path_simplify *simplify)1174{1175 DIR *fdir;1176 int contents = 0;1177 struct dirent *de;1178 struct strbuf path = STRBUF_INIT;11791180 strbuf_add(&path, base, baselen);11811182 fdir = opendir(path.len ? path.buf : ".");1183 if (!fdir)1184 goto out;11851186 while ((de = readdir(fdir)) != NULL) {1187 switch (treat_path(dir, de, &path, baselen, simplify)) {1188 case path_recurse:1189 contents += read_directory_recursive(dir, path.buf,1190 path.len, 0,1191 simplify);1192 continue;1193 case path_ignored:1194 continue;1195 case path_handled:1196 break;1197 }1198 contents++;1199 if (check_only)1200 break;1201 dir_add_name(dir, path.buf, path.len);1202 }1203 closedir(fdir);1204 out:1205 strbuf_release(&path);12061207 return contents;1208}12091210static int cmp_name(const void *p1, const void *p2)1211{1212 const struct dir_entry *e1 = *(const struct dir_entry **)p1;1213 const struct dir_entry *e2 = *(const struct dir_entry **)p2;12141215 return cache_name_compare(e1->name, e1->len,1216 e2->name, e2->len);1217}12181219static struct path_simplify *create_simplify(const char **pathspec)1220{1221 int nr, alloc = 0;1222 struct path_simplify *simplify = NULL;12231224 if (!pathspec)1225 return NULL;12261227 for (nr = 0 ; ; nr++) {1228 const char *match;1229 if (nr >= alloc) {1230 alloc = alloc_nr(alloc);1231 simplify = xrealloc(simplify, alloc * sizeof(*simplify));1232 }1233 match = *pathspec++;1234 if (!match)1235 break;1236 simplify[nr].path = match;1237 simplify[nr].len = simple_length(match);1238 }1239 simplify[nr].path = NULL;1240 simplify[nr].len = 0;1241 return simplify;1242}12431244static void free_simplify(struct path_simplify *simplify)1245{1246 free(simplify);1247}12481249static int treat_leading_path(struct dir_struct *dir,1250 const char *path, int len,1251 const struct path_simplify *simplify)1252{1253 struct strbuf sb = STRBUF_INIT;1254 int baselen, rc = 0;1255 const char *cp;12561257 while (len && path[len - 1] == '/')1258 len--;1259 if (!len)1260 return 1;1261 baselen = 0;1262 while (1) {1263 cp = path + baselen + !!baselen;1264 cp = memchr(cp, '/', path + len - cp);1265 if (!cp)1266 baselen = len;1267 else1268 baselen = cp - path;1269 strbuf_setlen(&sb, 0);1270 strbuf_add(&sb, path, baselen);1271 if (!is_directory(sb.buf))1272 break;1273 if (simplify_away(sb.buf, sb.len, simplify))1274 break;1275 if (treat_one_path(dir, &sb, simplify,1276 DT_DIR, NULL) == path_ignored)1277 break; /* do not recurse into it */1278 if (len <= baselen) {1279 rc = 1;1280 break; /* finished checking */1281 }1282 }1283 strbuf_release(&sb);1284 return rc;1285}12861287int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)1288{1289 struct path_simplify *simplify;12901291 if (has_symlink_leading_path(path, len))1292 return dir->nr;12931294 simplify = create_simplify(pathspec);1295 if (!len || treat_leading_path(dir, path, len, simplify))1296 read_directory_recursive(dir, path, len, 0, simplify);1297 free_simplify(simplify);1298 qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);1299 qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);1300 return dir->nr;1301}13021303int file_exists(const char *f)1304{1305 struct stat sb;1306 return lstat(f, &sb) == 0;1307}13081309/*1310 * Given two normalized paths (a trailing slash is ok), if subdir is1311 * outside dir, return -1. Otherwise return the offset in subdir that1312 * can be used as relative path to dir.1313 */1314int dir_inside_of(const char *subdir, const char *dir)1315{1316 int offset = 0;13171318 assert(dir && subdir && *dir && *subdir);13191320 while (*dir && *subdir && *dir == *subdir) {1321 dir++;1322 subdir++;1323 offset++;1324 }13251326 /* hel[p]/me vs hel[l]/yeah */1327 if (*dir && *subdir)1328 return -1;13291330 if (!*subdir)1331 return !*dir ? offset : -1; /* same dir */13321333 /* foo/[b]ar vs foo/[] */1334 if (is_dir_sep(dir[-1]))1335 return is_dir_sep(subdir[-1]) ? offset : -1;13361337 /* foo[/]bar vs foo[] */1338 return is_dir_sep(*subdir) ? offset + 1 : -1;1339}13401341int is_inside_dir(const char *dir)1342{1343 char cwd[PATH_MAX];1344 if (!dir)1345 return 0;1346 if (!getcwd(cwd, sizeof(cwd)))1347 die_errno("can't find the current directory");1348 return dir_inside_of(cwd, dir) >= 0;1349}13501351int is_empty_dir(const char *path)1352{1353 DIR *dir = opendir(path);1354 struct dirent *e;1355 int ret = 1;13561357 if (!dir)1358 return 0;13591360 while ((e = readdir(dir)) != NULL)1361 if (!is_dot_or_dotdot(e->d_name)) {1362 ret = 0;1363 break;1364 }13651366 closedir(dir);1367 return ret;1368}13691370static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)1371{1372 DIR *dir;1373 struct dirent *e;1374 int ret = 0, original_len = path->len, len, kept_down = 0;1375 int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);1376 int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL);1377 unsigned char submodule_head[20];13781379 if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&1380 !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {1381 /* Do not descend and nuke a nested git work tree. */1382 if (kept_up)1383 *kept_up = 1;1384 return 0;1385 }13861387 flag &= ~REMOVE_DIR_KEEP_TOPLEVEL;1388 dir = opendir(path->buf);1389 if (!dir) {1390 /* an empty dir could be removed even if it is unreadble */1391 if (!keep_toplevel)1392 return rmdir(path->buf);1393 else1394 return -1;1395 }1396 if (path->buf[original_len - 1] != '/')1397 strbuf_addch(path, '/');13981399 len = path->len;1400 while ((e = readdir(dir)) != NULL) {1401 struct stat st;1402 if (is_dot_or_dotdot(e->d_name))1403 continue;14041405 strbuf_setlen(path, len);1406 strbuf_addstr(path, e->d_name);1407 if (lstat(path->buf, &st))1408 ; /* fall thru */1409 else if (S_ISDIR(st.st_mode)) {1410 if (!remove_dir_recurse(path, flag, &kept_down))1411 continue; /* happy */1412 } else if (!only_empty && !unlink(path->buf))1413 continue; /* happy, too */14141415 /* path too long, stat fails, or non-directory still exists */1416 ret = -1;1417 break;1418 }1419 closedir(dir);14201421 strbuf_setlen(path, original_len);1422 if (!ret && !keep_toplevel && !kept_down)1423 ret = rmdir(path->buf);1424 else if (kept_up)1425 /*1426 * report the uplevel that it is not an error that we1427 * did not rmdir() our directory.1428 */1429 *kept_up = !ret;1430 return ret;1431}14321433int remove_dir_recursively(struct strbuf *path, int flag)1434{1435 return remove_dir_recurse(path, flag, NULL);1436}14371438void setup_standard_excludes(struct dir_struct *dir)1439{1440 const char *path;14411442 dir->exclude_per_dir = ".gitignore";1443 path = git_path("info/exclude");1444 if (!access(path, R_OK))1445 add_excludes_from_file(dir, path);1446 if (excludes_file && !access(excludes_file, R_OK))1447 add_excludes_from_file(dir, excludes_file);1448}14491450int remove_path(const char *name)1451{1452 char *slash;14531454 if (unlink(name) && errno != ENOENT)1455 return -1;14561457 slash = strrchr(name, '/');1458 if (slash) {1459 char *dirs = xstrdup(name);1460 slash = dirs + (slash - name);1461 do {1462 *slash = '\0';1463 } while (rmdir(dirs) == 0 && (slash = strrchr(dirs, '/')));1464 free(dirs);1465 }1466 return 0;1467}14681469static int pathspec_item_cmp(const void *a_, const void *b_)1470{1471 struct pathspec_item *a, *b;14721473 a = (struct pathspec_item *)a_;1474 b = (struct pathspec_item *)b_;1475 return strcmp(a->match, b->match);1476}14771478int init_pathspec(struct pathspec *pathspec, const char **paths)1479{1480 const char **p = paths;1481 int i;14821483 memset(pathspec, 0, sizeof(*pathspec));1484 if (!p)1485 return 0;1486 while (*p)1487 p++;1488 pathspec->raw = paths;1489 pathspec->nr = p - paths;1490 if (!pathspec->nr)1491 return 0;14921493 pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);1494 for (i = 0; i < pathspec->nr; i++) {1495 struct pathspec_item *item = pathspec->items+i;1496 const char *path = paths[i];14971498 item->match = path;1499 item->len = strlen(path);1500 item->use_wildcard = !no_wildcard(path);1501 if (item->use_wildcard)1502 pathspec->has_wildcard = 1;1503 }15041505 qsort(pathspec->items, pathspec->nr,1506 sizeof(struct pathspec_item), pathspec_item_cmp);15071508 return 0;1509}15101511void free_pathspec(struct pathspec *pathspec)1512{1513 free(pathspec->items);1514 pathspec->items = NULL;1515}