1/* 2 * This handles recursive filename detection with exclude 3 * files, index knowledge etc.. 4 * 5 * See Documentation/technical/api-directory-listing.txt 6 * 7 * Copyright (C) Linus Torvalds, 2005-2006 8 * Junio Hamano, 2005-2006 9 */ 10#include "cache.h" 11#include "dir.h" 12#include "refs.h" 13 14struct path_simplify { 15 int len; 16 const char *path; 17}; 18 19static int read_directory_recursive(struct dir_struct *dir, const char *path, int len, 20 int check_only, const struct path_simplify *simplify); 21static int get_dtype(struct dirent *de, const char *path, int len); 22 23/* helper string functions with support for the ignore_case flag */ 24int strcmp_icase(const char *a, const char *b) 25{ 26 return ignore_case ? strcasecmp(a, b) : strcmp(a, b); 27} 28 29int strncmp_icase(const char *a, const char *b, size_t count) 30{ 31 return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); 32} 33 34int fnmatch_icase(const char *pattern, const char *string, int flags) 35{ 36 return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0)); 37} 38 39static size_t common_prefix_len(const char **pathspec) 40{ 41 const char *n, *first; 42 size_t max = 0; 43 44 if (!pathspec) 45 return max; 46 47 first = *pathspec; 48 while ((n = *pathspec++)) { 49 size_t i, len = 0; 50 for (i = 0; first == n || i < max; i++) { 51 char c = n[i]; 52 if (!c || c != first[i] || is_glob_special(c)) 53 break; 54 if (c == '/') 55 len = i + 1; 56 } 57 if (first == n || len < max) { 58 max = len; 59 if (!max) 60 break; 61 } 62 } 63 return max; 64} 65 66/* 67 * Returns a copy of the longest leading path common among all 68 * pathspecs. 69 */ 70char *common_prefix(const char **pathspec) 71{ 72 unsigned long len = common_prefix_len(pathspec); 73 74 return len ? xmemdupz(*pathspec, len) : NULL; 75} 76 77int fill_directory(struct dir_struct *dir, const char **pathspec) 78{ 79 size_t len; 80 81 /* 82 * Calculate common prefix for the pathspec, and 83 * use that to optimize the directory walk 84 */ 85 len = common_prefix_len(pathspec); 86 87 /* Read the directory and prune it */ 88 read_directory(dir, pathspec ? *pathspec : "", len, pathspec); 89 return len; 90} 91 92int within_depth(const char *name, int namelen, 93 int depth, int max_depth) 94{ 95 const char *cp = name, *cpe = name + namelen; 96 97 while (cp < cpe) { 98 if (*cp++ != '/') 99 continue; 100 depth++; 101 if (depth > max_depth) 102 return 0; 103 } 104 return 1; 105} 106 107/* 108 * Does 'match' match the given name? 109 * A match is found if 110 * 111 * (1) the 'match' string is leading directory of 'name', or 112 * (2) the 'match' string is a wildcard and matches 'name', or 113 * (3) the 'match' string is exactly the same as 'name'. 114 * 115 * and the return value tells which case it was. 116 * 117 * It returns 0 when there is no match. 118 */ 119static int match_one(const char *match, const char *name, int namelen) 120{ 121 int matchlen; 122 123 /* If the match was just the prefix, we matched */ 124 if (!*match) 125 return MATCHED_RECURSIVELY; 126 127 if (ignore_case) { 128 for (;;) { 129 unsigned char c1 = tolower(*match); 130 unsigned char c2 = tolower(*name); 131 if (c1 == '\0' || is_glob_special(c1)) 132 break; 133 if (c1 != c2) 134 return 0; 135 match++; 136 name++; 137 namelen--; 138 } 139 } else { 140 for (;;) { 141 unsigned char c1 = *match; 142 unsigned char c2 = *name; 143 if (c1 == '\0' || is_glob_special(c1)) 144 break; 145 if (c1 != c2) 146 return 0; 147 match++; 148 name++; 149 namelen--; 150 } 151 } 152 153 154 /* 155 * If we don't match the matchstring exactly, 156 * we need to match by fnmatch 157 */ 158 matchlen = strlen(match); 159 if (strncmp_icase(match, name, matchlen)) 160 return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0; 161 162 if (namelen == matchlen) 163 return MATCHED_EXACTLY; 164 if (match[matchlen-1] == '/' || name[matchlen] == '/') 165 return MATCHED_RECURSIVELY; 166 return 0; 167} 168 169/* 170 * Given a name and a list of pathspecs, see if the name matches 171 * any of the pathspecs. The caller is also interested in seeing 172 * all pathspec matches some names it calls this function with 173 * (otherwise the user could have mistyped the unmatched pathspec), 174 * and a mark is left in seen[] array for pathspec element that 175 * actually matched anything. 176 */ 177int match_pathspec(const char **pathspec, const char *name, int namelen, 178 int prefix, char *seen) 179{ 180 int i, retval = 0; 181 182 if (!pathspec) 183 return 1; 184 185 name += prefix; 186 namelen -= prefix; 187 188 for (i = 0; pathspec[i] != NULL; i++) { 189 int how; 190 const char *match = pathspec[i] + prefix; 191 if (seen && seen[i] == MATCHED_EXACTLY) 192 continue; 193 how = match_one(match, name, namelen); 194 if (how) { 195 if (retval < how) 196 retval = how; 197 if (seen && seen[i] < how) 198 seen[i] = how; 199 } 200 } 201 return retval; 202} 203 204/* 205 * Does 'match' match the given name? 206 * A match is found if 207 * 208 * (1) the 'match' string is leading directory of 'name', or 209 * (2) the 'match' string is a wildcard and matches 'name', or 210 * (3) the 'match' string is exactly the same as 'name'. 211 * 212 * and the return value tells which case it was. 213 * 214 * It returns 0 when there is no match. 215 */ 216static int match_pathspec_item(const struct pathspec_item *item, int prefix, 217 const char *name, int namelen) 218{ 219 /* name/namelen has prefix cut off by caller */ 220 const char *match = item->match + prefix; 221 int matchlen = item->len - prefix; 222 223 /* If the match was just the prefix, we matched */ 224 if (!*match) 225 return MATCHED_RECURSIVELY; 226 227 if (matchlen <= namelen && !strncmp(match, name, matchlen)) { 228 if (matchlen == namelen) 229 return MATCHED_EXACTLY; 230 231 if (match[matchlen-1] == '/' || name[matchlen] == '/') 232 return MATCHED_RECURSIVELY; 233 } 234 235 if (item->use_wildcard && !fnmatch(match, name, 0)) 236 return MATCHED_FNMATCH; 237 238 return 0; 239} 240 241/* 242 * Given a name and a list of pathspecs, see if the name matches 243 * any of the pathspecs. The caller is also interested in seeing 244 * all pathspec matches some names it calls this function with 245 * (otherwise the user could have mistyped the unmatched pathspec), 246 * and a mark is left in seen[] array for pathspec element that 247 * actually matched anything. 248 */ 249int match_pathspec_depth(const struct pathspec *ps, 250 const char *name, int namelen, 251 int prefix, char *seen) 252{ 253 int i, retval = 0; 254 255 if (!ps->nr) { 256 if (!ps->recursive || ps->max_depth == -1) 257 return MATCHED_RECURSIVELY; 258 259 if (within_depth(name, namelen, 0, ps->max_depth)) 260 return MATCHED_EXACTLY; 261 else 262 return 0; 263 } 264 265 name += prefix; 266 namelen -= prefix; 267 268 for (i = ps->nr - 1; i >= 0; i--) { 269 int how; 270 if (seen && seen[i] == MATCHED_EXACTLY) 271 continue; 272 how = match_pathspec_item(ps->items+i, prefix, name, namelen); 273 if (ps->recursive && ps->max_depth != -1 && 274 how && how != MATCHED_FNMATCH) { 275 int len = ps->items[i].len; 276 if (name[len] == '/') 277 len++; 278 if (within_depth(name+len, namelen-len, 0, ps->max_depth)) 279 how = MATCHED_EXACTLY; 280 else 281 how = 0; 282 } 283 if (how) { 284 if (retval < how) 285 retval = how; 286 if (seen && seen[i] < how) 287 seen[i] = how; 288 } 289 } 290 return retval; 291} 292 293/* 294 * Return the length of the "simple" part of a path match limiter. 295 */ 296static int simple_length(const char *match) 297{ 298 int len = -1; 299 300 for (;;) { 301 unsigned char c = *match++; 302 len++; 303 if (c == '\0' || is_glob_special(c)) 304 return len; 305 } 306} 307 308static int no_wildcard(const char *string) 309{ 310 return string[simple_length(string)] == '\0'; 311} 312 313void parse_exclude_pattern(const char **pattern, 314 int *patternlen, 315 int *flags, 316 int *nowildcardlen) 317{ 318 const char *p = *pattern; 319 size_t i, len; 320 321 *flags = 0; 322 if (*p == '!') { 323 *flags |= EXC_FLAG_NEGATIVE; 324 p++; 325 } 326 len = strlen(p); 327 if (len && p[len - 1] == '/') { 328 len--; 329 *flags |= EXC_FLAG_MUSTBEDIR; 330 } 331 for (i = 0; i < len; i++) { 332 if (p[i] == '/') 333 break; 334 } 335 if (i == len) 336 *flags |= EXC_FLAG_NODIR; 337 *nowildcardlen = simple_length(p); 338 /* 339 * we should have excluded the trailing slash from 'p' too, 340 * but that's one more allocation. Instead just make sure 341 * nowildcardlen does not exceed real patternlen 342 */ 343 if (*nowildcardlen > len) 344 *nowildcardlen = len; 345 if (*p == '*' && no_wildcard(p + 1)) 346 *flags |= EXC_FLAG_ENDSWITH; 347 *pattern = p; 348 *patternlen = len; 349} 350 351void add_exclude(const char *string, const char *base, 352 int baselen, struct exclude_list *el) 353{ 354 struct exclude *x; 355 int patternlen; 356 int flags; 357 int nowildcardlen; 358 359 parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen); 360 if (flags & EXC_FLAG_MUSTBEDIR) { 361 char *s; 362 x = xmalloc(sizeof(*x) + patternlen + 1); 363 s = (char *)(x+1); 364 memcpy(s, string, patternlen); 365 s[patternlen] = '\0'; 366 x->pattern = s; 367 } else { 368 x = xmalloc(sizeof(*x)); 369 x->pattern = string; 370 } 371 x->patternlen = patternlen; 372 x->nowildcardlen = nowildcardlen; 373 x->base = base; 374 x->baselen = baselen; 375 x->flags = flags; 376 ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); 377 el->excludes[el->nr++] = x; 378} 379 380static void *read_skip_worktree_file_from_index(const char *path, size_t *size) 381{ 382 int pos, len; 383 unsigned long sz; 384 enum object_type type; 385 void *data; 386 struct index_state *istate = &the_index; 387 388 len = strlen(path); 389 pos = index_name_pos(istate, path, len); 390 if (pos < 0) 391 return NULL; 392 if (!ce_skip_worktree(istate->cache[pos])) 393 return NULL; 394 data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz); 395 if (!data || type != OBJ_BLOB) { 396 free(data); 397 return NULL; 398 } 399 *size = xsize_t(sz); 400 return data; 401} 402 403/* 404 * Frees memory within el which was allocated for exclude patterns and 405 * the file buffer. Does not free el itself. 406 */ 407void clear_exclude_list(struct exclude_list *el) 408{ 409 int i; 410 411 for (i = 0; i < el->nr; i++) 412 free(el->excludes[i]); 413 free(el->excludes); 414 free(el->filebuf); 415 416 el->nr = 0; 417 el->excludes = NULL; 418 el->filebuf = NULL; 419} 420 421int add_excludes_from_file_to_list(const char *fname, 422 const char *base, 423 int baselen, 424 struct exclude_list *el, 425 int check_index) 426{ 427 struct stat st; 428 int fd, i; 429 size_t size = 0; 430 char *buf, *entry; 431 432 fd = open(fname, O_RDONLY); 433 if (fd < 0 || fstat(fd, &st) < 0) { 434 if (0 <= fd) 435 close(fd); 436 if (!check_index || 437 (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL) 438 return -1; 439 if (size == 0) { 440 free(buf); 441 return 0; 442 } 443 if (buf[size-1] != '\n') { 444 buf = xrealloc(buf, size+1); 445 buf[size++] = '\n'; 446 } 447 } 448 else { 449 size = xsize_t(st.st_size); 450 if (size == 0) { 451 close(fd); 452 return 0; 453 } 454 buf = xmalloc(size+1); 455 if (read_in_full(fd, buf, size) != size) { 456 free(buf); 457 close(fd); 458 return -1; 459 } 460 buf[size++] = '\n'; 461 close(fd); 462 } 463 464 el->filebuf = buf; 465 entry = buf; 466 for (i = 0; i < size; i++) { 467 if (buf[i] == '\n') { 468 if (entry != buf + i && entry[0] != '#') { 469 buf[i - (i && buf[i-1] == '\r')] = 0; 470 add_exclude(entry, base, baselen, el); 471 } 472 entry = buf + i + 1; 473 } 474 } 475 return 0; 476} 477 478struct exclude_list *add_exclude_list(struct dir_struct *dir, int group_type) 479{ 480 struct exclude_list *el; 481 struct exclude_list_group *group; 482 483 group = &dir->exclude_list_group[group_type]; 484 ALLOC_GROW(group->el, group->nr + 1, group->alloc); 485 el = &group->el[group->nr++]; 486 memset(el, 0, sizeof(*el)); 487 return el; 488} 489 490/* 491 * Used to set up core.excludesfile and .git/info/exclude lists. 492 */ 493void add_excludes_from_file(struct dir_struct *dir, const char *fname) 494{ 495 struct exclude_list *el; 496 el = add_exclude_list(dir, EXC_FILE); 497 if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0) 498 die("cannot use %s as an exclude file", fname); 499} 500 501/* 502 * Loads the per-directory exclude list for the substring of base 503 * which has a char length of baselen. 504 */ 505static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) 506{ 507 struct exclude_list_group *group; 508 struct exclude_list *el; 509 struct exclude_stack *stk = NULL; 510 int current; 511 512 if ((!dir->exclude_per_dir) || 513 (baselen + strlen(dir->exclude_per_dir) >= PATH_MAX)) 514 return; /* too long a path -- ignore */ 515 516 group = &dir->exclude_list_group[EXC_DIRS]; 517 518 /* Pop the exclude lists from the EXCL_DIRS exclude_list_group 519 * which originate from directories not in the prefix of the 520 * path being checked. */ 521 while ((stk = dir->exclude_stack) != NULL) { 522 if (stk->baselen <= baselen && 523 !strncmp(dir->basebuf, base, stk->baselen)) 524 break; 525 el = &group->el[dir->exclude_stack->exclude_ix]; 526 dir->exclude_stack = stk->prev; 527 clear_exclude_list(el); 528 free(stk); 529 group->nr--; 530 } 531 532 /* Read from the parent directories and push them down. */ 533 current = stk ? stk->baselen : -1; 534 while (current < baselen) { 535 struct exclude_stack *stk = xcalloc(1, sizeof(*stk)); 536 const char *cp; 537 538 if (current < 0) { 539 cp = base; 540 current = 0; 541 } 542 else { 543 cp = strchr(base + current + 1, '/'); 544 if (!cp) 545 die("oops in prep_exclude"); 546 cp++; 547 } 548 stk->prev = dir->exclude_stack; 549 stk->baselen = cp - base; 550 memcpy(dir->basebuf + current, base + current, 551 stk->baselen - current); 552 strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); 553 el = add_exclude_list(dir, EXC_DIRS); 554 stk->exclude_ix = group->nr - 1; 555 add_excludes_from_file_to_list(dir->basebuf, 556 dir->basebuf, stk->baselen, 557 el, 1); 558 dir->exclude_stack = stk; 559 current = stk->baselen; 560 } 561 dir->basebuf[baselen] = '\0'; 562} 563 564int match_basename(const char *basename, int basenamelen, 565 const char *pattern, int prefix, int patternlen, 566 int flags) 567{ 568 if (prefix == patternlen) { 569 if (!strcmp_icase(pattern, basename)) 570 return 1; 571 } else if (flags & EXC_FLAG_ENDSWITH) { 572 if (patternlen - 1 <= basenamelen && 573 !strcmp_icase(pattern + 1, 574 basename + basenamelen - patternlen + 1)) 575 return 1; 576 } else { 577 if (fnmatch_icase(pattern, basename, 0) == 0) 578 return 1; 579 } 580 return 0; 581} 582 583int match_pathname(const char *pathname, int pathlen, 584 const char *base, int baselen, 585 const char *pattern, int prefix, int patternlen, 586 int flags) 587{ 588 const char *name; 589 int namelen; 590 591 /* 592 * match with FNM_PATHNAME; the pattern has base implicitly 593 * in front of it. 594 */ 595 if (*pattern == '/') { 596 pattern++; 597 prefix--; 598 } 599 600 /* 601 * baselen does not count the trailing slash. base[] may or 602 * may not end with a trailing slash though. 603 */ 604 if (pathlen < baselen + 1 || 605 (baselen && pathname[baselen] != '/') || 606 strncmp_icase(pathname, base, baselen)) 607 return 0; 608 609 namelen = baselen ? pathlen - baselen - 1 : pathlen; 610 name = pathname + pathlen - namelen; 611 612 if (prefix) { 613 /* 614 * if the non-wildcard part is longer than the 615 * remaining pathname, surely it cannot match. 616 */ 617 if (prefix > namelen) 618 return 0; 619 620 if (strncmp_icase(pattern, name, prefix)) 621 return 0; 622 pattern += prefix; 623 name += prefix; 624 namelen -= prefix; 625 } 626 627 return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; 628} 629 630/* 631 * Scan the given exclude list in reverse to see whether pathname 632 * should be ignored. The first match (i.e. the last on the list), if 633 * any, determines the fate. Returns the exclude_list element which 634 * matched, or NULL for undecided. 635 */ 636static struct exclude *last_exclude_matching_from_list(const char *pathname, 637 int pathlen, 638 const char *basename, 639 int *dtype, 640 struct exclude_list *el) 641{ 642 int i; 643 644 if (!el->nr) 645 return NULL; /* undefined */ 646 647 for (i = el->nr - 1; 0 <= i; i--) { 648 struct exclude *x = el->excludes[i]; 649 const char *exclude = x->pattern; 650 int prefix = x->nowildcardlen; 651 652 if (x->flags & EXC_FLAG_MUSTBEDIR) { 653 if (*dtype == DT_UNKNOWN) 654 *dtype = get_dtype(NULL, pathname, pathlen); 655 if (*dtype != DT_DIR) 656 continue; 657 } 658 659 if (x->flags & EXC_FLAG_NODIR) { 660 if (match_basename(basename, 661 pathlen - (basename - pathname), 662 exclude, prefix, x->patternlen, 663 x->flags)) 664 return x; 665 continue; 666 } 667 668 assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); 669 if (match_pathname(pathname, pathlen, 670 x->base, x->baselen ? x->baselen - 1 : 0, 671 exclude, prefix, x->patternlen, x->flags)) 672 return x; 673 } 674 return NULL; /* undecided */ 675} 676 677/* 678 * Scan the list and let the last match determine the fate. 679 * Return 1 for exclude, 0 for include and -1 for undecided. 680 */ 681int is_excluded_from_list(const char *pathname, 682 int pathlen, const char *basename, int *dtype, 683 struct exclude_list *el) 684{ 685 struct exclude *exclude; 686 exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el); 687 if (exclude) 688 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 689 return -1; /* undecided */ 690} 691 692/* 693 * Loads the exclude lists for the directory containing pathname, then 694 * scans all exclude lists to determine whether pathname is excluded. 695 * Returns the exclude_list element which matched, or NULL for 696 * undecided. 697 */ 698static struct exclude *last_exclude_matching(struct dir_struct *dir, 699 const char *pathname, 700 int *dtype_p) 701{ 702 int pathlen = strlen(pathname); 703 int i, j; 704 struct exclude_list_group *group; 705 struct exclude *exclude; 706 const char *basename = strrchr(pathname, '/'); 707 basename = (basename) ? basename+1 : pathname; 708 709 prep_exclude(dir, pathname, basename-pathname); 710 711 for (i = EXC_CMDL; i <= EXC_FILE; i++) { 712 group = &dir->exclude_list_group[i]; 713 for (j = group->nr - 1; j >= 0; j--) { 714 exclude = last_exclude_matching_from_list( 715 pathname, pathlen, basename, dtype_p, 716 &group->el[j]); 717 if (exclude) 718 return exclude; 719 } 720 } 721 return NULL; 722} 723 724/* 725 * Loads the exclude lists for the directory containing pathname, then 726 * scans all exclude lists to determine whether pathname is excluded. 727 * Returns 1 if true, otherwise 0. 728 */ 729static int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) 730{ 731 struct exclude *exclude = 732 last_exclude_matching(dir, pathname, dtype_p); 733 if (exclude) 734 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 735 return 0; 736} 737 738void path_exclude_check_init(struct path_exclude_check *check, 739 struct dir_struct *dir) 740{ 741 check->dir = dir; 742 check->exclude = NULL; 743 strbuf_init(&check->path, 256); 744} 745 746void path_exclude_check_clear(struct path_exclude_check *check) 747{ 748 strbuf_release(&check->path); 749} 750 751/* 752 * For each subdirectory in name, starting with the top-most, checks 753 * to see if that subdirectory is excluded, and if so, returns the 754 * corresponding exclude structure. Otherwise, checks whether name 755 * itself (which is presumably a file) is excluded. 756 * 757 * A path to a directory known to be excluded is left in check->path to 758 * optimize for repeated checks for files in the same excluded directory. 759 */ 760struct exclude *last_exclude_matching_path(struct path_exclude_check *check, 761 const char *name, int namelen, 762 int *dtype) 763{ 764 int i; 765 struct strbuf *path = &check->path; 766 struct exclude *exclude; 767 768 /* 769 * we allow the caller to pass namelen as an optimization; it 770 * must match the length of the name, as we eventually call 771 * is_excluded() on the whole name string. 772 */ 773 if (namelen < 0) 774 namelen = strlen(name); 775 776 /* 777 * If path is non-empty, and name is equal to path or a 778 * subdirectory of path, name should be excluded, because 779 * it's inside a directory which is already known to be 780 * excluded and was previously left in check->path. 781 */ 782 if (path->len && 783 path->len <= namelen && 784 !memcmp(name, path->buf, path->len) && 785 (!name[path->len] || name[path->len] == '/')) 786 return check->exclude; 787 788 strbuf_setlen(path, 0); 789 for (i = 0; name[i]; i++) { 790 int ch = name[i]; 791 792 if (ch == '/') { 793 int dt = DT_DIR; 794 exclude = last_exclude_matching(check->dir, 795 path->buf, &dt); 796 if (exclude) { 797 check->exclude = exclude; 798 return exclude; 799 } 800 } 801 strbuf_addch(path, ch); 802 } 803 804 /* An entry in the index; cannot be a directory with subentries */ 805 strbuf_setlen(path, 0); 806 807 return last_exclude_matching(check->dir, name, dtype); 808} 809 810/* 811 * Is this name excluded? This is for a caller like show_files() that 812 * do not honor directory hierarchy and iterate through paths that are 813 * possibly in an ignored directory. 814 */ 815int is_path_excluded(struct path_exclude_check *check, 816 const char *name, int namelen, int *dtype) 817{ 818 struct exclude *exclude = 819 last_exclude_matching_path(check, name, namelen, dtype); 820 if (exclude) 821 return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; 822 return 0; 823} 824 825static struct dir_entry *dir_entry_new(const char *pathname, int len) 826{ 827 struct dir_entry *ent; 828 829 ent = xmalloc(sizeof(*ent) + len + 1); 830 ent->len = len; 831 memcpy(ent->name, pathname, len); 832 ent->name[len] = 0; 833 return ent; 834} 835 836static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) 837{ 838 if (cache_name_exists(pathname, len, ignore_case)) 839 return NULL; 840 841 ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); 842 return dir->entries[dir->nr++] = dir_entry_new(pathname, len); 843} 844 845struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len) 846{ 847 if (!cache_name_is_other(pathname, len)) 848 return NULL; 849 850 ALLOC_GROW(dir->ignored, dir->ignored_nr+1, dir->ignored_alloc); 851 return dir->ignored[dir->ignored_nr++] = dir_entry_new(pathname, len); 852} 853 854enum exist_status { 855 index_nonexistent = 0, 856 index_directory, 857 index_gitdir 858}; 859 860/* 861 * Do not use the alphabetically stored index to look up 862 * the directory name; instead, use the case insensitive 863 * name hash. 864 */ 865static enum exist_status directory_exists_in_index_icase(const char *dirname, int len) 866{ 867 struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case); 868 unsigned char endchar; 869 870 if (!ce) 871 return index_nonexistent; 872 endchar = ce->name[len]; 873 874 /* 875 * The cache_entry structure returned will contain this dirname 876 * and possibly additional path components. 877 */ 878 if (endchar == '/') 879 return index_directory; 880 881 /* 882 * If there are no additional path components, then this cache_entry 883 * represents a submodule. Submodules, despite being directories, 884 * are stored in the cache without a closing slash. 885 */ 886 if (!endchar && S_ISGITLINK(ce->ce_mode)) 887 return index_gitdir; 888 889 /* This should never be hit, but it exists just in case. */ 890 return index_nonexistent; 891} 892 893/* 894 * The index sorts alphabetically by entry name, which 895 * means that a gitlink sorts as '\0' at the end, while 896 * a directory (which is defined not as an entry, but as 897 * the files it contains) will sort with the '/' at the 898 * end. 899 */ 900static enum exist_status directory_exists_in_index(const char *dirname, int len) 901{ 902 int pos; 903 904 if (ignore_case) 905 return directory_exists_in_index_icase(dirname, len); 906 907 pos = cache_name_pos(dirname, len); 908 if (pos < 0) 909 pos = -pos-1; 910 while (pos < active_nr) { 911 struct cache_entry *ce = active_cache[pos++]; 912 unsigned char endchar; 913 914 if (strncmp(ce->name, dirname, len)) 915 break; 916 endchar = ce->name[len]; 917 if (endchar > '/') 918 break; 919 if (endchar == '/') 920 return index_directory; 921 if (!endchar && S_ISGITLINK(ce->ce_mode)) 922 return index_gitdir; 923 } 924 return index_nonexistent; 925} 926 927/* 928 * When we find a directory when traversing the filesystem, we 929 * have three distinct cases: 930 * 931 * - ignore it 932 * - see it as a directory 933 * - recurse into it 934 * 935 * and which one we choose depends on a combination of existing 936 * git index contents and the flags passed into the directory 937 * traversal routine. 938 * 939 * Case 1: If we *already* have entries in the index under that 940 * directory name, we always recurse into the directory to see 941 * all the files. 942 * 943 * Case 2: If we *already* have that directory name as a gitlink, 944 * we always continue to see it as a gitlink, regardless of whether 945 * there is an actual git directory there or not (it might not 946 * be checked out as a subproject!) 947 * 948 * Case 3: if we didn't have it in the index previously, we 949 * have a few sub-cases: 950 * 951 * (a) if "show_other_directories" is true, we show it as 952 * just a directory, unless "hide_empty_directories" is 953 * also true and the directory is empty, in which case 954 * we just ignore it entirely. 955 * (b) if it looks like a git directory, and we don't have 956 * 'no_gitlinks' set we treat it as a gitlink, and show it 957 * as a directory. 958 * (c) otherwise, we recurse into it. 959 */ 960enum directory_treatment { 961 show_directory, 962 ignore_directory, 963 recurse_into_directory 964}; 965 966static enum directory_treatment treat_directory(struct dir_struct *dir, 967 const char *dirname, int len, 968 const struct path_simplify *simplify) 969{ 970 /* The "len-1" is to strip the final '/' */ 971 switch (directory_exists_in_index(dirname, len-1)) { 972 case index_directory: 973 return recurse_into_directory; 974 975 case index_gitdir: 976 if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) 977 return ignore_directory; 978 return show_directory; 979 980 case index_nonexistent: 981 if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) 982 break; 983 if (!(dir->flags & DIR_NO_GITLINKS)) { 984 unsigned char sha1[20]; 985 if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0) 986 return show_directory; 987 } 988 return recurse_into_directory; 989 } 990 991 /* This is the "show_other_directories" case */ 992 if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) 993 return show_directory; 994 if (!read_directory_recursive(dir, dirname, len, 1, simplify)) 995 return ignore_directory; 996 return show_directory; 997} 998 999/*1000 * This is an inexact early pruning of any recursive directory1001 * reading - if the path cannot possibly be in the pathspec,1002 * return true, and we'll skip it early.1003 */1004static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)1005{1006 if (simplify) {1007 for (;;) {1008 const char *match = simplify->path;1009 int len = simplify->len;10101011 if (!match)1012 break;1013 if (len > pathlen)1014 len = pathlen;1015 if (!memcmp(path, match, len))1016 return 0;1017 simplify++;1018 }1019 return 1;1020 }1021 return 0;1022}10231024/*1025 * This function tells us whether an excluded path matches a1026 * list of "interesting" pathspecs. That is, whether a path matched1027 * by any of the pathspecs could possibly be ignored by excluding1028 * the specified path. This can happen if:1029 *1030 * 1. the path is mentioned explicitly in the pathspec1031 *1032 * 2. the path is a directory prefix of some element in the1033 * pathspec1034 */1035static int exclude_matches_pathspec(const char *path, int len,1036 const struct path_simplify *simplify)1037{1038 if (simplify) {1039 for (; simplify->path; simplify++) {1040 if (len == simplify->len1041 && !memcmp(path, simplify->path, len))1042 return 1;1043 if (len < simplify->len1044 && simplify->path[len] == '/'1045 && !memcmp(path, simplify->path, len))1046 return 1;1047 }1048 }1049 return 0;1050}10511052static int get_index_dtype(const char *path, int len)1053{1054 int pos;1055 struct cache_entry *ce;10561057 ce = cache_name_exists(path, len, 0);1058 if (ce) {1059 if (!ce_uptodate(ce))1060 return DT_UNKNOWN;1061 if (S_ISGITLINK(ce->ce_mode))1062 return DT_DIR;1063 /*1064 * Nobody actually cares about the1065 * difference between DT_LNK and DT_REG1066 */1067 return DT_REG;1068 }10691070 /* Try to look it up as a directory */1071 pos = cache_name_pos(path, len);1072 if (pos >= 0)1073 return DT_UNKNOWN;1074 pos = -pos-1;1075 while (pos < active_nr) {1076 ce = active_cache[pos++];1077 if (strncmp(ce->name, path, len))1078 break;1079 if (ce->name[len] > '/')1080 break;1081 if (ce->name[len] < '/')1082 continue;1083 if (!ce_uptodate(ce))1084 break; /* continue? */1085 return DT_DIR;1086 }1087 return DT_UNKNOWN;1088}10891090static int get_dtype(struct dirent *de, const char *path, int len)1091{1092 int dtype = de ? DTYPE(de) : DT_UNKNOWN;1093 struct stat st;10941095 if (dtype != DT_UNKNOWN)1096 return dtype;1097 dtype = get_index_dtype(path, len);1098 if (dtype != DT_UNKNOWN)1099 return dtype;1100 if (lstat(path, &st))1101 return dtype;1102 if (S_ISREG(st.st_mode))1103 return DT_REG;1104 if (S_ISDIR(st.st_mode))1105 return DT_DIR;1106 if (S_ISLNK(st.st_mode))1107 return DT_LNK;1108 return dtype;1109}11101111enum path_treatment {1112 path_ignored,1113 path_handled,1114 path_recurse1115};11161117static enum path_treatment treat_one_path(struct dir_struct *dir,1118 struct strbuf *path,1119 const struct path_simplify *simplify,1120 int dtype, struct dirent *de)1121{1122 int exclude = is_excluded(dir, path->buf, &dtype);1123 if (exclude && (dir->flags & DIR_COLLECT_IGNORED)1124 && exclude_matches_pathspec(path->buf, path->len, simplify))1125 dir_add_ignored(dir, path->buf, path->len);11261127 /*1128 * Excluded? If we don't explicitly want to show1129 * ignored files, ignore it1130 */1131 if (exclude && !(dir->flags & DIR_SHOW_IGNORED))1132 return path_ignored;11331134 if (dtype == DT_UNKNOWN)1135 dtype = get_dtype(de, path->buf, path->len);11361137 /*1138 * Do we want to see just the ignored files?1139 * We still need to recurse into directories,1140 * even if we don't ignore them, since the1141 * directory may contain files that we do..1142 */1143 if (!exclude && (dir->flags & DIR_SHOW_IGNORED)) {1144 if (dtype != DT_DIR)1145 return path_ignored;1146 }11471148 switch (dtype) {1149 default:1150 return path_ignored;1151 case DT_DIR:1152 strbuf_addch(path, '/');1153 switch (treat_directory(dir, path->buf, path->len, simplify)) {1154 case show_directory:1155 if (exclude != !!(dir->flags1156 & DIR_SHOW_IGNORED))1157 return path_ignored;1158 break;1159 case recurse_into_directory:1160 return path_recurse;1161 case ignore_directory:1162 return path_ignored;1163 }1164 break;1165 case DT_REG:1166 case DT_LNK:1167 break;1168 }1169 return path_handled;1170}11711172static enum path_treatment treat_path(struct dir_struct *dir,1173 struct dirent *de,1174 struct strbuf *path,1175 int baselen,1176 const struct path_simplify *simplify)1177{1178 int dtype;11791180 if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))1181 return path_ignored;1182 strbuf_setlen(path, baselen);1183 strbuf_addstr(path, de->d_name);1184 if (simplify_away(path->buf, path->len, simplify))1185 return path_ignored;11861187 dtype = DTYPE(de);1188 return treat_one_path(dir, path, simplify, dtype, de);1189}11901191/*1192 * Read a directory tree. We currently ignore anything but1193 * directories, regular files and symlinks. That's because git1194 * doesn't handle them at all yet. Maybe that will change some1195 * day.1196 *1197 * Also, we ignore the name ".git" (even if it is not a directory).1198 * That likely will not change.1199 */1200static int read_directory_recursive(struct dir_struct *dir,1201 const char *base, int baselen,1202 int check_only,1203 const struct path_simplify *simplify)1204{1205 DIR *fdir;1206 int contents = 0;1207 struct dirent *de;1208 struct strbuf path = STRBUF_INIT;12091210 strbuf_add(&path, base, baselen);12111212 fdir = opendir(path.len ? path.buf : ".");1213 if (!fdir)1214 goto out;12151216 while ((de = readdir(fdir)) != NULL) {1217 switch (treat_path(dir, de, &path, baselen, simplify)) {1218 case path_recurse:1219 contents += read_directory_recursive(dir, path.buf,1220 path.len, 0,1221 simplify);1222 continue;1223 case path_ignored:1224 continue;1225 case path_handled:1226 break;1227 }1228 contents++;1229 if (check_only)1230 break;1231 dir_add_name(dir, path.buf, path.len);1232 }1233 closedir(fdir);1234 out:1235 strbuf_release(&path);12361237 return contents;1238}12391240static int cmp_name(const void *p1, const void *p2)1241{1242 const struct dir_entry *e1 = *(const struct dir_entry **)p1;1243 const struct dir_entry *e2 = *(const struct dir_entry **)p2;12441245 return cache_name_compare(e1->name, e1->len,1246 e2->name, e2->len);1247}12481249static struct path_simplify *create_simplify(const char **pathspec)1250{1251 int nr, alloc = 0;1252 struct path_simplify *simplify = NULL;12531254 if (!pathspec)1255 return NULL;12561257 for (nr = 0 ; ; nr++) {1258 const char *match;1259 if (nr >= alloc) {1260 alloc = alloc_nr(alloc);1261 simplify = xrealloc(simplify, alloc * sizeof(*simplify));1262 }1263 match = *pathspec++;1264 if (!match)1265 break;1266 simplify[nr].path = match;1267 simplify[nr].len = simple_length(match);1268 }1269 simplify[nr].path = NULL;1270 simplify[nr].len = 0;1271 return simplify;1272}12731274static void free_simplify(struct path_simplify *simplify)1275{1276 free(simplify);1277}12781279static int treat_leading_path(struct dir_struct *dir,1280 const char *path, int len,1281 const struct path_simplify *simplify)1282{1283 struct strbuf sb = STRBUF_INIT;1284 int baselen, rc = 0;1285 const char *cp;12861287 while (len && path[len - 1] == '/')1288 len--;1289 if (!len)1290 return 1;1291 baselen = 0;1292 while (1) {1293 cp = path + baselen + !!baselen;1294 cp = memchr(cp, '/', path + len - cp);1295 if (!cp)1296 baselen = len;1297 else1298 baselen = cp - path;1299 strbuf_setlen(&sb, 0);1300 strbuf_add(&sb, path, baselen);1301 if (!is_directory(sb.buf))1302 break;1303 if (simplify_away(sb.buf, sb.len, simplify))1304 break;1305 if (treat_one_path(dir, &sb, simplify,1306 DT_DIR, NULL) == path_ignored)1307 break; /* do not recurse into it */1308 if (len <= baselen) {1309 rc = 1;1310 break; /* finished checking */1311 }1312 }1313 strbuf_release(&sb);1314 return rc;1315}13161317int read_directory(struct dir_struct *dir, const char *path, int len, const char **pathspec)1318{1319 struct path_simplify *simplify;13201321 if (has_symlink_leading_path(path, len))1322 return dir->nr;13231324 simplify = create_simplify(pathspec);1325 if (!len || treat_leading_path(dir, path, len, simplify))1326 read_directory_recursive(dir, path, len, 0, simplify);1327 free_simplify(simplify);1328 qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);1329 qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);1330 return dir->nr;1331}13321333int file_exists(const char *f)1334{1335 struct stat sb;1336 return lstat(f, &sb) == 0;1337}13381339/*1340 * Given two normalized paths (a trailing slash is ok), if subdir is1341 * outside dir, return -1. Otherwise return the offset in subdir that1342 * can be used as relative path to dir.1343 */1344int dir_inside_of(const char *subdir, const char *dir)1345{1346 int offset = 0;13471348 assert(dir && subdir && *dir && *subdir);13491350 while (*dir && *subdir && *dir == *subdir) {1351 dir++;1352 subdir++;1353 offset++;1354 }13551356 /* hel[p]/me vs hel[l]/yeah */1357 if (*dir && *subdir)1358 return -1;13591360 if (!*subdir)1361 return !*dir ? offset : -1; /* same dir */13621363 /* foo/[b]ar vs foo/[] */1364 if (is_dir_sep(dir[-1]))1365 return is_dir_sep(subdir[-1]) ? offset : -1;13661367 /* foo[/]bar vs foo[] */1368 return is_dir_sep(*subdir) ? offset + 1 : -1;1369}13701371int is_inside_dir(const char *dir)1372{1373 char cwd[PATH_MAX];1374 if (!dir)1375 return 0;1376 if (!getcwd(cwd, sizeof(cwd)))1377 die_errno("can't find the current directory");1378 return dir_inside_of(cwd, dir) >= 0;1379}13801381int is_empty_dir(const char *path)1382{1383 DIR *dir = opendir(path);1384 struct dirent *e;1385 int ret = 1;13861387 if (!dir)1388 return 0;13891390 while ((e = readdir(dir)) != NULL)1391 if (!is_dot_or_dotdot(e->d_name)) {1392 ret = 0;1393 break;1394 }13951396 closedir(dir);1397 return ret;1398}13991400static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)1401{1402 DIR *dir;1403 struct dirent *e;1404 int ret = 0, original_len = path->len, len, kept_down = 0;1405 int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY);1406 int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL);1407 unsigned char submodule_head[20];14081409 if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) &&1410 !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {1411 /* Do not descend and nuke a nested git work tree. */1412 if (kept_up)1413 *kept_up = 1;1414 return 0;1415 }14161417 flag &= ~REMOVE_DIR_KEEP_TOPLEVEL;1418 dir = opendir(path->buf);1419 if (!dir) {1420 /* an empty dir could be removed even if it is unreadble */1421 if (!keep_toplevel)1422 return rmdir(path->buf);1423 else1424 return -1;1425 }1426 if (path->buf[original_len - 1] != '/')1427 strbuf_addch(path, '/');14281429 len = path->len;1430 while ((e = readdir(dir)) != NULL) {1431 struct stat st;1432 if (is_dot_or_dotdot(e->d_name))1433 continue;14341435 strbuf_setlen(path, len);1436 strbuf_addstr(path, e->d_name);1437 if (lstat(path->buf, &st))1438 ; /* fall thru */1439 else if (S_ISDIR(st.st_mode)) {1440 if (!remove_dir_recurse(path, flag, &kept_down))1441 continue; /* happy */1442 } else if (!only_empty && !unlink(path->buf))1443 continue; /* happy, too */14441445 /* path too long, stat fails, or non-directory still exists */1446 ret = -1;1447 break;1448 }1449 closedir(dir);14501451 strbuf_setlen(path, original_len);1452 if (!ret && !keep_toplevel && !kept_down)1453 ret = rmdir(path->buf);1454 else if (kept_up)1455 /*1456 * report the uplevel that it is not an error that we1457 * did not rmdir() our directory.1458 */1459 *kept_up = !ret;1460 return ret;1461}14621463int remove_dir_recursively(struct strbuf *path, int flag)1464{1465 return remove_dir_recurse(path, flag, NULL);1466}14671468void setup_standard_excludes(struct dir_struct *dir)1469{1470 const char *path;14711472 dir->exclude_per_dir = ".gitignore";1473 path = git_path("info/exclude");1474 if (!access(path, R_OK))1475 add_excludes_from_file(dir, path);1476 if (excludes_file && !access(excludes_file, R_OK))1477 add_excludes_from_file(dir, excludes_file);1478}14791480int remove_path(const char *name)1481{1482 char *slash;14831484 if (unlink(name) && errno != ENOENT)1485 return -1;14861487 slash = strrchr(name, '/');1488 if (slash) {1489 char *dirs = xstrdup(name);1490 slash = dirs + (slash - name);1491 do {1492 *slash = '\0';1493 } while (rmdir(dirs) == 0 && (slash = strrchr(dirs, '/')));1494 free(dirs);1495 }1496 return 0;1497}14981499static int pathspec_item_cmp(const void *a_, const void *b_)1500{1501 struct pathspec_item *a, *b;15021503 a = (struct pathspec_item *)a_;1504 b = (struct pathspec_item *)b_;1505 return strcmp(a->match, b->match);1506}15071508int init_pathspec(struct pathspec *pathspec, const char **paths)1509{1510 const char **p = paths;1511 int i;15121513 memset(pathspec, 0, sizeof(*pathspec));1514 if (!p)1515 return 0;1516 while (*p)1517 p++;1518 pathspec->raw = paths;1519 pathspec->nr = p - paths;1520 if (!pathspec->nr)1521 return 0;15221523 pathspec->items = xmalloc(sizeof(struct pathspec_item)*pathspec->nr);1524 for (i = 0; i < pathspec->nr; i++) {1525 struct pathspec_item *item = pathspec->items+i;1526 const char *path = paths[i];15271528 item->match = path;1529 item->len = strlen(path);1530 item->use_wildcard = !no_wildcard(path);1531 if (item->use_wildcard)1532 pathspec->has_wildcard = 1;1533 }15341535 qsort(pathspec->items, pathspec->nr,1536 sizeof(struct pathspec_item), pathspec_item_cmp);15371538 return 0;1539}15401541void free_pathspec(struct pathspec *pathspec)1542{1543 free(pathspec->items);1544 pathspec->items = NULL;1545}