From: Junio C Hamano Date: Wed, 13 May 2015 21:05:51 +0000 (-0700) Subject: Merge branch 'cn/bom-in-gitignore' into maint X-Git-Tag: v2.4.1~6 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a?ds=inline;hp=-c Merge branch 'cn/bom-in-gitignore' into maint Teach the codepaths that read .gitignore and .gitattributes files that these files, when encoded in UTF-8, may have a UTF-8 BOM marker at the beginning; this brings them in line with what we already do for configuration files. * cn/bom-in-gitignore: attr: skip UTF8 BOM at the beginning of the input file config: use utf8_bom[] from utf.[ch] in git_parse_source() utf8-bom: introduce skip_utf8_bom() helper add_excludes_from_file: clarify the bom skipping logic dir: allow a BOM at the beginning of exclude files --- 8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a diff --combined attr.c index 1f9eebd2dd,7c530f4d0c..7f445965c1 --- a/attr.c +++ b/attr.c @@@ -12,6 -12,7 +12,7 @@@ #include "exec_cmd.h" #include "attr.h" #include "dir.h" + #include "utf8.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@@ -32,12 -33,9 +33,12 @@@ struct git_attr struct git_attr *next; unsigned h; int attr_nr; + int maybe_macro; + int maybe_real; char name[FLEX_ARRAY]; }; static int attr_nr; +static int cannot_trust_maybe_real; static struct git_attr_check *check_all_attr; static struct git_attr *(git_attr_hash[HASHSIZE]); @@@ -98,8 -96,6 +99,8 @@@ static struct git_attr *git_attr_intern a->h = hval; a->next = git_attr_hash[pos]; a->attr_nr = attr_nr++; + a->maybe_macro = 0; + a->maybe_real = 0; git_attr_hash[pos] = a; REALLOC_ARRAY(check_all_attr, attr_nr); @@@ -249,10 -245,9 +250,10 @@@ static struct match_attr *parse_attr_li sizeof(*res) + sizeof(struct attr_state) * num_attr + (is_macro ? 
0 : namelen + 1)); - if (is_macro) + if (is_macro) { res->u.attr = git_attr_internal(name, namelen); - else { + res->u.attr->maybe_macro = 1; + } else { char *p = (char *)&(res->state[num_attr]); memcpy(p, name, namelen); res->u.pat.pattern = p; @@@ -272,10 -267,6 +273,10 @@@ /* Second pass to fill the attr_states */ for (cp = states, i = 0; *cp; i++) { cp = parse_attr(src, lineno, cp, &(res->state[i])); + if (!is_macro) + res->state[i].attr->maybe_real = 1; + if (res->state[i].attr->maybe_macro) + cannot_trust_maybe_real = 1; } return res; @@@ -379,8 -370,12 +380,12 @@@ static struct attr_stack *read_attr_fro return NULL; } res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) - handle_attr_line(res, buf, path, ++lineno, macro_ok); + while (fgets(buf, sizeof(buf), fp)) { + char *bufp = buf; + if (!lineno) + skip_utf8_bom(&bufp, strlen(bufp)); + handle_attr_line(res, bufp, path, ++lineno, macro_ok); + } fclose(fp); return res; } @@@ -691,14 -686,13 +696,14 @@@ static int fill(const char *path, int p return rem; } -static int macroexpand_one(int attr_nr, int rem) +static int macroexpand_one(int nr, int rem) { struct attr_stack *stk; struct match_attr *a = NULL; int i; - if (check_all_attr[attr_nr].value != ATTR__TRUE) + if (check_all_attr[nr].value != ATTR__TRUE || + !check_all_attr[nr].attr->maybe_macro) return rem; for (stk = attr_stack; !a && stk; stk = stk->prev) @@@ -706,7 -700,7 +711,7 @@@ struct match_attr *ma = stk->attrs[i]; if (!ma->is_macro) continue; - if (ma->u.attr->attr_nr == attr_nr) + if (ma->u.attr->attr_nr == nr) a = ma; } @@@ -717,13 -711,10 +722,13 @@@ } /* - * Collect all attributes for path into the array pointed to by - * check_all_attr. + * Collect attributes for path into the array pointed to by + * check_all_attr. If num is non-zero, only attributes in check[] are + * collected. Otherwise all attributes are collected. 
*/ -static void collect_all_attrs(const char *path) +static void collect_some_attrs(const char *path, int num, + struct git_attr_check *check) + { struct attr_stack *stk; int i, pathlen, rem, dirlen; @@@ -746,19 -737,6 +751,19 @@@ prepare_attr_stack(path, dirlen); for (i = 0; i < attr_nr; i++) check_all_attr[i].value = ATTR__UNKNOWN; + if (num && !cannot_trust_maybe_real) { + rem = 0; + for (i = 0; i < num; i++) { + if (!check[i].attr->maybe_real) { + struct git_attr_check *c; + c = check_all_attr + check[i].attr->attr_nr; + c->value = ATTR__UNSET; + rem++; + } + } + if (rem == num) + return; + } rem = attr_nr; for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) @@@ -769,7 -747,7 +774,7 @@@ int git_check_attr(const char *path, in { int i; - collect_all_attrs(path); + collect_some_attrs(path, num, check); for (i = 0; i < num; i++) { const char *value = check_all_attr[check[i].attr->attr_nr].value; @@@ -785,7 -763,7 +790,7 @@@ int git_all_attrs(const char *path, in { int i, count, j; - collect_all_attrs(path); + collect_some_attrs(path, 0, NULL); /* Count the number of attributes that are set. 
*/ count = 0; diff --combined config.c index 66c0a51bce,9618aa443d..c4424c0138 --- a/config.c +++ b/config.c @@@ -12,6 -12,7 +12,7 @@@ #include "quote.h" #include "hashmap.h" #include "string-list.h" + #include "utf8.h" struct config_source { struct config_source *prev; @@@ -73,12 -74,8 +74,12 @@@ static int config_buf_fgetc(struct conf static int config_buf_ungetc(int c, struct config_source *conf) { - if (conf->u.buf.pos > 0) - return conf->u.buf.buf[--conf->u.buf.pos]; + if (conf->u.buf.pos > 0) { + conf->u.buf.pos--; + if (conf->u.buf.buf[conf->u.buf.pos] != c) + die("BUG: config_buf can only ungetc the same character"); + return c; + } return EOF; } @@@ -239,8 -236,7 +240,8 @@@ static int get_next_char(void /* DOS like systems */ c = cf->do_fgetc(cf); if (c != '\n') { - cf->do_ungetc(c, cf); + if (c != EOF) + cf->do_ungetc(c, cf); c = '\r'; } } @@@ -417,8 -413,7 +418,7 @@@ static int git_parse_source(config_fn_ struct strbuf *var = &cf->var; /* U+FEFF Byte Order Mark in UTF8 */ - static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; - const unsigned char *bomptr = utf8_bom; + const char *bomptr = utf8_bom; for (;;) { int c = get_next_char(); @@@ -426,7 -421,7 +426,7 @@@ /* We are at the file beginning; skip UTF8-encoded BOM * if present. Sane editors won't put this in on their * own, but e.g. Windows Notepad will do it happily. */ - if ((unsigned char) c == *bomptr) { + if (c == (*bomptr & 0377)) { bomptr++; continue; } else { @@@ -1345,7 -1340,7 +1345,7 @@@ static int configset_add_value(struct c string_list_init(&e->value_list, 1); hashmap_add(&cs->config_hash, e); } - si = string_list_append_nodup(&e->value_list, value ? 
xstrdup(value) : NULL); + si = string_list_append_nodup(&e->value_list, xstrdup_or_null(value)); ALLOC_GROW(cs->list.items, cs->list.nr + 1, cs->list.alloc); l_item = &cs->list.items[cs->list.nr++]; diff --combined dir.c index 0943a81964,4c4bf910fa..a3e7073400 --- a/dir.c +++ b/dir.c @@@ -12,6 -12,7 +12,7 @@@ #include "refs.h" #include "wildmatch.h" #include "pathspec.h" + #include "utf8.h" struct path_simplify { int len; @@@ -377,49 -378,6 +378,49 @@@ int match_pathspec(const struct pathspe return negative ? 0 : positive; } +int report_path_error(const char *ps_matched, + const struct pathspec *pathspec, + const char *prefix) +{ + /* + * Make sure all pathspec matched; otherwise it is an error. + */ + struct strbuf sb = STRBUF_INIT; + int num, errors = 0; + for (num = 0; num < pathspec->nr; num++) { + int other, found_dup; + + if (ps_matched[num]) + continue; + /* + * The caller might have fed identical pathspec + * twice. Do not barf on such a mistake. + * FIXME: parse_pathspec should have eliminated + * duplicate pathspec. + */ + for (found_dup = other = 0; + !found_dup && other < pathspec->nr; + other++) { + if (other == num || !ps_matched[other]) + continue; + if (!strcmp(pathspec->items[other].original, + pathspec->items[num].original)) + /* + * Ok, we have a match already. + */ + found_dup = 1; + } + if (found_dup) + continue; + + error("pathspec '%s' did not match any file(s) known to git.", + pathspec->items[num].original); + errors++; + } + strbuf_release(&sb); + return errors; +} + /* * Return the length of the "simple" part of a path match limiter. */ @@@ -617,7 -575,12 +618,12 @@@ int add_excludes_from_file_to_list(cons } el->filebuf = buf; + + if (skip_utf8_bom(&buf, size)) + size -= buf - el->filebuf; + entry = buf; + for (i = 0; i < size; i++) { if (buf[i] == '\n') { if (entry != buf + i && entry[0] != '#') {