From: Junio C Hamano <gitster@pobox.com>
Date: Wed, 13 May 2015 21:05:51 +0000 (-0700)
Subject: Merge branch 'cn/bom-in-gitignore' into maint
X-Git-Tag: v2.4.1~6
X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a?ds=inline;hp=-c

Merge branch 'cn/bom-in-gitignore' into maint

Teach the codepaths that read .gitignore and .gitattributes files
that these files, when encoded in UTF-8, may have a UTF-8 BOM at the
beginning; this brings them in line with what we already do for
configuration files.

* cn/bom-in-gitignore:
attr: skip UTF8 BOM at the beginning of the input file
config: use utf8_bom[] from utf.[ch] in git_parse_source()
utf8-bom: introduce skip_utf8_bom() helper
add_excludes_from_file: clarify the bom skipping logic
dir: allow a BOM at the beginning of exclude files
---

8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a
diff --combined attr.c
index 1f9eebd2dd,7c530f4d0c..7f445965c1
--- a/attr.c
+++ b/attr.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "exec_cmd.h"
  #include "attr.h"
  #include "dir.h"
+ #include "utf8.h"
  
  const char git_attr__true[] = "(builtin)true";
  const char git_attr__false[] = "\0(builtin)false";
@@@ -32,12 -33,9 +33,12 @@@ struct git_attr 
  	struct git_attr *next;
  	unsigned h;
  	int attr_nr;
 +	int maybe_macro;
 +	int maybe_real;
  	char name[FLEX_ARRAY];
  };
  static int attr_nr;
 +static int cannot_trust_maybe_real;
  
  static struct git_attr_check *check_all_attr;
  static struct git_attr *(git_attr_hash[HASHSIZE]);
@@@ -98,8 -96,6 +99,8 @@@ static struct git_attr *git_attr_intern
  	a->h = hval;
  	a->next = git_attr_hash[pos];
  	a->attr_nr = attr_nr++;
 +	a->maybe_macro = 0;
 +	a->maybe_real = 0;
  	git_attr_hash[pos] = a;
  
  	REALLOC_ARRAY(check_all_attr, attr_nr);
@@@ -249,10 -245,9 +250,10 @@@ static struct match_attr *parse_attr_li
  		      sizeof(*res) +
  		      sizeof(struct attr_state) * num_attr +
  		      (is_macro ? 0 : namelen + 1));
 -	if (is_macro)
 +	if (is_macro) {
  		res->u.attr = git_attr_internal(name, namelen);
 -	else {
 +		res->u.attr->maybe_macro = 1;
 +	} else {
  		char *p = (char *)&(res->state[num_attr]);
  		memcpy(p, name, namelen);
  		res->u.pat.pattern = p;
@@@ -272,10 -267,6 +273,10 @@@
  	/* Second pass to fill the attr_states */
  	for (cp = states, i = 0; *cp; i++) {
  		cp = parse_attr(src, lineno, cp, &(res->state[i]));
 +		if (!is_macro)
 +			res->state[i].attr->maybe_real = 1;
 +		if (res->state[i].attr->maybe_macro)
 +			cannot_trust_maybe_real = 1;
  	}
  
  	return res;
@@@ -379,8 -370,12 +380,12 @@@ static struct attr_stack *read_attr_fro
  		return NULL;
  	}
  	res = xcalloc(1, sizeof(*res));
- 	while (fgets(buf, sizeof(buf), fp))
- 		handle_attr_line(res, buf, path, ++lineno, macro_ok);
+ 	while (fgets(buf, sizeof(buf), fp)) {
+ 		char *bufp = buf;
+ 		if (!lineno)
+ 			skip_utf8_bom(&bufp, strlen(bufp));
+ 		handle_attr_line(res, bufp, path, ++lineno, macro_ok);
+ 	}
  	fclose(fp);
  	return res;
  }
@@@ -691,14 -686,13 +696,14 @@@ static int fill(const char *path, int p
  	return rem;
  }
  
 -static int macroexpand_one(int attr_nr, int rem)
 +static int macroexpand_one(int nr, int rem)
  {
  	struct attr_stack *stk;
  	struct match_attr *a = NULL;
  	int i;
  
 -	if (check_all_attr[attr_nr].value != ATTR__TRUE)
 +	if (check_all_attr[nr].value != ATTR__TRUE ||
 +	    !check_all_attr[nr].attr->maybe_macro)
  		return rem;
  
  	for (stk = attr_stack; !a && stk; stk = stk->prev)
@@@ -706,7 -700,7 +711,7 @@@
  			struct match_attr *ma = stk->attrs[i];
  			if (!ma->is_macro)
  				continue;
 -			if (ma->u.attr->attr_nr == attr_nr)
 +			if (ma->u.attr->attr_nr == nr)
  				a = ma;
  		}
  
@@@ -717,13 -711,10 +722,13 @@@
  }
  
  /*
 - * Collect all attributes for path into the array pointed to by
 - * check_all_attr.
 + * Collect attributes for path into the array pointed to by
 + * check_all_attr. If num is non-zero, only attributes in check[] are
 + * collected. Otherwise all attributes are collected.
   */
 -static void collect_all_attrs(const char *path)
 +static void collect_some_attrs(const char *path, int num,
 +			       struct git_attr_check *check)
 +
  {
  	struct attr_stack *stk;
  	int i, pathlen, rem, dirlen;
@@@ -746,19 -737,6 +751,19 @@@
  	prepare_attr_stack(path, dirlen);
  	for (i = 0; i < attr_nr; i++)
  		check_all_attr[i].value = ATTR__UNKNOWN;
 +	if (num && !cannot_trust_maybe_real) {
 +		rem = 0;
 +		for (i = 0; i < num; i++) {
 +			if (!check[i].attr->maybe_real) {
 +				struct git_attr_check *c;
 +				c = check_all_attr + check[i].attr->attr_nr;
 +				c->value = ATTR__UNSET;
 +				rem++;
 +			}
 +		}
 +		if (rem == num)
 +			return;
 +	}
  
  	rem = attr_nr;
  	for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
@@@ -769,7 -747,7 +774,7 @@@ int git_check_attr(const char *path, in
  {
  	int i;
  
 -	collect_all_attrs(path);
 +	collect_some_attrs(path, num, check);
  
  	for (i = 0; i < num; i++) {
  		const char *value = check_all_attr[check[i].attr->attr_nr].value;
@@@ -785,7 -763,7 +790,7 @@@ int git_all_attrs(const char *path, in
  {
  	int i, count, j;
  
 -	collect_all_attrs(path);
 +	collect_some_attrs(path, 0, NULL);
  
  	/* Count the number of attributes that are set. */
  	count = 0;
diff --combined config.c
index 66c0a51bce,9618aa443d..c4424c0138
--- a/config.c
+++ b/config.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "quote.h"
  #include "hashmap.h"
  #include "string-list.h"
+ #include "utf8.h"
  
  struct config_source {
  	struct config_source *prev;
@@@ -73,12 -74,8 +74,12 @@@ static int config_buf_fgetc(struct conf
  
  static int config_buf_ungetc(int c, struct config_source *conf)
  {
 -	if (conf->u.buf.pos > 0)
 -		return conf->u.buf.buf[--conf->u.buf.pos];
 +	if (conf->u.buf.pos > 0) {
 +		conf->u.buf.pos--;
 +		if (conf->u.buf.buf[conf->u.buf.pos] != c)
 +			die("BUG: config_buf can only ungetc the same character");
 +		return c;
 +	}
  
  	return EOF;
  }
@@@ -239,8 -236,7 +240,8 @@@ static int get_next_char(void
  		/* DOS like systems */
  		c = cf->do_fgetc(cf);
  		if (c != '\n') {
 -			cf->do_ungetc(c, cf);
 +			if (c != EOF)
 +				cf->do_ungetc(c, cf);
  			c = '\r';
  		}
  	}
@@@ -417,8 -413,7 +418,7 @@@ static int git_parse_source(config_fn_
  	struct strbuf *var = &cf->var;
  
  	/* U+FEFF Byte Order Mark in UTF8 */
- 	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
- 	const unsigned char *bomptr = utf8_bom;
+ 	const char *bomptr = utf8_bom;
  
  	for (;;) {
  		int c = get_next_char();
@@@ -426,7 -421,7 +426,7 @@@
  			/* We are at the file beginning; skip UTF8-encoded BOM
  			 * if present. Sane editors won't put this in on their
  			 * own, but e.g. Windows Notepad will do it happily. */
- 			if ((unsigned char) c == *bomptr) {
+ 			if (c == (*bomptr & 0377)) {
  				bomptr++;
  				continue;
  			} else {
@@@ -1345,7 -1340,7 +1345,7 @@@ static int configset_add_value(struct c
  		string_list_init(&e->value_list, 1);
  		hashmap_add(&cs->config_hash, e);
  	}
 -	si = string_list_append_nodup(&e->value_list, value ? xstrdup(value) : NULL);
 +	si = string_list_append_nodup(&e->value_list, xstrdup_or_null(value));
  
  	ALLOC_GROW(cs->list.items, cs->list.nr + 1, cs->list.alloc);
  	l_item = &cs->list.items[cs->list.nr++];
diff --combined dir.c
index 0943a81964,4c4bf910fa..a3e7073400
--- a/dir.c
+++ b/dir.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "refs.h"
  #include "wildmatch.h"
  #include "pathspec.h"
+ #include "utf8.h"
  
  struct path_simplify {
  	int len;
@@@ -377,49 -378,6 +378,49 @@@ int match_pathspec(const struct pathspe
  	return negative ? 0 : positive;
  }
  
 +int report_path_error(const char *ps_matched,
 +		      const struct pathspec *pathspec,
 +		      const char *prefix)
 +{
 +	/*
 +	 * Make sure all pathspec matched; otherwise it is an error.
 +	 */
 +	struct strbuf sb = STRBUF_INIT;
 +	int num, errors = 0;
 +	for (num = 0; num < pathspec->nr; num++) {
 +		int other, found_dup;
 +
 +		if (ps_matched[num])
 +			continue;
 +		/*
 +		 * The caller might have fed identical pathspec
 +		 * twice.  Do not barf on such a mistake.
 +		 * FIXME: parse_pathspec should have eliminated
 +		 * duplicate pathspec.
 +		 */
 +		for (found_dup = other = 0;
 +		     !found_dup && other < pathspec->nr;
 +		     other++) {
 +			if (other == num || !ps_matched[other])
 +				continue;
 +			if (!strcmp(pathspec->items[other].original,
 +				    pathspec->items[num].original))
 +				/*
 +				 * Ok, we have a match already.
 +				 */
 +				found_dup = 1;
 +		}
 +		if (found_dup)
 +			continue;
 +
 +		error("pathspec '%s' did not match any file(s) known to git.",
 +		      pathspec->items[num].original);
 +		errors++;
 +	}
 +	strbuf_release(&sb);
 +	return errors;
 +}
 +
  /*
   * Return the length of the "simple" part of a path match limiter.
   */
@@@ -617,7 -575,12 +618,12 @@@ int add_excludes_from_file_to_list(cons
  	}
  
  	el->filebuf = buf;
+ 
+ 	if (skip_utf8_bom(&buf, size))
+ 		size -= buf - el->filebuf;
+ 
  	entry = buf;
+ 
  	for (i = 0; i < size; i++) {
  		if (buf[i] == '\n') {
  			if (entry != buf + i && entry[0] != '#') {