Merge branch 'cn/bom-in-gitignore' into maint
author Junio C Hamano <gitster@pobox.com>
Wed, 13 May 2015 21:05:51 +0000 (14:05 -0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 13 May 2015 21:05:51 +0000 (14:05 -0700)
Teach the codepaths that read .gitignore and .gitattributes files
that these files, when encoded in UTF-8, may have a UTF-8 BOM marker
at the beginning; this brings them in line with what we already do
for configuration files.

* cn/bom-in-gitignore:
attr: skip UTF8 BOM at the beginning of the input file
config: use utf8_bom[] from utf8.[ch] in git_parse_source()
utf8-bom: introduce skip_utf8_bom() helper
add_excludes_from_file: clarify the bom skipping logic
dir: allow a BOM at the beginning of exclude files

1  2 
attr.c
config.c
dir.c
diff --combined attr.c
index 1f9eebd2ddb7fe8a561500c41f33a52d1fb4fdd1,7c530f4d0c4c70666d49a97ed45e16461ccce702..7f445965c1886fe8cb53966969b48f4b0bde4826
--- 1/attr.c
--- 2/attr.c
+++ b/attr.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "exec_cmd.h"
  #include "attr.h"
  #include "dir.h"
+ #include "utf8.h"
  
  const char git_attr__true[] = "(builtin)true";
  const char git_attr__false[] = "\0(builtin)false";
@@@ -32,12 -33,9 +33,12 @@@ struct git_attr 
        struct git_attr *next;
        unsigned h;
        int attr_nr;
 +      int maybe_macro;
 +      int maybe_real;
        char name[FLEX_ARRAY];
  };
  static int attr_nr;
 +static int cannot_trust_maybe_real;
  
  static struct git_attr_check *check_all_attr;
  static struct git_attr *(git_attr_hash[HASHSIZE]);
@@@ -98,8 -96,6 +99,8 @@@ static struct git_attr *git_attr_intern
        a->h = hval;
        a->next = git_attr_hash[pos];
        a->attr_nr = attr_nr++;
 +      a->maybe_macro = 0;
 +      a->maybe_real = 0;
        git_attr_hash[pos] = a;
  
        REALLOC_ARRAY(check_all_attr, attr_nr);
@@@ -249,10 -245,9 +250,10 @@@ static struct match_attr *parse_attr_li
                      sizeof(*res) +
                      sizeof(struct attr_state) * num_attr +
                      (is_macro ? 0 : namelen + 1));
 -      if (is_macro)
 +      if (is_macro) {
                res->u.attr = git_attr_internal(name, namelen);
 -      else {
 +              res->u.attr->maybe_macro = 1;
 +      } else {
                char *p = (char *)&(res->state[num_attr]);
                memcpy(p, name, namelen);
                res->u.pat.pattern = p;
        /* Second pass to fill the attr_states */
        for (cp = states, i = 0; *cp; i++) {
                cp = parse_attr(src, lineno, cp, &(res->state[i]));
 +              if (!is_macro)
 +                      res->state[i].attr->maybe_real = 1;
 +              if (res->state[i].attr->maybe_macro)
 +                      cannot_trust_maybe_real = 1;
        }
  
        return res;
@@@ -379,8 -370,12 +380,12 @@@ static struct attr_stack *read_attr_fro
                return NULL;
        }
        res = xcalloc(1, sizeof(*res));
-       while (fgets(buf, sizeof(buf), fp))
-               handle_attr_line(res, buf, path, ++lineno, macro_ok);
+       while (fgets(buf, sizeof(buf), fp)) {
+               char *bufp = buf;
+               if (!lineno)
+                       skip_utf8_bom(&bufp, strlen(bufp));
+               handle_attr_line(res, bufp, path, ++lineno, macro_ok);
+       }
        fclose(fp);
        return res;
  }
@@@ -691,14 -686,13 +696,14 @@@ static int fill(const char *path, int p
        return rem;
  }
  
 -static int macroexpand_one(int attr_nr, int rem)
 +static int macroexpand_one(int nr, int rem)
  {
        struct attr_stack *stk;
        struct match_attr *a = NULL;
        int i;
  
 -      if (check_all_attr[attr_nr].value != ATTR__TRUE)
 +      if (check_all_attr[nr].value != ATTR__TRUE ||
 +          !check_all_attr[nr].attr->maybe_macro)
                return rem;
  
        for (stk = attr_stack; !a && stk; stk = stk->prev)
                        struct match_attr *ma = stk->attrs[i];
                        if (!ma->is_macro)
                                continue;
 -                      if (ma->u.attr->attr_nr == attr_nr)
 +                      if (ma->u.attr->attr_nr == nr)
                                a = ma;
                }
  
  }
  
  /*
 - * Collect all attributes for path into the array pointed to by
 - * check_all_attr.
 + * Collect attributes for path into the array pointed to by
 + * check_all_attr. If num is non-zero, only attributes in check[] are
 + * collected. Otherwise all attributes are collected.
   */
 -static void collect_all_attrs(const char *path)
 +static void collect_some_attrs(const char *path, int num,
 +                             struct git_attr_check *check)
 +
  {
        struct attr_stack *stk;
        int i, pathlen, rem, dirlen;
        prepare_attr_stack(path, dirlen);
        for (i = 0; i < attr_nr; i++)
                check_all_attr[i].value = ATTR__UNKNOWN;
 +      if (num && !cannot_trust_maybe_real) {
 +              rem = 0;
 +              for (i = 0; i < num; i++) {
 +                      if (!check[i].attr->maybe_real) {
 +                              struct git_attr_check *c;
 +                              c = check_all_attr + check[i].attr->attr_nr;
 +                              c->value = ATTR__UNSET;
 +                              rem++;
 +                      }
 +              }
 +              if (rem == num)
 +                      return;
 +      }
  
        rem = attr_nr;
        for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
@@@ -769,7 -747,7 +774,7 @@@ int git_check_attr(const char *path, in
  {
        int i;
  
 -      collect_all_attrs(path);
 +      collect_some_attrs(path, num, check);
  
        for (i = 0; i < num; i++) {
                const char *value = check_all_attr[check[i].attr->attr_nr].value;
@@@ -785,7 -763,7 +790,7 @@@ int git_all_attrs(const char *path, in
  {
        int i, count, j;
  
 -      collect_all_attrs(path);
 +      collect_some_attrs(path, 0, NULL);
  
        /* Count the number of attributes that are set. */
        count = 0;
diff --combined config.c
index 66c0a51bce529e4c027f11017697a62dd737b3bd,9618aa443dee975c639a31a4ae2b3d55eed06d6e..c4424c01388496b5995e19f9601f0c87b9fdd3c0
--- 1/config.c
--- 2/config.c
+++ b/config.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "quote.h"
  #include "hashmap.h"
  #include "string-list.h"
+ #include "utf8.h"
  
  struct config_source {
        struct config_source *prev;
@@@ -73,12 -74,8 +74,12 @@@ static int config_buf_fgetc(struct conf
  
  static int config_buf_ungetc(int c, struct config_source *conf)
  {
 -      if (conf->u.buf.pos > 0)
 -              return conf->u.buf.buf[--conf->u.buf.pos];
 +      if (conf->u.buf.pos > 0) {
 +              conf->u.buf.pos--;
 +              if (conf->u.buf.buf[conf->u.buf.pos] != c)
 +                      die("BUG: config_buf can only ungetc the same character");
 +              return c;
 +      }
  
        return EOF;
  }
@@@ -239,8 -236,7 +240,8 @@@ static int get_next_char(void
                /* DOS like systems */
                c = cf->do_fgetc(cf);
                if (c != '\n') {
 -                      cf->do_ungetc(c, cf);
 +                      if (c != EOF)
 +                              cf->do_ungetc(c, cf);
                        c = '\r';
                }
        }
@@@ -417,8 -413,7 +418,7 @@@ static int git_parse_source(config_fn_
        struct strbuf *var = &cf->var;
  
        /* U+FEFF Byte Order Mark in UTF8 */
-       static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
-       const unsigned char *bomptr = utf8_bom;
+       const char *bomptr = utf8_bom;
  
        for (;;) {
                int c = get_next_char();
                        /* We are at the file beginning; skip UTF8-encoded BOM
                         * if present. Sane editors won't put this in on their
                         * own, but e.g. Windows Notepad will do it happily. */
-                       if ((unsigned char) c == *bomptr) {
+                       if (c == (*bomptr & 0377)) {
                                bomptr++;
                                continue;
                        } else {
@@@ -1345,7 -1340,7 +1345,7 @@@ static int configset_add_value(struct c
                string_list_init(&e->value_list, 1);
                hashmap_add(&cs->config_hash, e);
        }
 -      si = string_list_append_nodup(&e->value_list, value ? xstrdup(value) : NULL);
 +      si = string_list_append_nodup(&e->value_list, xstrdup_or_null(value));
  
        ALLOC_GROW(cs->list.items, cs->list.nr + 1, cs->list.alloc);
        l_item = &cs->list.items[cs->list.nr++];
diff --combined dir.c
index 0943a81964ddb7b5b1d83c9c8eafe2b3b2b9da09,4c4bf910fa2b950c094cfedc1a24ab5ce6c088ac..a3e70734004e5da22dce9ba4062c0d6684061855
--- 1/dir.c
--- 2/dir.c
+++ b/dir.c
@@@ -12,6 -12,7 +12,7 @@@
  #include "refs.h"
  #include "wildmatch.h"
  #include "pathspec.h"
+ #include "utf8.h"
  
  struct path_simplify {
        int len;
@@@ -377,49 -378,6 +378,49 @@@ int match_pathspec(const struct pathspe
        return negative ? 0 : positive;
  }
  
 +int report_path_error(const char *ps_matched,
 +                    const struct pathspec *pathspec,
 +                    const char *prefix)
 +{
 +      /*
 +       * Make sure all pathspec matched; otherwise it is an error.
 +       */
 +      struct strbuf sb = STRBUF_INIT;
 +      int num, errors = 0;
 +      for (num = 0; num < pathspec->nr; num++) {
 +              int other, found_dup;
 +
 +              if (ps_matched[num])
 +                      continue;
 +              /*
 +               * The caller might have fed identical pathspec
 +               * twice.  Do not barf on such a mistake.
 +               * FIXME: parse_pathspec should have eliminated
 +               * duplicate pathspec.
 +               */
 +              for (found_dup = other = 0;
 +                   !found_dup && other < pathspec->nr;
 +                   other++) {
 +                      if (other == num || !ps_matched[other])
 +                              continue;
 +                      if (!strcmp(pathspec->items[other].original,
 +                                  pathspec->items[num].original))
 +                              /*
 +                               * Ok, we have a match already.
 +                               */
 +                              found_dup = 1;
 +              }
 +              if (found_dup)
 +                      continue;
 +
 +              error("pathspec '%s' did not match any file(s) known to git.",
 +                    pathspec->items[num].original);
 +              errors++;
 +      }
 +      strbuf_release(&sb);
 +      return errors;
 +}
 +
  /*
   * Return the length of the "simple" part of a path match limiter.
   */
@@@ -617,7 -575,12 +618,12 @@@ int add_excludes_from_file_to_list(cons
        }
  
        el->filebuf = buf;
+       if (skip_utf8_bom(&buf, size))
+               size -= buf - el->filebuf;
        entry = buf;
        for (i = 0; i < size; i++) {
                if (buf[i] == '\n') {
                        if (entry != buf + i && entry[0] != '#') {