attr.con commit Add a file comment (86ab7f0)
   1/*
   2 * Handle git attributes.  See gitattributes(5) for a description of
   3 * the file syntax, and Documentation/technical/api-gitattributes.txt
   4 * for a description of the API.
   5 *
   6 * One basic design decision here is that we are not going to support
   7 * an insanely large number of attributes.
   8 */
   9
  10#define NO_THE_INDEX_COMPATIBILITY_MACROS
  11#include "cache.h"
  12#include "exec_cmd.h"
  13#include "attr.h"
  14
  15const char git_attr__true[] = "(builtin)true";
  16const char git_attr__false[] = "\0(builtin)false";
  17static const char git_attr__unknown[] = "(builtin)unknown";
  18#define ATTR__TRUE git_attr__true
  19#define ATTR__FALSE git_attr__false
  20#define ATTR__UNSET NULL
  21#define ATTR__UNKNOWN git_attr__unknown
  22
  23static const char *attributes_file;
  24
  25/* This is a randomly chosen prime. */
  26#define HASHSIZE 257
  27
  28#ifndef DEBUG_ATTR
  29#define DEBUG_ATTR 0
  30#endif
  31
  32struct git_attr {
  33        struct git_attr *next;
  34        unsigned h;
  35        int attr_nr;
  36        char name[FLEX_ARRAY];
  37};
  38static int attr_nr;
  39
  40static struct git_attr_check *check_all_attr;
  41static struct git_attr *(git_attr_hash[HASHSIZE]);
  42
  43static unsigned hash_name(const char *name, int namelen)
  44{
  45        unsigned val = 0, c;
  46
  47        while (namelen--) {
  48                c = *name++;
  49                val = ((val << 7) | (val >> 22)) ^ c;
  50        }
  51        return val;
  52}
  53
  54static int invalid_attr_name(const char *name, int namelen)
  55{
  56        /*
  57         * Attribute name cannot begin with '-' and from
  58         * [-A-Za-z0-9_.].  We'd specifically exclude '=' for now,
  59         * as we might later want to allow non-binary value for
  60         * attributes, e.g. "*.svg      merge=special-merge-program-for-svg"
  61         */
  62        if (*name == '-')
  63                return -1;
  64        while (namelen--) {
  65                char ch = *name++;
  66                if (! (ch == '-' || ch == '.' || ch == '_' ||
  67                       ('0' <= ch && ch <= '9') ||
  68                       ('a' <= ch && ch <= 'z') ||
  69                       ('A' <= ch && ch <= 'Z')) )
  70                        return -1;
  71        }
  72        return 0;
  73}
  74
  75static struct git_attr *git_attr_internal(const char *name, int len)
  76{
  77        unsigned hval = hash_name(name, len);
  78        unsigned pos = hval % HASHSIZE;
  79        struct git_attr *a;
  80
  81        for (a = git_attr_hash[pos]; a; a = a->next) {
  82                if (a->h == hval &&
  83                    !memcmp(a->name, name, len) && !a->name[len])
  84                        return a;
  85        }
  86
  87        if (invalid_attr_name(name, len))
  88                return NULL;
  89
  90        a = xmalloc(sizeof(*a) + len + 1);
  91        memcpy(a->name, name, len);
  92        a->name[len] = 0;
  93        a->h = hval;
  94        a->next = git_attr_hash[pos];
  95        a->attr_nr = attr_nr++;
  96        git_attr_hash[pos] = a;
  97
  98        check_all_attr = xrealloc(check_all_attr,
  99                                  sizeof(*check_all_attr) * attr_nr);
 100        check_all_attr[a->attr_nr].attr = a;
 101        check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
 102        return a;
 103}
 104
 105struct git_attr *git_attr(const char *name)
 106{
 107        return git_attr_internal(name, strlen(name));
 108}
 109
 110/* What does a matched pattern decide? */
 111struct attr_state {
 112        struct git_attr *attr;
 113        const char *setto;
 114};
 115
 116struct match_attr {
 117        union {
 118                char *pattern;
 119                struct git_attr *attr;
 120        } u;
 121        char is_macro;
 122        unsigned num_attr;
 123        struct attr_state state[FLEX_ARRAY];
 124};
 125
 126static const char blank[] = " \t\r\n";
 127
 128static const char *parse_attr(const char *src, int lineno, const char *cp,
 129                              int *num_attr, struct match_attr *res)
 130{
 131        const char *ep, *equals;
 132        int len;
 133
 134        ep = cp + strcspn(cp, blank);
 135        equals = strchr(cp, '=');
 136        if (equals && ep < equals)
 137                equals = NULL;
 138        if (equals)
 139                len = equals - cp;
 140        else
 141                len = ep - cp;
 142        if (!res) {
 143                if (*cp == '-' || *cp == '!') {
 144                        cp++;
 145                        len--;
 146                }
 147                if (invalid_attr_name(cp, len)) {
 148                        fprintf(stderr,
 149                                "%.*s is not a valid attribute name: %s:%d\n",
 150                                len, cp, src, lineno);
 151                        return NULL;
 152                }
 153        } else {
 154                struct attr_state *e;
 155
 156                e = &(res->state[*num_attr]);
 157                if (*cp == '-' || *cp == '!') {
 158                        e->setto = (*cp == '-') ? ATTR__FALSE : ATTR__UNSET;
 159                        cp++;
 160                        len--;
 161                }
 162                else if (!equals)
 163                        e->setto = ATTR__TRUE;
 164                else {
 165                        e->setto = xmemdupz(equals + 1, ep - equals - 1);
 166                }
 167                e->attr = git_attr_internal(cp, len);
 168        }
 169        (*num_attr)++;
 170        return ep + strspn(ep, blank);
 171}
 172
 173static struct match_attr *parse_attr_line(const char *line, const char *src,
 174                                          int lineno, int macro_ok)
 175{
 176        int namelen;
 177        int num_attr;
 178        const char *cp, *name;
 179        struct match_attr *res = NULL;
 180        int pass;
 181        int is_macro;
 182
 183        cp = line + strspn(line, blank);
 184        if (!*cp || *cp == '#')
 185                return NULL;
 186        name = cp;
 187        namelen = strcspn(name, blank);
 188        if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen &&
 189            !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) {
 190                if (!macro_ok) {
 191                        fprintf(stderr, "%s not allowed: %s:%d\n",
 192                                name, src, lineno);
 193                        return NULL;
 194                }
 195                is_macro = 1;
 196                name += strlen(ATTRIBUTE_MACRO_PREFIX);
 197                name += strspn(name, blank);
 198                namelen = strcspn(name, blank);
 199                if (invalid_attr_name(name, namelen)) {
 200                        fprintf(stderr,
 201                                "%.*s is not a valid attribute name: %s:%d\n",
 202                                namelen, name, src, lineno);
 203                        return NULL;
 204                }
 205        }
 206        else
 207                is_macro = 0;
 208
 209        for (pass = 0; pass < 2; pass++) {
 210                /* pass 0 counts and allocates, pass 1 fills */
 211                num_attr = 0;
 212                cp = name + namelen;
 213                cp = cp + strspn(cp, blank);
 214                while (*cp) {
 215                        cp = parse_attr(src, lineno, cp, &num_attr, res);
 216                        if (!cp)
 217                                return NULL;
 218                }
 219                if (pass)
 220                        break;
 221                res = xcalloc(1,
 222                              sizeof(*res) +
 223                              sizeof(struct attr_state) * num_attr +
 224                              (is_macro ? 0 : namelen + 1));
 225                if (is_macro)
 226                        res->u.attr = git_attr_internal(name, namelen);
 227                else {
 228                        res->u.pattern = (char *)&(res->state[num_attr]);
 229                        memcpy(res->u.pattern, name, namelen);
 230                        res->u.pattern[namelen] = 0;
 231                }
 232                res->is_macro = is_macro;
 233                res->num_attr = num_attr;
 234        }
 235        return res;
 236}
 237
/*
 * Like info/exclude and .gitignore, the attribute information can
 * come from many places.
 *
 * (1) .gitattributes file of the same directory;
 * (2) .gitattributes file of the parent directory if (1) does not have
 *      any match; this goes recursively upwards, just like .gitignore.
 * (3) $GIT_DIR/info/attributes, which overrides both of the above.
 *
 * In the same file, later entries override the earlier match, so in the
 * global list, we would have entries from info/attributes the earliest
 * (reading the file from top to bottom), .gitattributes of the root
 * directory (again, reading the file from top to bottom) down to the
 * current directory, and then scan the list backwards to find the first match.
 * This is exactly the same as what excluded() does in dir.c to deal with
 * .gitignore.
 */
 255
 256static struct attr_stack {
 257        struct attr_stack *prev;
 258        char *origin;
 259        unsigned num_matches;
 260        unsigned alloc;
 261        struct match_attr **attrs;
 262} *attr_stack;
 263
 264static void free_attr_elem(struct attr_stack *e)
 265{
 266        int i;
 267        free(e->origin);
 268        for (i = 0; i < e->num_matches; i++) {
 269                struct match_attr *a = e->attrs[i];
 270                int j;
 271                for (j = 0; j < a->num_attr; j++) {
 272                        const char *setto = a->state[j].setto;
 273                        if (setto == ATTR__TRUE ||
 274                            setto == ATTR__FALSE ||
 275                            setto == ATTR__UNSET ||
 276                            setto == ATTR__UNKNOWN)
 277                                ;
 278                        else
 279                                free((char *) setto);
 280                }
 281                free(a);
 282        }
 283        free(e);
 284}
 285
 286static const char *builtin_attr[] = {
 287        "[attr]binary -diff -text",
 288        NULL,
 289};
 290
 291static void handle_attr_line(struct attr_stack *res,
 292                             const char *line,
 293                             const char *src,
 294                             int lineno,
 295                             int macro_ok)
 296{
 297        struct match_attr *a;
 298
 299        a = parse_attr_line(line, src, lineno, macro_ok);
 300        if (!a)
 301                return;
 302        if (res->alloc <= res->num_matches) {
 303                res->alloc = alloc_nr(res->num_matches);
 304                res->attrs = xrealloc(res->attrs,
 305                                      sizeof(struct match_attr *) *
 306                                      res->alloc);
 307        }
 308        res->attrs[res->num_matches++] = a;
 309}
 310
 311static struct attr_stack *read_attr_from_array(const char **list)
 312{
 313        struct attr_stack *res;
 314        const char *line;
 315        int lineno = 0;
 316
 317        res = xcalloc(1, sizeof(*res));
 318        while ((line = *(list++)) != NULL)
 319                handle_attr_line(res, line, "[builtin]", ++lineno, 1);
 320        return res;
 321}
 322
 323static enum git_attr_direction direction;
 324static struct index_state *use_index;
 325
 326static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
 327{
 328        FILE *fp = fopen(path, "r");
 329        struct attr_stack *res;
 330        char buf[2048];
 331        int lineno = 0;
 332
 333        if (!fp)
 334                return NULL;
 335        res = xcalloc(1, sizeof(*res));
 336        while (fgets(buf, sizeof(buf), fp))
 337                handle_attr_line(res, buf, path, ++lineno, macro_ok);
 338        fclose(fp);
 339        return res;
 340}
 341
 342static void *read_index_data(const char *path)
 343{
 344        int pos, len;
 345        unsigned long sz;
 346        enum object_type type;
 347        void *data;
 348        struct index_state *istate = use_index ? use_index : &the_index;
 349
 350        len = strlen(path);
 351        pos = index_name_pos(istate, path, len);
 352        if (pos < 0) {
 353                /*
 354                 * We might be in the middle of a merge, in which
 355                 * case we would read stage #2 (ours).
 356                 */
 357                int i;
 358                for (i = -pos - 1;
 359                     (pos < 0 && i < istate->cache_nr &&
 360                      !strcmp(istate->cache[i]->name, path));
 361                     i++)
 362                        if (ce_stage(istate->cache[i]) == 2)
 363                                pos = i;
 364        }
 365        if (pos < 0)
 366                return NULL;
 367        data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
 368        if (!data || type != OBJ_BLOB) {
 369                free(data);
 370                return NULL;
 371        }
 372        return data;
 373}
 374
 375static struct attr_stack *read_attr_from_index(const char *path, int macro_ok)
 376{
 377        struct attr_stack *res;
 378        char *buf, *sp;
 379        int lineno = 0;
 380
 381        buf = read_index_data(path);
 382        if (!buf)
 383                return NULL;
 384
 385        res = xcalloc(1, sizeof(*res));
 386        for (sp = buf; *sp; ) {
 387                char *ep;
 388                int more;
 389                for (ep = sp; *ep && *ep != '\n'; ep++)
 390                        ;
 391                more = (*ep == '\n');
 392                *ep = '\0';
 393                handle_attr_line(res, sp, path, ++lineno, macro_ok);
 394                sp = ep + more;
 395        }
 396        free(buf);
 397        return res;
 398}
 399
 400static struct attr_stack *read_attr(const char *path, int macro_ok)
 401{
 402        struct attr_stack *res;
 403
 404        if (direction == GIT_ATTR_CHECKOUT) {
 405                res = read_attr_from_index(path, macro_ok);
 406                if (!res)
 407                        res = read_attr_from_file(path, macro_ok);
 408        }
 409        else if (direction == GIT_ATTR_CHECKIN) {
 410                res = read_attr_from_file(path, macro_ok);
 411                if (!res)
 412                        /*
 413                         * There is no checked out .gitattributes file there, but
 414                         * we might have it in the index.  We allow operation in a
 415                         * sparsely checked out work tree, so read from it.
 416                         */
 417                        res = read_attr_from_index(path, macro_ok);
 418        }
 419        else
 420                res = read_attr_from_index(path, macro_ok);
 421        if (!res)
 422                res = xcalloc(1, sizeof(*res));
 423        return res;
 424}
 425
 426#if DEBUG_ATTR
 427static void debug_info(const char *what, struct attr_stack *elem)
 428{
 429        fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()");
 430}
 431static void debug_set(const char *what, const char *match, struct git_attr *attr, const void *v)
 432{
 433        const char *value = v;
 434
 435        if (ATTR_TRUE(value))
 436                value = "set";
 437        else if (ATTR_FALSE(value))
 438                value = "unset";
 439        else if (ATTR_UNSET(value))
 440                value = "unspecified";
 441
 442        fprintf(stderr, "%s: %s => %s (%s)\n",
 443                what, attr->name, (char *) value, match);
 444}
 445#define debug_push(a) debug_info("push", (a))
 446#define debug_pop(a) debug_info("pop", (a))
 447#else
 448#define debug_push(a) do { ; } while (0)
 449#define debug_pop(a) do { ; } while (0)
 450#define debug_set(a,b,c,d) do { ; } while (0)
 451#endif
 452
 453static void drop_attr_stack(void)
 454{
 455        while (attr_stack) {
 456                struct attr_stack *elem = attr_stack;
 457                attr_stack = elem->prev;
 458                free_attr_elem(elem);
 459        }
 460}
 461
 462static const char *git_etc_gitattributes(void)
 463{
 464        static const char *system_wide;
 465        if (!system_wide)
 466                system_wide = system_path(ETC_GITATTRIBUTES);
 467        return system_wide;
 468}
 469
 470static int git_attr_system(void)
 471{
 472        return !git_env_bool("GIT_ATTR_NOSYSTEM", 0);
 473}
 474
 475static int git_attr_config(const char *var, const char *value, void *dummy)
 476{
 477        if (!strcmp(var, "core.attributesfile"))
 478                return git_config_pathname(&attributes_file, var, value);
 479
 480        return 0;
 481}
 482
 483static void bootstrap_attr_stack(void)
 484{
 485        if (!attr_stack) {
 486                struct attr_stack *elem;
 487
 488                elem = read_attr_from_array(builtin_attr);
 489                elem->origin = NULL;
 490                elem->prev = attr_stack;
 491                attr_stack = elem;
 492
 493                if (git_attr_system()) {
 494                        elem = read_attr_from_file(git_etc_gitattributes(), 1);
 495                        if (elem) {
 496                                elem->origin = NULL;
 497                                elem->prev = attr_stack;
 498                                attr_stack = elem;
 499                        }
 500                }
 501
 502                git_config(git_attr_config, NULL);
 503                if (attributes_file) {
 504                        elem = read_attr_from_file(attributes_file, 1);
 505                        if (elem) {
 506                                elem->origin = NULL;
 507                                elem->prev = attr_stack;
 508                                attr_stack = elem;
 509                        }
 510                }
 511
 512                if (!is_bare_repository() || direction == GIT_ATTR_INDEX) {
 513                        elem = read_attr(GITATTRIBUTES_FILE, 1);
 514                        elem->origin = strdup("");
 515                        elem->prev = attr_stack;
 516                        attr_stack = elem;
 517                        debug_push(elem);
 518                }
 519
 520                elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE), 1);
 521                if (!elem)
 522                        elem = xcalloc(1, sizeof(*elem));
 523                elem->origin = NULL;
 524                elem->prev = attr_stack;
 525                attr_stack = elem;
 526        }
 527}
 528
 529static void prepare_attr_stack(const char *path, int dirlen)
 530{
 531        struct attr_stack *elem, *info;
 532        int len;
 533        struct strbuf pathbuf;
 534
 535        strbuf_init(&pathbuf, dirlen+2+strlen(GITATTRIBUTES_FILE));
 536
 537        /*
 538         * At the bottom of the attribute stack is the built-in
 539         * set of attribute definitions, followed by the contents
 540         * of $(prefix)/etc/gitattributes and a file specified by
 541         * core.attributesfile.  Then, contents from
 542         * .gitattribute files from directories closer to the
 543         * root to the ones in deeper directories are pushed
 544         * to the stack.  Finally, at the very top of the stack
 545         * we always keep the contents of $GIT_DIR/info/attributes.
 546         *
 547         * When checking, we use entries from near the top of the
 548         * stack, preferring $GIT_DIR/info/attributes, then
 549         * .gitattributes in deeper directories to shallower ones,
 550         * and finally use the built-in set as the default.
 551         */
 552        if (!attr_stack)
 553                bootstrap_attr_stack();
 554
 555        /*
 556         * Pop the "info" one that is always at the top of the stack.
 557         */
 558        info = attr_stack;
 559        attr_stack = info->prev;
 560
 561        /*
 562         * Pop the ones from directories that are not the prefix of
 563         * the path we are checking.
 564         */
 565        while (attr_stack && attr_stack->origin) {
 566                int namelen = strlen(attr_stack->origin);
 567
 568                elem = attr_stack;
 569                if (namelen <= dirlen &&
 570                    !strncmp(elem->origin, path, namelen))
 571                        break;
 572
 573                debug_pop(elem);
 574                attr_stack = elem->prev;
 575                free_attr_elem(elem);
 576        }
 577
 578        /*
 579         * Read from parent directories and push them down
 580         */
 581        if (!is_bare_repository() || direction == GIT_ATTR_INDEX) {
 582                while (1) {
 583                        char *cp;
 584
 585                        len = strlen(attr_stack->origin);
 586                        if (dirlen <= len)
 587                                break;
 588                        strbuf_reset(&pathbuf);
 589                        strbuf_add(&pathbuf, path, dirlen);
 590                        strbuf_addch(&pathbuf, '/');
 591                        cp = strchr(pathbuf.buf + len + 1, '/');
 592                        strcpy(cp + 1, GITATTRIBUTES_FILE);
 593                        elem = read_attr(pathbuf.buf, 0);
 594                        *cp = '\0';
 595                        elem->origin = strdup(pathbuf.buf);
 596                        elem->prev = attr_stack;
 597                        attr_stack = elem;
 598                        debug_push(elem);
 599                }
 600        }
 601
 602        strbuf_release(&pathbuf);
 603
 604        /*
 605         * Finally push the "info" one at the top of the stack.
 606         */
 607        info->prev = attr_stack;
 608        attr_stack = info;
 609}
 610
 611static int path_matches(const char *pathname, int pathlen,
 612                        const char *pattern,
 613                        const char *base, int baselen)
 614{
 615        if (!strchr(pattern, '/')) {
 616                /* match basename */
 617                const char *basename = strrchr(pathname, '/');
 618                basename = basename ? basename + 1 : pathname;
 619                return (fnmatch(pattern, basename, 0) == 0);
 620        }
 621        /*
 622         * match with FNM_PATHNAME; the pattern has base implicitly
 623         * in front of it.
 624         */
 625        if (*pattern == '/')
 626                pattern++;
 627        if (pathlen < baselen ||
 628            (baselen && pathname[baselen] != '/') ||
 629            strncmp(pathname, base, baselen))
 630                return 0;
 631        if (baselen != 0)
 632                baselen++;
 633        return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
 634}
 635
 636static int macroexpand_one(int attr_nr, int rem);
 637
 638static int fill_one(const char *what, struct match_attr *a, int rem)
 639{
 640        struct git_attr_check *check = check_all_attr;
 641        int i;
 642
 643        for (i = a->num_attr - 1; 0 < rem && 0 <= i; i--) {
 644                struct git_attr *attr = a->state[i].attr;
 645                const char **n = &(check[attr->attr_nr].value);
 646                const char *v = a->state[i].setto;
 647
 648                if (*n == ATTR__UNKNOWN) {
 649                        debug_set(what,
 650                                  a->is_macro ? a->u.attr->name : a->u.pattern,
 651                                  attr, v);
 652                        *n = v;
 653                        rem--;
 654                        rem = macroexpand_one(attr->attr_nr, rem);
 655                }
 656        }
 657        return rem;
 658}
 659
 660static int fill(const char *path, int pathlen, struct attr_stack *stk, int rem)
 661{
 662        int i;
 663        const char *base = stk->origin ? stk->origin : "";
 664
 665        for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
 666                struct match_attr *a = stk->attrs[i];
 667                if (a->is_macro)
 668                        continue;
 669                if (path_matches(path, pathlen,
 670                                 a->u.pattern, base, strlen(base)))
 671                        rem = fill_one("fill", a, rem);
 672        }
 673        return rem;
 674}
 675
 676static int macroexpand_one(int attr_nr, int rem)
 677{
 678        struct attr_stack *stk;
 679        struct match_attr *a = NULL;
 680        int i;
 681
 682        if (check_all_attr[attr_nr].value != ATTR__TRUE)
 683                return rem;
 684
 685        for (stk = attr_stack; !a && stk; stk = stk->prev)
 686                for (i = stk->num_matches - 1; !a && 0 <= i; i--) {
 687                        struct match_attr *ma = stk->attrs[i];
 688                        if (!ma->is_macro)
 689                                continue;
 690                        if (ma->u.attr->attr_nr == attr_nr)
 691                                a = ma;
 692                }
 693
 694        if (a)
 695                rem = fill_one("expand", a, rem);
 696
 697        return rem;
 698}
 699
 700int git_checkattr(const char *path, int num, struct git_attr_check *check)
 701{
 702        struct attr_stack *stk;
 703        const char *cp;
 704        int dirlen, pathlen, i, rem;
 705
 706        bootstrap_attr_stack();
 707        for (i = 0; i < attr_nr; i++)
 708                check_all_attr[i].value = ATTR__UNKNOWN;
 709
 710        pathlen = strlen(path);
 711        cp = strrchr(path, '/');
 712        if (!cp)
 713                dirlen = 0;
 714        else
 715                dirlen = cp - path;
 716        prepare_attr_stack(path, dirlen);
 717        rem = attr_nr;
 718        for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
 719                rem = fill(path, pathlen, stk, rem);
 720
 721        for (i = 0; i < num; i++) {
 722                const char *value = check_all_attr[check[i].attr->attr_nr].value;
 723                if (value == ATTR__UNKNOWN)
 724                        value = ATTR__UNSET;
 725                check[i].value = value;
 726        }
 727
 728        return 0;
 729}
 730
 731void git_attr_set_direction(enum git_attr_direction new, struct index_state *istate)
 732{
 733        enum git_attr_direction old = direction;
 734
 735        if (is_bare_repository() && new != GIT_ATTR_INDEX)
 736                die("BUG: non-INDEX attr direction in a bare repo");
 737
 738        direction = new;
 739        if (new != old)
 740                drop_attr_stack();
 741        use_index = istate;
 742}