/* convert.c, on commit "log-tree.c: Use struct name_decoration's type for classifying decoration" (a752412) */
   1#include "cache.h"
   2#include "attr.h"
   3#include "run-command.h"
   4
   5/*
   6 * convert.c - convert a file when checking it out and checking it in.
   7 *
   8 * This should use the pathname to decide on whether it wants to do some
   9 * more interesting conversions (automatic gzip/unzip, general format
  10 * conversions etc etc), but by default it just does automatic CRLF<->LF
  11 * translation when the "auto_crlf" option is set.
  12 */
  13
  14#define CRLF_GUESS      (-1)
  15#define CRLF_BINARY     0
  16#define CRLF_TEXT       1
  17#define CRLF_INPUT      2
  18
  19struct text_stat {
  20        /* NUL, CR, LF and CRLF counts */
  21        unsigned nul, cr, lf, crlf;
  22
  23        /* These are just approximations! */
  24        unsigned printable, nonprintable;
  25};
  26
  27static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
  28{
  29        unsigned long i;
  30
  31        memset(stats, 0, sizeof(*stats));
  32
  33        for (i = 0; i < size; i++) {
  34                unsigned char c = buf[i];
  35                if (c == '\r') {
  36                        stats->cr++;
  37                        if (i+1 < size && buf[i+1] == '\n')
  38                                stats->crlf++;
  39                        continue;
  40                }
  41                if (c == '\n') {
  42                        stats->lf++;
  43                        continue;
  44                }
  45                if (c == 127)
  46                        /* DEL */
  47                        stats->nonprintable++;
  48                else if (c < 32) {
  49                        switch (c) {
  50                                /* BS, HT, ESC and FF */
  51                        case '\b': case '\t': case '\033': case '\014':
  52                                stats->printable++;
  53                                break;
  54                        case 0:
  55                                stats->nul++;
  56                                /* fall through */
  57                        default:
  58                                stats->nonprintable++;
  59                        }
  60                }
  61                else
  62                        stats->printable++;
  63        }
  64
  65        /* If file ends with EOF then don't count this EOF as non-printable. */
  66        if (size >= 1 && buf[size-1] == '\032')
  67                stats->nonprintable--;
  68}
  69
  70/*
  71 * The same heuristics as diff.c::mmfile_is_binary()
  72 */
  73static int is_binary(unsigned long size, struct text_stat *stats)
  74{
  75
  76        if (stats->nul)
  77                return 1;
  78        if ((stats->printable >> 7) < stats->nonprintable)
  79                return 1;
  80        /*
  81         * Other heuristics? Average line length might be relevant,
  82         * as might LF vs CR vs CRLF counts..
  83         *
  84         * NOTE! It might be normal to have a low ratio of CRLF to LF
  85         * (somebody starts with a LF-only file and edits it with an editor
  86         * that adds CRLF only to lines that are added..). But do  we
  87         * want to support CR-only? Probably not.
  88         */
  89        return 0;
  90}
  91
  92static void check_safe_crlf(const char *path, int action,
  93                            struct text_stat *stats, enum safe_crlf checksafe)
  94{
  95        if (!checksafe)
  96                return;
  97
  98        if (action == CRLF_INPUT || auto_crlf <= 0) {
  99                /*
 100                 * CRLFs would not be restored by checkout:
 101                 * check if we'd remove CRLFs
 102                 */
 103                if (stats->crlf) {
 104                        if (checksafe == SAFE_CRLF_WARN)
 105                                warning("CRLF will be replaced by LF in %s.", path);
 106                        else /* i.e. SAFE_CRLF_FAIL */
 107                                die("CRLF would be replaced by LF in %s.", path);
 108                }
 109        } else if (auto_crlf > 0) {
 110                /*
 111                 * CRLFs would be added by checkout:
 112                 * check if we have "naked" LFs
 113                 */
 114                if (stats->lf != stats->crlf) {
 115                        if (checksafe == SAFE_CRLF_WARN)
 116                                warning("LF will be replaced by CRLF in %s", path);
 117                        else /* i.e. SAFE_CRLF_FAIL */
 118                                die("LF would be replaced by CRLF in %s", path);
 119                }
 120        }
 121}
 122
 123static int crlf_to_git(const char *path, const char *src, size_t len,
 124                       struct strbuf *buf, int action, enum safe_crlf checksafe)
 125{
 126        struct text_stat stats;
 127        char *dst;
 128
 129        if ((action == CRLF_BINARY) || !auto_crlf || !len)
 130                return 0;
 131
 132        gather_stats(src, len, &stats);
 133
 134        if (action == CRLF_GUESS) {
 135                /*
 136                 * We're currently not going to even try to convert stuff
 137                 * that has bare CR characters. Does anybody do that crazy
 138                 * stuff?
 139                 */
 140                if (stats.cr != stats.crlf)
 141                        return 0;
 142
 143                /*
 144                 * And add some heuristics for binary vs text, of course...
 145                 */
 146                if (is_binary(len, &stats))
 147                        return 0;
 148        }
 149
 150        check_safe_crlf(path, action, &stats, checksafe);
 151
 152        /* Optimization: No CR? Nothing to convert, regardless. */
 153        if (!stats.cr)
 154                return 0;
 155
 156        /* only grow if not in place */
 157        if (strbuf_avail(buf) + buf->len < len)
 158                strbuf_grow(buf, len - buf->len);
 159        dst = buf->buf;
 160        if (action == CRLF_GUESS) {
 161                /*
 162                 * If we guessed, we already know we rejected a file with
 163                 * lone CR, and we can strip a CR without looking at what
 164                 * follow it.
 165                 */
 166                do {
 167                        unsigned char c = *src++;
 168                        if (c != '\r')
 169                                *dst++ = c;
 170                } while (--len);
 171        } else {
 172                do {
 173                        unsigned char c = *src++;
 174                        if (! (c == '\r' && (1 < len && *src == '\n')))
 175                                *dst++ = c;
 176                } while (--len);
 177        }
 178        strbuf_setlen(buf, dst - buf->buf);
 179        return 1;
 180}
 181
 182static int crlf_to_worktree(const char *path, const char *src, size_t len,
 183                            struct strbuf *buf, int action)
 184{
 185        char *to_free = NULL;
 186        struct text_stat stats;
 187
 188        if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
 189            auto_crlf <= 0)
 190                return 0;
 191
 192        if (!len)
 193                return 0;
 194
 195        gather_stats(src, len, &stats);
 196
 197        /* No LF? Nothing to convert, regardless. */
 198        if (!stats.lf)
 199                return 0;
 200
 201        /* Was it already in CRLF format? */
 202        if (stats.lf == stats.crlf)
 203                return 0;
 204
 205        if (action == CRLF_GUESS) {
 206                /* If we have any bare CR characters, we're not going to touch it */
 207                if (stats.cr != stats.crlf)
 208                        return 0;
 209
 210                if (is_binary(len, &stats))
 211                        return 0;
 212        }
 213
 214        /* are we "faking" in place editing ? */
 215        if (src == buf->buf)
 216                to_free = strbuf_detach(buf, NULL);
 217
 218        strbuf_grow(buf, len + stats.lf - stats.crlf);
 219        for (;;) {
 220                const char *nl = memchr(src, '\n', len);
 221                if (!nl)
 222                        break;
 223                if (nl > src && nl[-1] == '\r') {
 224                        strbuf_add(buf, src, nl + 1 - src);
 225                } else {
 226                        strbuf_add(buf, src, nl - src);
 227                        strbuf_addstr(buf, "\r\n");
 228                }
 229                len -= nl + 1 - src;
 230                src  = nl + 1;
 231        }
 232        strbuf_add(buf, src, len);
 233
 234        free(to_free);
 235        return 1;
 236}
 237
 238struct filter_params {
 239        const char *src;
 240        unsigned long size;
 241        const char *cmd;
 242};
 243
 244static int filter_buffer(int in, int out, void *data)
 245{
 246        /*
 247         * Spawn cmd and feed the buffer contents through its stdin.
 248         */
 249        struct child_process child_process;
 250        struct filter_params *params = (struct filter_params *)data;
 251        int write_err, status;
 252        const char *argv[] = { params->cmd, NULL };
 253
 254        memset(&child_process, 0, sizeof(child_process));
 255        child_process.argv = argv;
 256        child_process.use_shell = 1;
 257        child_process.in = -1;
 258        child_process.out = out;
 259
 260        if (start_command(&child_process))
 261                return error("cannot fork to run external filter %s", params->cmd);
 262
 263        write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
 264        if (close(child_process.in))
 265                write_err = 1;
 266        if (write_err)
 267                error("cannot feed the input to external filter %s", params->cmd);
 268
 269        status = finish_command(&child_process);
 270        if (status)
 271                error("external filter %s failed %d", params->cmd, status);
 272        return (write_err || status);
 273}
 274
 275static int apply_filter(const char *path, const char *src, size_t len,
 276                        struct strbuf *dst, const char *cmd)
 277{
 278        /*
 279         * Create a pipeline to have the command filter the buffer's
 280         * contents.
 281         *
 282         * (child --> cmd) --> us
 283         */
 284        int ret = 1;
 285        struct strbuf nbuf = STRBUF_INIT;
 286        struct async async;
 287        struct filter_params params;
 288
 289        if (!cmd)
 290                return 0;
 291
 292        memset(&async, 0, sizeof(async));
 293        async.proc = filter_buffer;
 294        async.data = &params;
 295        async.out = -1;
 296        params.src = src;
 297        params.size = len;
 298        params.cmd = cmd;
 299
 300        fflush(NULL);
 301        if (start_async(&async))
 302                return 0;       /* error was already reported */
 303
 304        if (strbuf_read(&nbuf, async.out, len) < 0) {
 305                error("read from external filter %s failed", cmd);
 306                ret = 0;
 307        }
 308        if (close(async.out)) {
 309                error("read from external filter %s failed", cmd);
 310                ret = 0;
 311        }
 312        if (finish_async(&async)) {
 313                error("external filter %s failed", cmd);
 314                ret = 0;
 315        }
 316
 317        if (ret) {
 318                strbuf_swap(dst, &nbuf);
 319        }
 320        strbuf_release(&nbuf);
 321        return ret;
 322}
 323
 324static struct convert_driver {
 325        const char *name;
 326        struct convert_driver *next;
 327        const char *smudge;
 328        const char *clean;
 329} *user_convert, **user_convert_tail;
 330
 331static int read_convert_config(const char *var, const char *value, void *cb)
 332{
 333        const char *ep, *name;
 334        int namelen;
 335        struct convert_driver *drv;
 336
 337        /*
 338         * External conversion drivers are configured using
 339         * "filter.<name>.variable".
 340         */
 341        if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
 342                return 0;
 343        name = var + 7;
 344        namelen = ep - name;
 345        for (drv = user_convert; drv; drv = drv->next)
 346                if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
 347                        break;
 348        if (!drv) {
 349                drv = xcalloc(1, sizeof(struct convert_driver));
 350                drv->name = xmemdupz(name, namelen);
 351                *user_convert_tail = drv;
 352                user_convert_tail = &(drv->next);
 353        }
 354
 355        ep++;
 356
 357        /*
 358         * filter.<name>.smudge and filter.<name>.clean specifies
 359         * the command line:
 360         *
 361         *      command-line
 362         *
 363         * The command-line will not be interpolated in any way.
 364         */
 365
 366        if (!strcmp("smudge", ep))
 367                return git_config_string(&drv->smudge, var, value);
 368
 369        if (!strcmp("clean", ep))
 370                return git_config_string(&drv->clean, var, value);
 371
 372        return 0;
 373}
 374
 375static void setup_convert_check(struct git_attr_check *check)
 376{
 377        static struct git_attr *attr_crlf;
 378        static struct git_attr *attr_ident;
 379        static struct git_attr *attr_filter;
 380
 381        if (!attr_crlf) {
 382                attr_crlf = git_attr("crlf");
 383                attr_ident = git_attr("ident");
 384                attr_filter = git_attr("filter");
 385                user_convert_tail = &user_convert;
 386                git_config(read_convert_config, NULL);
 387        }
 388        check[0].attr = attr_crlf;
 389        check[1].attr = attr_ident;
 390        check[2].attr = attr_filter;
 391}
 392
 393static int count_ident(const char *cp, unsigned long size)
 394{
 395        /*
 396         * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 397         */
 398        int cnt = 0;
 399        char ch;
 400
 401        while (size) {
 402                ch = *cp++;
 403                size--;
 404                if (ch != '$')
 405                        continue;
 406                if (size < 3)
 407                        break;
 408                if (memcmp("Id", cp, 2))
 409                        continue;
 410                ch = cp[2];
 411                cp += 3;
 412                size -= 3;
 413                if (ch == '$')
 414                        cnt++; /* $Id$ */
 415                if (ch != ':')
 416                        continue;
 417
 418                /*
 419                 * "$Id: ... "; scan up to the closing dollar sign and discard.
 420                 */
 421                while (size) {
 422                        ch = *cp++;
 423                        size--;
 424                        if (ch == '$') {
 425                                cnt++;
 426                                break;
 427                        }
 428                        if (ch == '\n')
 429                                break;
 430                }
 431        }
 432        return cnt;
 433}
 434
 435static int ident_to_git(const char *path, const char *src, size_t len,
 436                        struct strbuf *buf, int ident)
 437{
 438        char *dst, *dollar;
 439
 440        if (!ident || !count_ident(src, len))
 441                return 0;
 442
 443        /* only grow if not in place */
 444        if (strbuf_avail(buf) + buf->len < len)
 445                strbuf_grow(buf, len - buf->len);
 446        dst = buf->buf;
 447        for (;;) {
 448                dollar = memchr(src, '$', len);
 449                if (!dollar)
 450                        break;
 451                memcpy(dst, src, dollar + 1 - src);
 452                dst += dollar + 1 - src;
 453                len -= dollar + 1 - src;
 454                src  = dollar + 1;
 455
 456                if (len > 3 && !memcmp(src, "Id:", 3)) {
 457                        dollar = memchr(src + 3, '$', len - 3);
 458                        if (!dollar)
 459                                break;
 460                        if (memchr(src + 3, '\n', dollar - src - 3)) {
 461                                /* Line break before the next dollar. */
 462                                continue;
 463                        }
 464
 465                        memcpy(dst, "Id$", 3);
 466                        dst += 3;
 467                        len -= dollar + 1 - src;
 468                        src  = dollar + 1;
 469                }
 470        }
 471        memcpy(dst, src, len);
 472        strbuf_setlen(buf, dst + len - buf->buf);
 473        return 1;
 474}
 475
 476static int ident_to_worktree(const char *path, const char *src, size_t len,
 477                             struct strbuf *buf, int ident)
 478{
 479        unsigned char sha1[20];
 480        char *to_free = NULL, *dollar, *spc;
 481        int cnt;
 482
 483        if (!ident)
 484                return 0;
 485
 486        cnt = count_ident(src, len);
 487        if (!cnt)
 488                return 0;
 489
 490        /* are we "faking" in place editing ? */
 491        if (src == buf->buf)
 492                to_free = strbuf_detach(buf, NULL);
 493        hash_sha1_file(src, len, "blob", sha1);
 494
 495        strbuf_grow(buf, len + cnt * 43);
 496        for (;;) {
 497                /* step 1: run to the next '$' */
 498                dollar = memchr(src, '$', len);
 499                if (!dollar)
 500                        break;
 501                strbuf_add(buf, src, dollar + 1 - src);
 502                len -= dollar + 1 - src;
 503                src  = dollar + 1;
 504
 505                /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
 506                if (len < 3 || memcmp("Id", src, 2))
 507                        continue;
 508
 509                /* step 3: skip over Id$ or Id:xxxxx$ */
 510                if (src[2] == '$') {
 511                        src += 3;
 512                        len -= 3;
 513                } else if (src[2] == ':') {
 514                        /*
 515                         * It's possible that an expanded Id has crept its way into the
 516                         * repository, we cope with that by stripping the expansion out.
 517                         * This is probably not a good idea, since it will cause changes
 518                         * on checkout, which won't go away by stash, but let's keep it
 519                         * for git-style ids.
 520                         */
 521                        dollar = memchr(src + 3, '$', len - 3);
 522                        if (!dollar) {
 523                                /* incomplete keyword, no more '$', so just quit the loop */
 524                                break;
 525                        }
 526
 527                        if (memchr(src + 3, '\n', dollar - src - 3)) {
 528                                /* Line break before the next dollar. */
 529                                continue;
 530                        }
 531
 532                        spc = memchr(src + 4, ' ', dollar - src - 4);
 533                        if (spc && spc < dollar-1) {
 534                                /* There are spaces in unexpected places.
 535                                 * This is probably an id from some other
 536                                 * versioning system. Keep it for now.
 537                                 */
 538                                continue;
 539                        }
 540
 541                        len -= dollar + 1 - src;
 542                        src  = dollar + 1;
 543                } else {
 544                        /* it wasn't a "Id$" or "Id:xxxx$" */
 545                        continue;
 546                }
 547
 548                /* step 4: substitute */
 549                strbuf_addstr(buf, "Id: ");
 550                strbuf_add(buf, sha1_to_hex(sha1), 40);
 551                strbuf_addstr(buf, " $");
 552        }
 553        strbuf_add(buf, src, len);
 554
 555        free(to_free);
 556        return 1;
 557}
 558
 559static int git_path_check_crlf(const char *path, struct git_attr_check *check)
 560{
 561        const char *value = check->value;
 562
 563        if (ATTR_TRUE(value))
 564                return CRLF_TEXT;
 565        else if (ATTR_FALSE(value))
 566                return CRLF_BINARY;
 567        else if (ATTR_UNSET(value))
 568                ;
 569        else if (!strcmp(value, "input"))
 570                return CRLF_INPUT;
 571        return CRLF_GUESS;
 572}
 573
 574static struct convert_driver *git_path_check_convert(const char *path,
 575                                             struct git_attr_check *check)
 576{
 577        const char *value = check->value;
 578        struct convert_driver *drv;
 579
 580        if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
 581                return NULL;
 582        for (drv = user_convert; drv; drv = drv->next)
 583                if (!strcmp(value, drv->name))
 584                        return drv;
 585        return NULL;
 586}
 587
 588static int git_path_check_ident(const char *path, struct git_attr_check *check)
 589{
 590        const char *value = check->value;
 591
 592        return !!ATTR_TRUE(value);
 593}
 594
 595int convert_to_git(const char *path, const char *src, size_t len,
 596                   struct strbuf *dst, enum safe_crlf checksafe)
 597{
 598        struct git_attr_check check[3];
 599        int crlf = CRLF_GUESS;
 600        int ident = 0, ret = 0;
 601        const char *filter = NULL;
 602
 603        setup_convert_check(check);
 604        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 605                struct convert_driver *drv;
 606                crlf = git_path_check_crlf(path, check + 0);
 607                ident = git_path_check_ident(path, check + 1);
 608                drv = git_path_check_convert(path, check + 2);
 609                if (drv && drv->clean)
 610                        filter = drv->clean;
 611        }
 612
 613        ret |= apply_filter(path, src, len, dst, filter);
 614        if (ret) {
 615                src = dst->buf;
 616                len = dst->len;
 617        }
 618        ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
 619        if (ret) {
 620                src = dst->buf;
 621                len = dst->len;
 622        }
 623        return ret | ident_to_git(path, src, len, dst, ident);
 624}
 625
 626int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 627{
 628        struct git_attr_check check[3];
 629        int crlf = CRLF_GUESS;
 630        int ident = 0, ret = 0;
 631        const char *filter = NULL;
 632
 633        setup_convert_check(check);
 634        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 635                struct convert_driver *drv;
 636                crlf = git_path_check_crlf(path, check + 0);
 637                ident = git_path_check_ident(path, check + 1);
 638                drv = git_path_check_convert(path, check + 2);
 639                if (drv && drv->smudge)
 640                        filter = drv->smudge;
 641        }
 642
 643        ret |= ident_to_worktree(path, src, len, dst, ident);
 644        if (ret) {
 645                src = dst->buf;
 646                len = dst->len;
 647        }
 648        ret |= crlf_to_worktree(path, src, len, dst, crlf);
 649        if (ret) {
 650                src = dst->buf;
 651                len = dst->len;
 652        }
 653        return ret | apply_filter(path, src, len, dst, filter);
 654}