/* convert.c as of commit 5ecd293: "Rewrite convert_to_{git,working_tree} to use strbuf's." */
   1#include "cache.h"
   2#include "attr.h"
   3#include "run-command.h"
   4#include "strbuf.h"
   5
   6/*
   7 * convert.c - convert a file when checking it out and checking it in.
   8 *
   9 * This should use the pathname to decide on whether it wants to do some
  10 * more interesting conversions (automatic gzip/unzip, general format
  11 * conversions etc etc), but by default it just does automatic CRLF<->LF
  12 * translation when the "auto_crlf" option is set.
  13 */
  14
  15#define CRLF_GUESS      (-1)
  16#define CRLF_BINARY     0
  17#define CRLF_TEXT       1
  18#define CRLF_INPUT      2
  19
  20struct text_stat {
  21        /* CR, LF and CRLF counts */
  22        unsigned cr, lf, crlf;
  23
  24        /* These are just approximations! */
  25        unsigned printable, nonprintable;
  26};
  27
  28static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
  29{
  30        unsigned long i;
  31
  32        memset(stats, 0, sizeof(*stats));
  33
  34        for (i = 0; i < size; i++) {
  35                unsigned char c = buf[i];
  36                if (c == '\r') {
  37                        stats->cr++;
  38                        if (i+1 < size && buf[i+1] == '\n')
  39                                stats->crlf++;
  40                        continue;
  41                }
  42                if (c == '\n') {
  43                        stats->lf++;
  44                        continue;
  45                }
  46                if (c == 127)
  47                        /* DEL */
  48                        stats->nonprintable++;
  49                else if (c < 32) {
  50                        switch (c) {
  51                                /* BS, HT, ESC and FF */
  52                        case '\b': case '\t': case '\033': case '\014':
  53                                stats->printable++;
  54                                break;
  55                        default:
  56                                stats->nonprintable++;
  57                        }
  58                }
  59                else
  60                        stats->printable++;
  61        }
  62}
  63
  64/*
  65 * The same heuristics as diff.c::mmfile_is_binary()
  66 */
  67static int is_binary(unsigned long size, struct text_stat *stats)
  68{
  69
  70        if ((stats->printable >> 7) < stats->nonprintable)
  71                return 1;
  72        /*
  73         * Other heuristics? Average line length might be relevant,
  74         * as might LF vs CR vs CRLF counts..
  75         *
  76         * NOTE! It might be normal to have a low ratio of CRLF to LF
  77         * (somebody starts with a LF-only file and edits it with an editor
  78         * that adds CRLF only to lines that are added..). But do  we
  79         * want to support CR-only? Probably not.
  80         */
  81        return 0;
  82}
  83
  84static int crlf_to_git(const char *path, const char *src, size_t len,
  85                       struct strbuf *buf, int action)
  86{
  87        struct text_stat stats;
  88        char *dst;
  89
  90        if ((action == CRLF_BINARY) || !auto_crlf || !len)
  91                return 0;
  92
  93        gather_stats(src, len, &stats);
  94        /* No CR? Nothing to convert, regardless. */
  95        if (!stats.cr)
  96                return 0;
  97
  98        if (action == CRLF_GUESS) {
  99                /*
 100                 * We're currently not going to even try to convert stuff
 101                 * that has bare CR characters. Does anybody do that crazy
 102                 * stuff?
 103                 */
 104                if (stats.cr != stats.crlf)
 105                        return 0;
 106
 107                /*
 108                 * And add some heuristics for binary vs text, of course...
 109                 */
 110                if (is_binary(len, &stats))
 111                        return 0;
 112        }
 113
 114        strbuf_grow(buf, len);
 115        dst = buf->buf;
 116        if (action == CRLF_GUESS) {
 117                /*
 118                 * If we guessed, we already know we rejected a file with
 119                 * lone CR, and we can strip a CR without looking at what
 120                 * follow it.
 121                 */
 122                do {
 123                        unsigned char c = *src++;
 124                        if (c != '\r')
 125                                *dst++ = c;
 126                } while (--len);
 127        } else {
 128                do {
 129                        unsigned char c = *src++;
 130                        if (! (c == '\r' && (1 < len && *src == '\n')))
 131                                *dst++ = c;
 132                } while (--len);
 133        }
 134        strbuf_setlen(buf, dst - buf->buf);
 135        return 1;
 136}
 137
 138static int crlf_to_worktree(const char *path, const char *src, size_t len,
 139                            struct strbuf *buf, int action)
 140{
 141        char *to_free = NULL;
 142        struct text_stat stats;
 143
 144        if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
 145            auto_crlf <= 0)
 146                return 0;
 147
 148        if (!len)
 149                return 0;
 150
 151        gather_stats(src, len, &stats);
 152
 153        /* No LF? Nothing to convert, regardless. */
 154        if (!stats.lf)
 155                return 0;
 156
 157        /* Was it already in CRLF format? */
 158        if (stats.lf == stats.crlf)
 159                return 0;
 160
 161        if (action == CRLF_GUESS) {
 162                /* If we have any bare CR characters, we're not going to touch it */
 163                if (stats.cr != stats.crlf)
 164                        return 0;
 165
 166                if (is_binary(len, &stats))
 167                        return 0;
 168        }
 169
 170        /* are we "faking" in place editing ? */
 171        if (src == buf->buf)
 172                to_free = strbuf_detach(buf);
 173
 174        strbuf_grow(buf, len + stats.lf - stats.crlf);
 175        for (;;) {
 176                const char *nl = memchr(src, '\n', len);
 177                if (!nl)
 178                        break;
 179                if (nl > src && nl[-1] == '\r') {
 180                        strbuf_add(buf, src, nl + 1 - src);
 181                } else {
 182                        strbuf_add(buf, src, nl - src);
 183                        strbuf_addstr(buf, "\r\n");
 184                }
 185                len -= nl + 1 - src;
 186                src  = nl + 1;
 187        }
 188        strbuf_add(buf, src, len);
 189
 190        free(to_free);
 191        return 1;
 192}
 193
 194static int filter_buffer(const char *path, const char *src,
 195                         unsigned long size, const char *cmd)
 196{
 197        /*
 198         * Spawn cmd and feed the buffer contents through its stdin.
 199         */
 200        struct child_process child_process;
 201        int pipe_feed[2];
 202        int write_err, status;
 203
 204        memset(&child_process, 0, sizeof(child_process));
 205
 206        if (pipe(pipe_feed) < 0) {
 207                error("cannot create pipe to run external filter %s", cmd);
 208                return 1;
 209        }
 210
 211        child_process.pid = fork();
 212        if (child_process.pid < 0) {
 213                error("cannot fork to run external filter %s", cmd);
 214                close(pipe_feed[0]);
 215                close(pipe_feed[1]);
 216                return 1;
 217        }
 218        if (!child_process.pid) {
 219                dup2(pipe_feed[0], 0);
 220                close(pipe_feed[0]);
 221                close(pipe_feed[1]);
 222                execlp("sh", "sh", "-c", cmd, NULL);
 223                return 1;
 224        }
 225        close(pipe_feed[0]);
 226
 227        write_err = (write_in_full(pipe_feed[1], src, size) < 0);
 228        if (close(pipe_feed[1]))
 229                write_err = 1;
 230        if (write_err)
 231                error("cannot feed the input to external filter %s", cmd);
 232
 233        status = finish_command(&child_process);
 234        if (status)
 235                error("external filter %s failed %d", cmd, -status);
 236        return (write_err || status);
 237}
 238
 239static int apply_filter(const char *path, const char *src, size_t len,
 240                        struct strbuf *dst, const char *cmd)
 241{
 242        /*
 243         * Create a pipeline to have the command filter the buffer's
 244         * contents.
 245         *
 246         * (child --> cmd) --> us
 247         */
 248        int pipe_feed[2];
 249        int status, ret = 1;
 250        struct child_process child_process;
 251        struct strbuf nbuf;
 252
 253        if (!cmd)
 254                return 0;
 255
 256        memset(&child_process, 0, sizeof(child_process));
 257
 258        if (pipe(pipe_feed) < 0) {
 259                error("cannot create pipe to run external filter %s", cmd);
 260                return 0;
 261        }
 262
 263        fflush(NULL);
 264        child_process.pid = fork();
 265        if (child_process.pid < 0) {
 266                error("cannot fork to run external filter %s", cmd);
 267                close(pipe_feed[0]);
 268                close(pipe_feed[1]);
 269                return 0;
 270        }
 271        if (!child_process.pid) {
 272                dup2(pipe_feed[1], 1);
 273                close(pipe_feed[0]);
 274                close(pipe_feed[1]);
 275                exit(filter_buffer(path, src, len, cmd));
 276        }
 277        close(pipe_feed[1]);
 278
 279        strbuf_init(&nbuf, 0);
 280        if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
 281                error("read from external filter %s failed", cmd);
 282                ret = 0;
 283        }
 284        if (close(pipe_feed[0])) {
 285                ret = error("read from external filter %s failed", cmd);
 286                ret = 0;
 287        }
 288        status = finish_command(&child_process);
 289        if (status) {
 290                ret = error("external filter %s failed %d", cmd, -status);
 291                ret = 0;
 292        }
 293
 294        if (ret) {
 295                *dst = nbuf;
 296        } else {
 297                strbuf_release(&nbuf);
 298        }
 299        return ret;
 300}
 301
 302static struct convert_driver {
 303        const char *name;
 304        struct convert_driver *next;
 305        char *smudge;
 306        char *clean;
 307} *user_convert, **user_convert_tail;
 308
 309static int read_convert_config(const char *var, const char *value)
 310{
 311        const char *ep, *name;
 312        int namelen;
 313        struct convert_driver *drv;
 314
 315        /*
 316         * External conversion drivers are configured using
 317         * "filter.<name>.variable".
 318         */
 319        if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
 320                return 0;
 321        name = var + 7;
 322        namelen = ep - name;
 323        for (drv = user_convert; drv; drv = drv->next)
 324                if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
 325                        break;
 326        if (!drv) {
 327                char *namebuf;
 328                drv = xcalloc(1, sizeof(struct convert_driver));
 329                namebuf = xmalloc(namelen + 1);
 330                memcpy(namebuf, name, namelen);
 331                namebuf[namelen] = 0;
 332                drv->name = namebuf;
 333                drv->next = NULL;
 334                *user_convert_tail = drv;
 335                user_convert_tail = &(drv->next);
 336        }
 337
 338        ep++;
 339
 340        /*
 341         * filter.<name>.smudge and filter.<name>.clean specifies
 342         * the command line:
 343         *
 344         *      command-line
 345         *
 346         * The command-line will not be interpolated in any way.
 347         */
 348
 349        if (!strcmp("smudge", ep)) {
 350                if (!value)
 351                        return error("%s: lacks value", var);
 352                drv->smudge = strdup(value);
 353                return 0;
 354        }
 355
 356        if (!strcmp("clean", ep)) {
 357                if (!value)
 358                        return error("%s: lacks value", var);
 359                drv->clean = strdup(value);
 360                return 0;
 361        }
 362        return 0;
 363}
 364
 365static void setup_convert_check(struct git_attr_check *check)
 366{
 367        static struct git_attr *attr_crlf;
 368        static struct git_attr *attr_ident;
 369        static struct git_attr *attr_filter;
 370
 371        if (!attr_crlf) {
 372                attr_crlf = git_attr("crlf", 4);
 373                attr_ident = git_attr("ident", 5);
 374                attr_filter = git_attr("filter", 6);
 375                user_convert_tail = &user_convert;
 376                git_config(read_convert_config);
 377        }
 378        check[0].attr = attr_crlf;
 379        check[1].attr = attr_ident;
 380        check[2].attr = attr_filter;
 381}
 382
 383static int count_ident(const char *cp, unsigned long size)
 384{
 385        /*
 386         * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 387         */
 388        int cnt = 0;
 389        char ch;
 390
 391        while (size) {
 392                ch = *cp++;
 393                size--;
 394                if (ch != '$')
 395                        continue;
 396                if (size < 3)
 397                        break;
 398                if (memcmp("Id", cp, 2))
 399                        continue;
 400                ch = cp[2];
 401                cp += 3;
 402                size -= 3;
 403                if (ch == '$')
 404                        cnt++; /* $Id$ */
 405                if (ch != ':')
 406                        continue;
 407
 408                /*
 409                 * "$Id: ... "; scan up to the closing dollar sign and discard.
 410                 */
 411                while (size) {
 412                        ch = *cp++;
 413                        size--;
 414                        if (ch == '$') {
 415                                cnt++;
 416                                break;
 417                        }
 418                }
 419        }
 420        return cnt;
 421}
 422
 423static int ident_to_git(const char *path, const char *src, size_t len,
 424                        struct strbuf *buf, int ident)
 425{
 426        char *dst, *dollar;
 427
 428        if (!ident || !count_ident(src, len))
 429                return 0;
 430
 431        strbuf_grow(buf, len);
 432        dst = buf->buf;
 433        for (;;) {
 434                dollar = memchr(src, '$', len);
 435                if (!dollar)
 436                        break;
 437                memcpy(dst, src, dollar + 1 - src);
 438                dst += dollar + 1 - src;
 439                len -= dollar + 1 - src;
 440                src  = dollar + 1;
 441
 442                if (len > 3 && !memcmp(src, "Id:", 3)) {
 443                        dollar = memchr(src + 3, '$', len - 3);
 444                        if (!dollar)
 445                                break;
 446                        memcpy(dst, "Id$", 3);
 447                        dst += 3;
 448                        len -= dollar + 1 - src;
 449                        src  = dollar + 1;
 450                }
 451        }
 452        memcpy(dst, src, len);
 453        strbuf_setlen(buf, dst + len - buf->buf);
 454        return 1;
 455}
 456
 457static int ident_to_worktree(const char *path, const char *src, size_t len,
 458                             struct strbuf *buf, int ident)
 459{
 460        unsigned char sha1[20];
 461        char *to_free = NULL, *dollar;
 462        int cnt;
 463
 464        if (!ident)
 465                return 0;
 466
 467        cnt = count_ident(src, len);
 468        if (!cnt)
 469                return 0;
 470
 471        /* are we "faking" in place editing ? */
 472        if (src == buf->buf)
 473                to_free = strbuf_detach(buf);
 474        hash_sha1_file(src, len, "blob", sha1);
 475
 476        strbuf_grow(buf, len + cnt * 43);
 477        for (;;) {
 478                /* step 1: run to the next '$' */
 479                dollar = memchr(src, '$', len);
 480                if (!dollar)
 481                        break;
 482                strbuf_add(buf, src, dollar + 1 - src);
 483                len -= dollar + 1 - src;
 484                src  = dollar + 1;
 485
 486                /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
 487                if (len < 3 || memcmp("Id", src, 2))
 488                        continue;
 489
 490                /* step 3: skip over Id$ or Id:xxxxx$ */
 491                if (src[2] == '$') {
 492                        src += 3;
 493                        len -= 3;
 494                } else if (src[2] == ':') {
 495                        /*
 496                         * It's possible that an expanded Id has crept its way into the
 497                         * repository, we cope with that by stripping the expansion out
 498                         */
 499                        dollar = memchr(src + 3, '$', len - 3);
 500                        if (!dollar) {
 501                                /* incomplete keyword, no more '$', so just quit the loop */
 502                                break;
 503                        }
 504
 505                        len -= dollar + 1 - src;
 506                        src  = dollar + 1;
 507                } else {
 508                        /* it wasn't a "Id$" or "Id:xxxx$" */
 509                        continue;
 510                }
 511
 512                /* step 4: substitute */
 513                strbuf_addstr(buf, "Id: ");
 514                strbuf_add(buf, sha1_to_hex(sha1), 40);
 515                strbuf_addstr(buf, " $");
 516        }
 517        strbuf_add(buf, src, len);
 518
 519        free(to_free);
 520        return 1;
 521}
 522
 523static int git_path_check_crlf(const char *path, struct git_attr_check *check)
 524{
 525        const char *value = check->value;
 526
 527        if (ATTR_TRUE(value))
 528                return CRLF_TEXT;
 529        else if (ATTR_FALSE(value))
 530                return CRLF_BINARY;
 531        else if (ATTR_UNSET(value))
 532                ;
 533        else if (!strcmp(value, "input"))
 534                return CRLF_INPUT;
 535        return CRLF_GUESS;
 536}
 537
 538static struct convert_driver *git_path_check_convert(const char *path,
 539                                             struct git_attr_check *check)
 540{
 541        const char *value = check->value;
 542        struct convert_driver *drv;
 543
 544        if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
 545                return NULL;
 546        for (drv = user_convert; drv; drv = drv->next)
 547                if (!strcmp(value, drv->name))
 548                        return drv;
 549        return NULL;
 550}
 551
 552static int git_path_check_ident(const char *path, struct git_attr_check *check)
 553{
 554        const char *value = check->value;
 555
 556        return !!ATTR_TRUE(value);
 557}
 558
 559int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
 560{
 561        struct git_attr_check check[3];
 562        int crlf = CRLF_GUESS;
 563        int ident = 0, ret = 0;
 564        char *filter = NULL;
 565
 566        setup_convert_check(check);
 567        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 568                struct convert_driver *drv;
 569                crlf = git_path_check_crlf(path, check + 0);
 570                ident = git_path_check_ident(path, check + 1);
 571                drv = git_path_check_convert(path, check + 2);
 572                if (drv && drv->clean)
 573                        filter = drv->clean;
 574        }
 575
 576        ret |= apply_filter(path, src, len, dst, filter);
 577        if (ret) {
 578                src = dst->buf;
 579                len = dst->len;
 580        }
 581        ret |= crlf_to_git(path, src, len, dst, crlf);
 582        if (ret) {
 583                src = dst->buf;
 584                len = dst->len;
 585        }
 586        return ret | ident_to_git(path, src, len, dst, ident);
 587}
 588
 589int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 590{
 591        struct git_attr_check check[3];
 592        int crlf = CRLF_GUESS;
 593        int ident = 0, ret = 0;
 594        char *filter = NULL;
 595
 596        setup_convert_check(check);
 597        if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 598                struct convert_driver *drv;
 599                crlf = git_path_check_crlf(path, check + 0);
 600                ident = git_path_check_ident(path, check + 1);
 601                drv = git_path_check_convert(path, check + 2);
 602                if (drv && drv->smudge)
 603                        filter = drv->smudge;
 604        }
 605
 606        ret |= ident_to_worktree(path, src, len, dst, ident);
 607        if (ret) {
 608                src = dst->buf;
 609                len = dst->len;
 610        }
 611        ret |= crlf_to_worktree(path, src, len, dst, crlf);
 612        if (ret) {
 613                src = dst->buf;
 614                len = dst->len;
 615        }
 616        return ret | apply_filter(path, src, len, dst, filter);
 617}