apply.con commit git-apply: make the diffstat output happen for "--stat" only. (fab2c25)
   1/*
   2 * apply.c
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This applies patches on top of some (arbitrary) version of the SCM.
   7 *
   8 * NOTE! It does all its work in the index file, and only cares about
   9 * the files in the working directory if you tell it to "merge" the
  10 * patch apply.
  11 *
  12 * Even when merging it always takes the source from the index, and
  13 * uses the working tree as a "branch" for a 3-way merge.
  14 */
  15#include <ctype.h>
  16
  17#include "cache.h"
  18
  /*
   * Run-time options.  Merging is the default, since that is what most
   * people would expect.
   */
  static int merge_patch = 1;     /* cleared by "--no-merge" */
  static int diffstat;            /* set by "--stat": print a diffstat */
  static int check = 1;           /* cleared by "--stat": skip the index checks */
  static const char apply_usage[] = "git-apply <patch>";
  25
  /*
   * Diffstat scaling state: the biggest per-file change count and the
   * longest file name seen so far.  Both are used to scale the
   * "diff-stat" like output.
   */
  static int max_change;
  static int max_len;
  32
  /*
   * Parser "current state": the line number being looked at (used in
   * diagnostics) and the default file name handed to the traditional
   * patch name lookup.  The "is_xxxx" flags kept per patch use -1 to
   * mean "don't know yet".
   */
  static int linenr = 1;
  static char *def_name;
  40
  /*
   * One "@@ -a,b +c,d @@" hunk of a patch.
   */
  struct fragment {
          unsigned long oldpos;           /* "a": start line in the old file */
          unsigned long oldlines;         /* "b": line count in the old file */
          unsigned long newpos;           /* "c": start line in the new file */
          unsigned long newlines;         /* "d": line count in the new file */
          const char *patch;              /* start of the fragment text in the patch buffer */
          int size;                       /* length of the fragment text in bytes */
          struct fragment *next;          /* next fragment of the same patch */
  };
  48
  /*
   * Everything known about the change to a single file.
   */
  struct patch {
          char *new_name;                 /* post-image name, or NULL */
          char *old_name;                 /* pre-image name, or NULL */
          unsigned int old_mode;
          unsigned int new_mode;
          int is_rename;
          int is_copy;
          int is_new;                     /* -1 while not yet known */
          int is_delete;                  /* -1 while not yet known */
          int lines_added;                /* total '+' lines over all fragments */
          int lines_deleted;              /* total '-' lines over all fragments */
          struct fragment *fragments;     /* list of parsed hunks */
          struct patch *next;             /* next file in the patch series */
  };
  57
  #define CHUNKSIZE (8192)
  #define SLOP (16)

  /*
   * Read everything available on "fd" into one heap buffer.
   *
   * The buffer grows by CHUNKSIZE whenever fewer than 1024 bytes of
   * room are left.  On return "*sizep" holds the number of bytes read,
   * and the data is followed by at least SLOP NUL bytes so that callers
   * can do speculative "memcmp" etc. slightly past the end.
   *
   * A read error is fatal (die()), except for EAGAIN and EINTR, which
   * are simply retried.  The caller owns (and frees) the result.
   */
  static void *read_patch_file(int fd, unsigned long *sizep)
  {
          unsigned long size = 0, alloc = CHUNKSIZE;
          char *buffer = xmalloc(alloc);

          for (;;) {
                  unsigned long room = alloc - size;
                  ssize_t nr;

                  if (room < 1024) {
                          alloc += CHUNKSIZE;
                          buffer = xrealloc(buffer, alloc);
                          room = alloc - size;
                  }
                  nr = read(fd, buffer + size, room);
                  if (!nr)
                          break;
                  if (nr < 0) {
                          /* an interrupted or would-block read is not an error */
                          if (errno == EAGAIN || errno == EINTR)
                                  continue;
                          die("git-apply: read returned %s", strerror(errno));
                  }
                  size += nr;
          }
          *sizep = size;

          /*
           * Make sure that we have some slop in the buffer
           * so that we can do speculative "memcmp" etc, and
           * see to it that it is NUL-filled.
           */
          if (alloc < size + SLOP)
                  buffer = xrealloc(buffer, size + SLOP);
          memset(buffer + size, 0, SLOP);
          return buffer;
  }
  95
  /*
   * Length of the line starting at "buffer", including its terminating
   * newline if there is one within the first "size" bytes.  Returns
   * "size" when no newline is found, and 0 for an empty buffer.
   */
  static unsigned long linelen(char *buffer, unsigned long size)
  {
          unsigned long used = 0;

          while (used < size) {
                  if (buffer[used++] == '\n')
                          break;
          }
          return used;
  }
 106
 /*
  * Does "str" start with "/dev/null" followed by whitespace?
  *
  * strncmp() stops at a NUL, so a string shorter than nine bytes is
  * never read past its end, and the character handed to isspace() is
  * converted to unsigned char as <ctype.h> requires.
  */
 static int is_dev_null(const char *str)
 {
         return !strncmp("/dev/null", str, 9) && isspace((unsigned char)str[9]);
 }
 111
 #define TERM_EXIST      1
 #define TERM_SPACE      2
 #define TERM_TAB        4

 /*
  * Decide whether the whitespace character "c" ends the file name
  * "name" (of "namelen" bytes).
  *
  * A space only terminates when TERM_SPACE is set, a tab only when
  * TERM_TAB is set.  With TERM_EXIST the name must additionally be
  * present in the index; otherwise we return false so that the caller
  * keeps scanning.
  */
 static int name_terminate(const char *name, int namelen, int c, int terminate)
 {
         switch (c) {
         case ' ':
                 if (!(terminate & TERM_SPACE))
                         return 0;
                 break;
         case '\t':
                 if (!(terminate & TERM_TAB))
                         return 0;
                 break;
         default:
                 break;
         }

         /* Only an existing name counts when the caller asked for one */
         return (terminate & TERM_EXIST) ? cache_name_pos(name, namelen) >= 0 : 1;
 }
 132
 /*
  * Extract a file name from "line".
  *
  * The name ends at the newline, at the end of the NUL-padded buffer,
  * or at whitespace that name_terminate() accepts for the given
  * "terminate" flags.  The name starts right after the slash on which
  * the "p_value" countdown reaches zero; if that never happens it
  * starts at "line" itself.
  *
  * "def" is a previously found (heap allocated) candidate or NULL.  It
  * is returned unchanged when no name is found here, or when the new
  * name is merely "def" with something tacked on to the end.
  * Otherwise a newly allocated name is returned and "def" is freed.
  */
 static char * find_name(const char *line, char *def, int p_value, int terminate)
 {
         int len;
         const char *start = line;
         char *name;

         for (;;) {
                 char c = *line;

                 /* Never walk off the end of the buffer on a line without '\n' */
                 if (!c)
                         break;
                 if (isspace((unsigned char)c)) {
                         if (c == '\n')
                                 break;
                         if (name_terminate(start, line-start, c, terminate))
                                 break;
                 }
                 line++;
                 if (c == '/' && !--p_value)
                         start = line;
         }
         len = line - start;
         if (!len)
                 return def;

         /*
          * Generally we prefer the shorter name, especially
          * if the other one is just a variation of that with
          * something else tacked on to the end (ie "file.orig"
          * or "file~").
          */
         if (def) {
                 int deflen = strlen(def);
                 if (deflen < len && !strncmp(start, def, deflen))
                         return def;
         }

         name = xmalloc(len + 1);
         memcpy(name, start, len);
         name[len] = 0;
         free(def);
         return name;
 }
 176
 177/*
 178 * Get the name etc info from the --/+++ lines of a traditional patch header
 179 *
 180 * NOTE! This hardcodes "-p1" behaviour in filename detection.
 181 *
 182 * FIXME! The end-of-filename heuristics are kind of screwy. For existing
 183 * files, we can happily check the index for a match, but for creating a
 184 * new file we should try to match whatever "patch" does. I have no idea.
 185 */
 186static void parse_traditional_patch(const char *first, const char *second, struct patch *patch)
 187{
 188        int p_value = 1;
 189        char *name;
 190
 191        first += 4;     // skip "--- "
 192        second += 4;    // skip "+++ "
 193        if (is_dev_null(first)) {
 194                patch->is_new = 1;
 195                patch->is_delete = 0;
 196                name = find_name(second, def_name, p_value, TERM_SPACE | TERM_TAB);
 197                patch->new_name = name;
 198        } else if (is_dev_null(second)) {
 199                patch->is_new = 0;
 200                patch->is_delete = 1;
 201                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 202                patch->old_name = name;
 203        } else {
 204                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 205                name = find_name(second, name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 206                patch->old_name = patch->new_name = name;
 207        }
 208        if (!name)
 209                die("unable to find filename in patch at line %d", linenr);
 210}
 211
 /*
  * "@@ -" line: the git header is over.  The negative return value
  * tells parse_git_header() to stop.
  */
 static int gitdiff_hdrend(const char *line, struct patch *patch)
 {
         (void)line;
         (void)patch;
         return -1;
 }
 216
 217/*
 218 * We're anal about diff header consistency, to make
 219 * sure that we don't end up having strange ambiguous
 220 * patches floating around.
 221 *
 222 * As a result, gitdiff_{old|new}name() will check
 223 * their names against any previous information, just
 224 * to make sure..
 225 */
 226static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew)
 227{
 228        int len;
 229        const char *name;
 230
 231        if (!orig_name && !isnull)
 232                return find_name(line, NULL, 1, 0);
 233
 234        name = "/dev/null";
 235        len = 9;
 236        if (orig_name) {
 237                name = orig_name;
 238                len = strlen(name);
 239                if (isnull)
 240                        die("git-apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr);
 241        }
 242
 243        if (*name == '/')
 244                goto absolute_path;
 245
 246        for (;;) {
 247                char c = *line++;
 248                if (c == '\n')
 249                        break;
 250                if (c != '/')
 251                        continue;
 252absolute_path:
 253                if (memcmp(line, name, len) || line[len] != '\n')
 254                        break;
 255                return orig_name;
 256        }
 257        die("git-apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr);
 258        return NULL;
 259}
 260
 261static int gitdiff_oldname(const char *line, struct patch *patch)
 262{
 263        patch->old_name = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old");
 264        return 0;
 265}
 266
 267static int gitdiff_newname(const char *line, struct patch *patch)
 268{
 269        patch->new_name = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new");
 270        return 0;
 271}
 272
 273static int gitdiff_oldmode(const char *line, struct patch *patch)
 274{
 275        patch->old_mode = strtoul(line, NULL, 8);
 276        return 0;
 277}
 278
 279static int gitdiff_newmode(const char *line, struct patch *patch)
 280{
 281        patch->new_mode = strtoul(line, NULL, 8);
 282        return 0;
 283}
 284
 285static int gitdiff_delete(const char *line, struct patch *patch)
 286{
 287        patch->is_delete = 1;
 288        return gitdiff_oldmode(line, patch);
 289}
 290
 291static int gitdiff_newfile(const char *line, struct patch *patch)
 292{
 293        patch->is_new = 1;
 294        return gitdiff_newmode(line, patch);
 295}
 296
 297static int gitdiff_copysrc(const char *line, struct patch *patch)
 298{
 299        patch->is_copy = 1;
 300        patch->old_name = find_name(line, NULL, 0, 0);
 301        return 0;
 302}
 303
 304static int gitdiff_copydst(const char *line, struct patch *patch)
 305{
 306        patch->is_copy = 1;
 307        patch->new_name = find_name(line, NULL, 0, 0);
 308        return 0;
 309}
 310
 311static int gitdiff_renamesrc(const char *line, struct patch *patch)
 312{
 313        patch->is_rename = 1;
 314        patch->old_name = find_name(line, NULL, 0, 0);
 315        return 0;
 316}
 317
 318static int gitdiff_renamedst(const char *line, struct patch *patch)
 319{
 320        patch->is_rename = 1;
 321        patch->new_name = find_name(line, NULL, 0, 0);
 322        return 0;
 323}
 324
 /* "similarity index " line: recognized, but its value is not used. */
 static int gitdiff_similarity(const char *line, struct patch *patch)
 {
         (void)line;
         (void)patch;
         return 0;
 }
 329
 /*
  * Catch-all for a line that is not part of the git header.  That is
  * normal for a diff that doesn't change anything: we simply fall
  * through into the next diff, so tell the parser to break out.
  */
 static int gitdiff_unrecognized(const char *line, struct patch *patch)
 {
         (void)line;
         (void)patch;
         return -1;
 }
 338
 339/* Verify that we recognize the lines following a git header */
 340static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch)
 341{
 342        unsigned long offset;
 343
 344        /* A git diff has explicit new/delete information, so we don't guess */
 345        patch->is_new = 0;
 346        patch->is_delete = 0;
 347
 348        line += len;
 349        size -= len;
 350        linenr++;
 351        for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
 352                static const struct opentry {
 353                        const char *str;
 354                        int (*fn)(const char *, struct patch *);
 355                } optable[] = {
 356                        { "@@ -", gitdiff_hdrend },
 357                        { "--- ", gitdiff_oldname },
 358                        { "+++ ", gitdiff_newname },
 359                        { "old mode ", gitdiff_oldmode },
 360                        { "new mode ", gitdiff_newmode },
 361                        { "deleted file mode ", gitdiff_delete },
 362                        { "new file mode ", gitdiff_newfile },
 363                        { "copy from ", gitdiff_copysrc },
 364                        { "copy to ", gitdiff_copydst },
 365                        { "rename from ", gitdiff_renamesrc },
 366                        { "rename to ", gitdiff_renamedst },
 367                        { "similarity index ", gitdiff_similarity },
 368                        { "", gitdiff_unrecognized },
 369                };
 370                int i;
 371
 372                len = linelen(line, size);
 373                if (!len || line[len-1] != '\n')
 374                        break;
 375                for (i = 0; i < sizeof(optable) / sizeof(optable[0]); i++) {
 376                        const struct opentry *p = optable + i;
 377                        int oplen = strlen(p->str);
 378                        if (len < oplen || memcmp(p->str, line, oplen))
 379                                continue;
 380                        if (p->fn(line + oplen, patch) < 0)
 381                                return offset;
 382                        break;
 383                }
 384        }
 385
 386        return offset;
 387}
 388
 /*
  * Parse the unsigned decimal number at the start of "line" into "*p".
  *
  * Returns the number of characters consumed, or 0 (leaving "*p"
  * untouched) when "line" does not start with a digit.
  */
 static int parse_num(const char *line, unsigned long *p)
 {
         char *end = NULL;

         if (isdigit(*line)) {
                 *p = strtoul(line, &end, 10);
                 return end - line;
         }
         return 0;
 }
 398
 /*
  * Parse one "start[,count]" range of a fragment header.
  *
  * "line"/"len" describe the whole header line and "offset" is where
  * the range starts within it.  The start goes to "*p1" and the count
  * to "*p2"; the range must be followed directly by the string
  * "expect".
  *
  * In the unified diff format the count is omitted when the range
  * covers exactly one line, so a missing count means 1.
  *
  * Returns the offset just past "expect", or -1 if the line does not
  * match (which includes being handed a negative "offset").
  */
 static int parse_range(const char *line, int len, int offset, const char *expect,
                         unsigned long *p1, unsigned long *p2)
 {
         int digits, ex;

         if (offset < 0 || offset >= len)
                 return -1;
         line += offset;
         len -= offset;

         digits = parse_num(line, p1);
         if (!digits)
                 return -1;

         offset += digits;
         line += digits;
         len -= digits;

         /* "@@ -5 +5 @@" is shorthand for "@@ -5,1 +5,1 @@" */
         *p2 = 1;
         if (*line == ',') {
                 digits = parse_num(line+1, p2);
                 if (!digits)
                         return -1;

                 offset += digits+1;
                 line += digits+1;
                 len -= digits+1;
         }

         ex = strlen(expect);
         if (ex > len)
                 return -1;
         if (memcmp(line, expect, ex))
                 return -1;

         return offset + ex;
 }
 436
 437/*
 438 * Parse a unified diff fragment header of the
 439 * form "@@ -a,b +c,d @@"
 440 */
 441static int parse_fragment_header(char *line, int len, struct fragment *fragment)
 442{
 443        int offset;
 444
 445        if (!len || line[len-1] != '\n')
 446                return -1;
 447
 448        /* Figure out the number of lines in a fragment */
 449        offset = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines);
 450        offset = parse_range(line, len, offset, " @@", &fragment->newpos, &fragment->newlines);
 451
 452        return offset;
 453}
 454
 455static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch)
 456{
 457        unsigned long offset, len;
 458
 459        patch->is_rename = patch->is_copy = 0;
 460        patch->is_new = patch->is_delete = -1;
 461        patch->old_mode = patch->new_mode = 0;
 462        patch->old_name = patch->new_name = NULL;
 463        for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
 464                unsigned long nextlen;
 465
 466                len = linelen(line, size);
 467                if (!len)
 468                        break;
 469
 470                /* Testing this early allows us to take a few shortcuts.. */
 471                if (len < 6)
 472                        continue;
 473
 474                /*
 475                 * Make sure we don't find any unconnected patch fragmants.
 476                 * That's a sign that we didn't find a header, and that a
 477                 * patch has become corrupted/broken up.
 478                 */
 479                if (!memcmp("@@ -", line, 4)) {
 480                        struct fragment dummy;
 481                        if (parse_fragment_header(line, len, &dummy) < 0)
 482                                continue;
 483                        error("patch fragment without header at line %d: %.*s", linenr, len-1, line);
 484                }
 485
 486                if (size < len + 6)
 487                        break;
 488
 489                /*
 490                 * Git patch? It might not have a real patch, just a rename
 491                 * or mode change, so we handle that specially
 492                 */
 493                if (!memcmp("diff --git ", line, 11)) {
 494                        int git_hdr_len = parse_git_header(line, len, size, patch);
 495                        if (git_hdr_len < 0)
 496                                continue;
 497
 498                        *hdrsize = git_hdr_len;
 499                        return offset;
 500                }
 501
 502                /** --- followed by +++ ? */
 503                if (memcmp("--- ", line,  4) || memcmp("+++ ", line + len, 4))
 504                        continue;
 505
 506                /*
 507                 * We only accept unified patches, so we want it to
 508                 * at least have "@@ -a,b +c,d @@\n", which is 14 chars
 509                 * minimum
 510                 */
 511                nextlen = linelen(line + len, size - len);
 512                if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
 513                        continue;
 514
 515                /* Ok, we'll consider it a patch */
 516                parse_traditional_patch(line, line+len, patch);
 517                *hdrsize = len + nextlen;
 518                linenr += 2;
 519                return offset;
 520        }
 521        return -1;
 522}
 523
 524/*
 525 * Parse a unified diff. Note that this really needs
 526 * to parse each fragment separately, since the only
 527 * way to know the difference between a "---" that is
 528 * part of a patch, and a "---" that starts the next
 529 * patch is to look at the line counts..
 530 */
 531static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
 532{
 533        int added, deleted;
 534        int len = linelen(line, size), offset;
 535        unsigned long pos[4], oldlines, newlines;
 536
 537        offset = parse_fragment_header(line, len, fragment);
 538        if (offset < 0)
 539                return -1;
 540        oldlines = fragment->oldlines;
 541        newlines = fragment->newlines;
 542
 543        if (patch->is_new < 0 && (pos[0] || oldlines))
 544                patch->is_new = 0;
 545        if (patch->is_delete < 0 && (pos[1] || newlines))
 546                patch->is_delete = 0;
 547
 548        /* Parse the thing.. */
 549        line += len;
 550        size -= len;
 551        linenr++;
 552        added = deleted = 0;
 553        for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
 554                if (!oldlines && !newlines)
 555                        break;
 556                len = linelen(line, size);
 557                if (!len || line[len-1] != '\n')
 558                        return -1;
 559                switch (*line) {
 560                default:
 561                        return -1;
 562                case ' ':
 563                        oldlines--;
 564                        newlines--;
 565                        break;
 566                case '-':
 567                        deleted++;
 568                        oldlines--;
 569                        break;
 570                case '+':
 571                        added++;
 572                        newlines--;
 573                        break;
 574                /* We allow "\ No newline at end of file" */
 575                case '\\':
 576                        break;
 577                }
 578        }
 579        patch->lines_added += added;
 580        patch->lines_deleted += deleted;
 581        return offset;
 582}
 583
 584static int parse_single_patch(char *line, unsigned long size, struct patch *patch)
 585{
 586        unsigned long offset = 0;
 587        struct fragment **fragp = &patch->fragments;
 588
 589        while (size > 4 && !memcmp(line, "@@ -", 4)) {
 590                struct fragment *fragment;
 591                int len;
 592
 593                fragment = xmalloc(sizeof(*fragment));
 594                memset(fragment, 0, sizeof(*fragment));
 595                len = parse_fragment(line, size, patch, fragment);
 596                if (len <= 0)
 597                        die("corrupt patch at line %d", linenr);
 598
 599                fragment->patch = line;
 600                fragment->size = len;
 601
 602                *fragp = fragment;
 603                fragp = &fragment->next;
 604
 605                offset += len;
 606                line += len;
 607                size -= len;
 608        }
 609        return offset;
 610}
 611
 /*
  * Parse the next complete patch (header plus fragments) found in
  * "buffer" into "patch".
  *
  * Returns the number of bytes used up, counted from "buffer" and
  * including anything skipped in front of the header, or a negative
  * value when no patch header is found.
  */
 static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
 {
         int hdrsize, patchsize, body_start;
         int offset = find_header(buffer, size, &hdrsize, patch);

         if (offset < 0)
                 return offset;

         /* the fragments start right behind the header */
         body_start = offset + hdrsize;
         patchsize = parse_single_patch(buffer + body_start, size - body_start, patch);

         return body_start + patchsize;
 }
 624
 625const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
 626const char minuses[]= "----------------------------------------------------------------------";
 627
 628static void show_stats(struct patch *patch)
 629{
 630        char *name = patch->old_name;
 631        int len, max, add, del;
 632
 633        if (!name) {
 634                name = patch->new_name;
 635                if (!name)
 636                        return;
 637        }
 638
 639        /*
 640         * "scale" the filename
 641         */
 642        len = strlen(name);
 643        max = max_len;
 644        if (max > 50)
 645                max = 50;
 646        if (len > max)
 647                name += len - max;
 648        len = max;
 649
 650        /*
 651         * scale the add/delete
 652         */
 653        max = max_change;
 654        if (max + len > 70)
 655                max = 70 - len;
 656        
 657        add = (patch->lines_added * max + max_change/2) / max_change;
 658        del = (patch->lines_deleted * max + max_change/2) / max_change;
 659        printf(" %-*s |%5d %.*s%.*s\n",
 660                len, name, patch->lines_added + patch->lines_deleted,
 661                add, pluses, del, minuses);
 662}
 663
 664static void check_patch(struct patch *patch)
 665{
 666        const char *old_name = patch->old_name;
 667        const char *new_name = patch->new_name;
 668
 669        if (old_name) {
 670                if (cache_name_pos(old_name, strlen(old_name)) < 0)
 671                        die("file %s does not exist", old_name);
 672                if (patch->is_new < 0)
 673                        patch->is_new = 0;
 674        }
 675        if (new_name && (patch->is_new | patch->is_rename | patch->is_copy)) {
 676                if (cache_name_pos(new_name, strlen(new_name)) >= 0)
 677                        die("file %s already exists", new_name);
 678        }
 679}
 680
 681static void apply_patch_list(struct patch *patch)
 682{
 683        int files, adds, dels;
 684
 685        files = adds = dels = 0;
 686        if (!patch)
 687                die("no patch found");
 688        do {
 689                if (check)
 690                        check_patch(patch);
 691
 692                if (diffstat) {
 693                        files++;
 694                        adds += patch->lines_added;
 695                        dels += patch->lines_deleted;
 696                        show_stats(patch);
 697                }
 698        } while ((patch = patch->next) != NULL);
 699
 700        if (diffstat)
 701                printf(" %d files changed, %d insertions(+), %d deletions(-)\n", files, adds, dels);
 702}
 703
 704static void patch_stats(struct patch *patch)
 705{
 706        int lines = patch->lines_added + patch->lines_deleted;
 707
 708        if (lines > max_change)
 709                max_change = lines;
 710        if (patch->old_name) {
 711                int len = strlen(patch->old_name);
 712                if (len > max_len)
 713                        max_len = len;
 714        }
 715        if (patch->new_name) {
 716                int len = strlen(patch->new_name);
 717                if (len > max_len)
 718                        max_len = len;
 719        }
 720}
 721
 722static int apply_patch(int fd)
 723{
 724        unsigned long offset, size;
 725        char *buffer = read_patch_file(fd, &size);
 726        struct patch *list = NULL, **listp = &list;
 727
 728        if (!buffer)
 729                return -1;
 730        offset = 0;
 731        while (size > 0) {
 732                struct patch *patch;
 733                int nr;
 734
 735                patch = xmalloc(sizeof(*patch));
 736                memset(patch, 0, sizeof(*patch));
 737                nr = parse_chunk(buffer + offset, size, patch);
 738                if (nr < 0)
 739                        break;
 740                patch_stats(patch);
 741                *listp = patch;
 742                listp = &patch->next;
 743                offset += nr;
 744                size -= nr;
 745        }
 746
 747        apply_patch_list(list);
 748
 749        free(buffer);
 750        return 0;
 751}
 752
 753int main(int argc, char **argv)
 754{
 755        int i;
 756        int read_stdin = 1;
 757
 758        if (read_cache() < 0)
 759                die("unable to read index file");
 760
 761        for (i = 1; i < argc; i++) {
 762                const char *arg = argv[i];
 763                int fd;
 764
 765                if (!strcmp(arg, "-")) {
 766                        apply_patch(0);
 767                        read_stdin = 0;
 768                        continue;
 769                }
 770                if (!strcmp(arg, "--no-merge")) {
 771                        merge_patch = 0;
 772                        continue;
 773                }
 774                if (!strcmp(arg, "--stat")) {
 775                        check = 0;
 776                        diffstat = 1;
 777                        continue;
 778                }
 779                fd = open(arg, O_RDONLY);
 780                if (fd < 0)
 781                        usage(apply_usage);
 782                read_stdin = 0;
 783                apply_patch(fd);
 784                close(fd);
 785        }
 786        if (read_stdin)
 787                apply_patch(0);
 788        return 0;
 789}