apply.con commit git-apply: implement "diffstat" output (3f40315)
   1/*
   2 * apply.c
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This applies patches on top of some (arbitrary) version of the SCM.
   7 *
   8 * NOTE! It does all its work in the index file, and only cares about
   9 * the files in the working directory if you tell it to "merge" the
  10 * patch apply.
  11 *
  12 * Even when merging it always takes the source from the index, and
  13 * uses the working tree as a "branch" for a 3-way merge.
  14 */
  15#include <ctype.h>
  16
  17#include "cache.h"
  18
  19// We default to the merge behaviour, since that's what most people would
  20// expect
  21static int merge_patch = 1;
  22static const char apply_usage[] = "git-apply <patch>";
  23
  24/*
  25 * For "diff-stat" like behaviour, we keep track of the biggest change
  26 * we've seen, and the longest filename. That allows us to do simple
  27 * scaling.
  28 */
  29static int max_change, max_len;
  30
  31/*
  32 * Various "current state", notably line numbers and what
  33 * file (and how) we're patching right now.. The "is_xxxx"
  34 * things are flags, where -1 means "don't know yet".
  35 */
  36static int linenr = 1;
  37static char *def_name = NULL;
  38
  39struct fragment {
  40        unsigned long oldpos, oldlines;
  41        unsigned long newpos, newlines;
  42        const char *patch;
  43        int size;
  44        struct fragment *next;
  45};
  46
  47struct patch {
  48        char *new_name, *old_name;
  49        unsigned int old_mode, new_mode;
  50        int is_rename, is_copy, is_new, is_delete;
  51        int lines_added, lines_deleted;
  52        struct fragment *fragments;
  53        struct patch *next;
  54};
  55
  56#define CHUNKSIZE (8192)
  57#define SLOP (16)
  58
  59static void *read_patch_file(int fd, unsigned long *sizep)
  60{
  61        unsigned long size = 0, alloc = CHUNKSIZE;
  62        void *buffer = xmalloc(alloc);
  63
  64        for (;;) {
  65                int nr = alloc - size;
  66                if (nr < 1024) {
  67                        alloc += CHUNKSIZE;
  68                        buffer = xrealloc(buffer, alloc);
  69                        nr = alloc - size;
  70                }
  71                nr = read(fd, buffer + size, nr);
  72                if (!nr)
  73                        break;
  74                if (nr < 0) {
  75                        if (errno == EAGAIN)
  76                                continue;
  77                        die("git-apply: read returned %s", strerror(errno));
  78                }
  79                size += nr;
  80        }
  81        *sizep = size;
  82
  83        /*
  84         * Make sure that we have some slop in the buffer
  85         * so that we can do speculative "memcmp" etc, and
  86         * see to it that it is NUL-filled.
  87         */
  88        if (alloc < size + SLOP)
  89                buffer = xrealloc(buffer, size + SLOP);
  90        memset(buffer + size, 0, SLOP);
  91        return buffer;
  92}
  93
  94static unsigned long linelen(char *buffer, unsigned long size)
  95{
  96        unsigned long len = 0;
  97        while (size--) {
  98                len++;
  99                if (*buffer++ == '\n')
 100                        break;
 101        }
 102        return len;
 103}
 104
 105static int is_dev_null(const char *str)
 106{
 107        return !memcmp("/dev/null", str, 9) && isspace(str[9]);
 108}
 109
 110#define TERM_EXIST      1
 111#define TERM_SPACE      2
 112#define TERM_TAB        4
 113
 114static int name_terminate(const char *name, int namelen, int c, int terminate)
 115{
 116        if (c == ' ' && !(terminate & TERM_SPACE))
 117                return 0;
 118        if (c == '\t' && !(terminate & TERM_TAB))
 119                return 0;
 120
 121        /*
 122         * Do we want an existing name? Return false and
 123         * continue if it's not there.
 124         */
 125        if (terminate & TERM_EXIST)
 126                return cache_name_pos(name, namelen) >= 0;
 127
 128        return 1;
 129}
 130
 131static char * find_name(const char *line, char *def, int p_value, int terminate)
 132{
 133        int len;
 134        const char *start = line;
 135        char *name;
 136
 137        for (;;) {
 138                char c = *line;
 139
 140                if (isspace(c)) {
 141                        if (c == '\n')
 142                                break;
 143                        if (name_terminate(start, line-start, c, terminate))
 144                                break;
 145                }
 146                line++;
 147                if (c == '/' && !--p_value)
 148                        start = line;
 149        }
 150        if (!start)
 151                return def;
 152        len = line - start;
 153        if (!len)
 154                return def;
 155
 156        /*
 157         * Generally we prefer the shorter name, especially
 158         * if the other one is just a variation of that with
 159         * something else tacked on to the end (ie "file.orig"
 160         * or "file~").
 161         */
 162        if (def) {
 163                int deflen = strlen(def);
 164                if (deflen < len && !strncmp(start, def, deflen))
 165                        return def;
 166        }
 167
 168        name = xmalloc(len + 1);
 169        memcpy(name, start, len);
 170        name[len] = 0;
 171        free(def);
 172        return name;
 173}
 174
 175/*
 176 * Get the name etc info from the --/+++ lines of a traditional patch header
 177 *
 178 * NOTE! This hardcodes "-p1" behaviour in filename detection.
 179 *
 180 * FIXME! The end-of-filename heuristics are kind of screwy. For existing
 181 * files, we can happily check the index for a match, but for creating a
 182 * new file we should try to match whatever "patch" does. I have no idea.
 183 */
 184static void parse_traditional_patch(const char *first, const char *second, struct patch *patch)
 185{
 186        int p_value = 1;
 187        char *name;
 188
 189        first += 4;     // skip "--- "
 190        second += 4;    // skip "+++ "
 191        if (is_dev_null(first)) {
 192                patch->is_new = 1;
 193                patch->is_delete = 0;
 194                name = find_name(second, def_name, p_value, TERM_SPACE | TERM_TAB);
 195                patch->new_name = name;
 196        } else if (is_dev_null(second)) {
 197                patch->is_new = 0;
 198                patch->is_delete = 1;
 199                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 200                patch->old_name = name;
 201        } else {
 202                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 203                name = find_name(second, name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 204                patch->old_name = patch->new_name = name;
 205        }
 206        if (!name)
 207                die("unable to find filename in patch at line %d", linenr);
 208}
 209
 210static int gitdiff_hdrend(const char *line, struct patch *patch)
 211{
 212        return -1;
 213}
 214
 215/*
 216 * We're anal about diff header consistency, to make
 217 * sure that we don't end up having strange ambiguous
 218 * patches floating around.
 219 *
 220 * As a result, gitdiff_{old|new}name() will check
 221 * their names against any previous information, just
 222 * to make sure..
 223 */
 224static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew)
 225{
 226        int len;
 227        const char *name;
 228
 229        if (!orig_name && !isnull)
 230                return find_name(line, NULL, 1, 0);
 231
 232        name = "/dev/null";
 233        len = 9;
 234        if (orig_name) {
 235                name = orig_name;
 236                len = strlen(name);
 237                if (isnull)
 238                        die("git-apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr);
 239        }
 240
 241        if (*name == '/')
 242                goto absolute_path;
 243
 244        for (;;) {
 245                char c = *line++;
 246                if (c == '\n')
 247                        break;
 248                if (c != '/')
 249                        continue;
 250absolute_path:
 251                if (memcmp(line, name, len) || line[len] != '\n')
 252                        break;
 253                return orig_name;
 254        }
 255        die("git-apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr);
 256        return NULL;
 257}
 258
 259static int gitdiff_oldname(const char *line, struct patch *patch)
 260{
 261        patch->old_name = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old");
 262        return 0;
 263}
 264
 265static int gitdiff_newname(const char *line, struct patch *patch)
 266{
 267        patch->new_name = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new");
 268        return 0;
 269}
 270
 271static int gitdiff_oldmode(const char *line, struct patch *patch)
 272{
 273        patch->old_mode = strtoul(line, NULL, 8);
 274        return 0;
 275}
 276
 277static int gitdiff_newmode(const char *line, struct patch *patch)
 278{
 279        patch->new_mode = strtoul(line, NULL, 8);
 280        return 0;
 281}
 282
 283static int gitdiff_delete(const char *line, struct patch *patch)
 284{
 285        patch->is_delete = 1;
 286        return gitdiff_oldmode(line, patch);
 287}
 288
 289static int gitdiff_newfile(const char *line, struct patch *patch)
 290{
 291        patch->is_new = 1;
 292        return gitdiff_newmode(line, patch);
 293}
 294
 295static int gitdiff_copysrc(const char *line, struct patch *patch)
 296{
 297        patch->is_copy = 1;
 298        patch->old_name = find_name(line, NULL, 0, 0);
 299        return 0;
 300}
 301
 302static int gitdiff_copydst(const char *line, struct patch *patch)
 303{
 304        patch->is_copy = 1;
 305        patch->new_name = find_name(line, NULL, 0, 0);
 306        return 0;
 307}
 308
 309static int gitdiff_renamesrc(const char *line, struct patch *patch)
 310{
 311        patch->is_rename = 1;
 312        patch->old_name = find_name(line, NULL, 0, 0);
 313        return 0;
 314}
 315
 316static int gitdiff_renamedst(const char *line, struct patch *patch)
 317{
 318        patch->is_rename = 1;
 319        patch->new_name = find_name(line, NULL, 0, 0);
 320        return 0;
 321}
 322
 323static int gitdiff_similarity(const char *line, struct patch *patch)
 324{
 325        return 0;
 326}
 327
 328/*
 329 * This is normal for a diff that doesn't change anything: we'll fall through
 330 * into the next diff. Tell the parser to break out.
 331 */
 332static int gitdiff_unrecognized(const char *line, struct patch *patch)
 333{
 334        return -1;
 335}
 336
 337/* Verify that we recognize the lines following a git header */
 338static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch)
 339{
 340        unsigned long offset;
 341
 342        /* A git diff has explicit new/delete information, so we don't guess */
 343        patch->is_new = 0;
 344        patch->is_delete = 0;
 345
 346        line += len;
 347        size -= len;
 348        linenr++;
 349        for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
 350                static const struct opentry {
 351                        const char *str;
 352                        int (*fn)(const char *, struct patch *);
 353                } optable[] = {
 354                        { "@@ -", gitdiff_hdrend },
 355                        { "--- ", gitdiff_oldname },
 356                        { "+++ ", gitdiff_newname },
 357                        { "old mode ", gitdiff_oldmode },
 358                        { "new mode ", gitdiff_newmode },
 359                        { "deleted file mode ", gitdiff_delete },
 360                        { "new file mode ", gitdiff_newfile },
 361                        { "copy from ", gitdiff_copysrc },
 362                        { "copy to ", gitdiff_copydst },
 363                        { "rename from ", gitdiff_renamesrc },
 364                        { "rename to ", gitdiff_renamedst },
 365                        { "similarity index ", gitdiff_similarity },
 366                        { "", gitdiff_unrecognized },
 367                };
 368                int i;
 369
 370                len = linelen(line, size);
 371                if (!len || line[len-1] != '\n')
 372                        break;
 373                for (i = 0; i < sizeof(optable) / sizeof(optable[0]); i++) {
 374                        const struct opentry *p = optable + i;
 375                        int oplen = strlen(p->str);
 376                        if (len < oplen || memcmp(p->str, line, oplen))
 377                                continue;
 378                        if (p->fn(line + oplen, patch) < 0)
 379                                return offset;
 380                        break;
 381                }
 382        }
 383
 384        return offset;
 385}
 386
 387static int parse_num(const char *line, int len, int offset, const char *expect, unsigned long *p)
 388{
 389        char *ptr;
 390        int digits, ex;
 391
 392        if (offset < 0 || offset >= len)
 393                return -1;
 394        line += offset;
 395        len -= offset;
 396
 397        if (!isdigit(*line))
 398                return -1;
 399        *p = strtoul(line, &ptr, 10);
 400
 401        digits = ptr - line;
 402
 403        offset += digits;
 404        line += digits;
 405        len -= digits;
 406
 407        ex = strlen(expect);
 408        if (ex > len)
 409                return -1;
 410        if (memcmp(line, expect, ex))
 411                return -1;
 412
 413        return offset + ex;
 414}
 415
 416/*
 417 * Parse a unified diff fragment header of the
 418 * form "@@ -a,b +c,d @@"
 419 */
 420static int parse_fragment_header(char *line, int len, struct fragment *fragment)
 421{
 422        int offset;
 423
 424        if (!len || line[len-1] != '\n')
 425                return -1;
 426
 427        /* Figure out the number of lines in a fragment */
 428        offset = parse_num(line, len, 4, ",", &fragment->oldpos);
 429        offset = parse_num(line, len, offset, " +", &fragment->oldlines);
 430        offset = parse_num(line, len, offset, ",", &fragment->newpos);
 431        offset = parse_num(line, len, offset, " @@", &fragment->newlines);
 432
 433        return offset;
 434}
 435
 436static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch)
 437{
 438        unsigned long offset, len;
 439
 440        patch->is_rename = patch->is_copy = 0;
 441        patch->is_new = patch->is_delete = -1;
 442        patch->old_mode = patch->new_mode = 0;
 443        patch->old_name = patch->new_name = NULL;
 444        for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
 445                unsigned long nextlen;
 446
 447                len = linelen(line, size);
 448                if (!len)
 449                        break;
 450
 451                /* Testing this early allows us to take a few shortcuts.. */
 452                if (len < 6)
 453                        continue;
 454
 455                /*
 456                 * Make sure we don't find any unconnected patch fragmants.
 457                 * That's a sign that we didn't find a header, and that a
 458                 * patch has become corrupted/broken up.
 459                 */
 460                if (!memcmp("@@ -", line, 4)) {
 461                        struct fragment dummy;
 462                        if (parse_fragment_header(line, len, &dummy) < 0)
 463                                continue;
 464                        error("patch fragment without header at line %d: %.*s", linenr, len-1, line);
 465                }
 466
 467                if (size < len + 6)
 468                        break;
 469
 470                /*
 471                 * Git patch? It might not have a real patch, just a rename
 472                 * or mode change, so we handle that specially
 473                 */
 474                if (!memcmp("diff --git ", line, 11)) {
 475                        int git_hdr_len = parse_git_header(line, len, size, patch);
 476                        if (git_hdr_len < 0)
 477                                continue;
 478
 479                        *hdrsize = git_hdr_len;
 480                        return offset;
 481                }
 482
 483                /** --- followed by +++ ? */
 484                if (memcmp("--- ", line,  4) || memcmp("+++ ", line + len, 4))
 485                        continue;
 486
 487                /*
 488                 * We only accept unified patches, so we want it to
 489                 * at least have "@@ -a,b +c,d @@\n", which is 14 chars
 490                 * minimum
 491                 */
 492                nextlen = linelen(line + len, size - len);
 493                if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
 494                        continue;
 495
 496                /* Ok, we'll consider it a patch */
 497                parse_traditional_patch(line, line+len, patch);
 498                *hdrsize = len + nextlen;
 499                linenr += 2;
 500                return offset;
 501        }
 502        return -1;
 503}
 504
 505/*
 506 * Parse a unified diff. Note that this really needs
 507 * to parse each fragment separately, since the only
 508 * way to know the difference between a "---" that is
 509 * part of a patch, and a "---" that starts the next
 510 * patch is to look at the line counts..
 511 */
 512static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
 513{
 514        int added, deleted;
 515        int len = linelen(line, size), offset;
 516        unsigned long pos[4], oldlines, newlines;
 517
 518        offset = parse_fragment_header(line, len, fragment);
 519        if (offset < 0)
 520                return -1;
 521        oldlines = fragment->oldlines;
 522        newlines = fragment->newlines;
 523
 524        if (patch->is_new < 0 && (pos[0] || oldlines))
 525                patch->is_new = 0;
 526        if (patch->is_delete < 0 && (pos[1] || newlines))
 527                patch->is_delete = 0;
 528
 529        /* Parse the thing.. */
 530        line += len;
 531        size -= len;
 532        linenr++;
 533        added = deleted = 0;
 534        for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
 535                if (!oldlines && !newlines)
 536                        break;
 537                len = linelen(line, size);
 538                if (!len || line[len-1] != '\n')
 539                        return -1;
 540                switch (*line) {
 541                default:
 542                        return -1;
 543                case ' ':
 544                        oldlines--;
 545                        newlines--;
 546                        break;
 547                case '-':
 548                        deleted++;
 549                        oldlines--;
 550                        break;
 551                case '+':
 552                        added++;
 553                        newlines--;
 554                        break;
 555                }
 556        }
 557        patch->lines_added += added;
 558        patch->lines_deleted += deleted;
 559        return offset;
 560}
 561
 562static int parse_single_patch(char *line, unsigned long size, struct patch *patch)
 563{
 564        unsigned long offset = 0;
 565        struct fragment **fragp = &patch->fragments;
 566
 567        while (size > 4 && !memcmp(line, "@@ -", 4)) {
 568                struct fragment *fragment;
 569                int len;
 570
 571                fragment = xmalloc(sizeof(*fragment));
 572                memset(fragment, 0, sizeof(*fragment));
 573                len = parse_fragment(line, size, patch, fragment);
 574                if (len <= 0)
 575                        die("corrupt patch at line %d", linenr);
 576
 577                fragment->patch = line;
 578                fragment->size = len;
 579
 580                *fragp = fragment;
 581                fragp = &fragment->next;
 582
 583                offset += len;
 584                line += len;
 585                size -= len;
 586        }
 587        return offset;
 588}
 589
 590static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
 591{
 592        int hdrsize, patchsize;
 593        int offset = find_header(buffer, size, &hdrsize, patch);
 594
 595        if (offset < 0)
 596                return offset;
 597
 598        patchsize = parse_single_patch(buffer + offset + hdrsize, size - offset - hdrsize, patch);
 599
 600        return offset + hdrsize + patchsize;
 601}
 602
 603const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
 604const char minuses[]= "----------------------------------------------------------------------";
 605
 606static void show_stats(struct patch *patch)
 607{
 608        char *name = patch->old_name;
 609        int len, max, add, del;
 610
 611        if (!name)
 612                name = patch->new_name;
 613
 614        /*
 615         * "scale" the filename
 616         */
 617        len = strlen(name);
 618        max = max_len;
 619        if (max > 50)
 620                max = 50;
 621        if (len > max)
 622                name += len - max;
 623        len = max;
 624
 625        /*
 626         * scale the add/delete
 627         */
 628        max = max_change;
 629        if (max + len > 70)
 630                max = 70 - len;
 631        
 632        add = (patch->lines_added * max + max_change/2) / max_change;
 633        del = (patch->lines_deleted * max + max_change/2) / max_change;
 634        printf(" %-*s |%5d %.*s%.*s\n",
 635                len, name, patch->lines_added + patch->lines_deleted,
 636                add, pluses, del, minuses);
 637}
 638
 639static void apply_patch_list(struct patch *patch)
 640{
 641        int files, adds, dels;
 642
 643        files = adds = dels = 0;
 644        if (!patch)
 645                die("no patch found");
 646        do {
 647                const char *old_name = patch->old_name;
 648                const char *new_name = patch->new_name;
 649
 650                if (old_name) {
 651                        if (cache_name_pos(old_name, strlen(old_name)) < 0)
 652                                die("file %s does not exist", old_name);
 653                        if (patch->is_new < 0)
 654                                patch->is_new = 0;
 655                }
 656                if (new_name && (patch->is_new | patch->is_rename | patch->is_copy)) {
 657                        if (cache_name_pos(new_name, strlen(new_name)) >= 0)
 658                                die("file %s already exists", new_name);
 659                }
 660
 661                files++;
 662                adds += patch->lines_added;
 663                dels += patch->lines_deleted;
 664                show_stats(patch);
 665        } while ((patch = patch->next) != NULL);
 666        printf(" %d files changed, %d insertions(+), %d deletions(-)\n", files, adds, dels);
 667}
 668
 669static void patch_stats(struct patch *patch)
 670{
 671        int lines = patch->lines_added + patch->lines_deleted;
 672
 673        if (lines > max_change)
 674                max_change = lines;
 675        if (patch->old_name) {
 676                int len = strlen(patch->old_name);
 677                if (len > max_len)
 678                        max_len = len;
 679        }
 680        if (patch->new_name) {
 681                int len = strlen(patch->new_name);
 682                if (len > max_len)
 683                        max_len = len;
 684        }
 685}
 686
 687static int apply_patch(int fd)
 688{
 689        unsigned long offset, size;
 690        char *buffer = read_patch_file(fd, &size);
 691        struct patch *list = NULL, **listp = &list;
 692
 693        if (!buffer)
 694                return -1;
 695        offset = 0;
 696        while (size > 0) {
 697                struct patch *patch;
 698                int nr;
 699
 700                patch = xmalloc(sizeof(*patch));
 701                memset(patch, 0, sizeof(*patch));
 702                nr = parse_chunk(buffer + offset, size, patch);
 703                if (nr < 0)
 704                        break;
 705                patch_stats(patch);
 706                *listp = patch;
 707                listp = &patch->next;
 708                offset += nr;
 709                size -= nr;
 710        }
 711
 712        apply_patch_list(list);
 713
 714        free(buffer);
 715        return 0;
 716}
 717
 718int main(int argc, char **argv)
 719{
 720        int i;
 721        int read_stdin = 1;
 722
 723        if (read_cache() < 0)
 724                die("unable to read index file");
 725
 726        for (i = 1; i < argc; i++) {
 727                const char *arg = argv[i];
 728                int fd;
 729
 730                if (!strcmp(arg, "-")) {
 731                        apply_patch(0);
 732                        read_stdin = 0;
 733                        continue;
 734                }
 735                if (!strcmp(arg, "--no-merge")) {
 736                        merge_patch = 0;
 737                        continue;
 738                }
 739                fd = open(arg, O_RDONLY);
 740                if (fd < 0)
 741                        usage(apply_usage);
 742                read_stdin = 0;
 743                apply_patch(fd);
 744                close(fd);
 745        }
 746        if (read_stdin)
 747                apply_patch(0);
 748        return 0;
 749}