apply.c on commit git-apply: parse the whole list of patches into memory first (19c58fb)
   1/*
   2 * apply.c
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This applies patches on top of some (arbitrary) version of the SCM.
   7 *
   8 * NOTE! It does all its work in the index file, and only cares about
   9 * the files in the working directory if you tell it to "merge" the
  10 * patch apply.
  11 *
  12 * Even when merging it always takes the source from the index, and
  13 * uses the working tree as a "branch" for a 3-way merge.
  14 */
  15#include <ctype.h>
  16
  17#include "cache.h"
  18
  19// We default to the merge behaviour, since that's what most people would
  20// expect
  21static int merge_patch = 1;
  22static const char apply_usage[] = "git-apply <patch>";
  23
  24/*
  25 * Various "current state", notably line numbers and what
  26 * file (and how) we're patching right now.. The "is_xxxx"
  27 * things are flags, where -1 means "don't know yet".
  28 */
  29static int linenr = 1;
  30static char *def_name = NULL;
  31
  32struct fragment {
  33        unsigned long oldpos, oldlines;
  34        unsigned long newpos, newlines;
  35        const char *patch;
  36        int size;
  37        struct fragment *next;
  38};
  39
  40struct patch {
  41        char *new_name, *old_name;
  42        unsigned int old_mode, new_mode;
  43        int is_rename, is_copy, is_new, is_delete;
  44        struct fragment *fragments;
  45        struct patch *next;
  46};
  47
  48#define CHUNKSIZE (8192)
  49#define SLOP (16)
  50
  51static void *read_patch_file(int fd, unsigned long *sizep)
  52{
  53        unsigned long size = 0, alloc = CHUNKSIZE;
  54        void *buffer = xmalloc(alloc);
  55
  56        for (;;) {
  57                int nr = alloc - size;
  58                if (nr < 1024) {
  59                        alloc += CHUNKSIZE;
  60                        buffer = xrealloc(buffer, alloc);
  61                        nr = alloc - size;
  62                }
  63                nr = read(fd, buffer + size, nr);
  64                if (!nr)
  65                        break;
  66                if (nr < 0) {
  67                        if (errno == EAGAIN)
  68                                continue;
  69                        die("git-apply: read returned %s", strerror(errno));
  70                }
  71                size += nr;
  72        }
  73        *sizep = size;
  74
  75        /*
  76         * Make sure that we have some slop in the buffer
  77         * so that we can do speculative "memcmp" etc, and
  78         * see to it that it is NUL-filled.
  79         */
  80        if (alloc < size + SLOP)
  81                buffer = xrealloc(buffer, size + SLOP);
  82        memset(buffer + size, 0, SLOP);
  83        return buffer;
  84}
  85
  86static unsigned long linelen(char *buffer, unsigned long size)
  87{
  88        unsigned long len = 0;
  89        while (size--) {
  90                len++;
  91                if (*buffer++ == '\n')
  92                        break;
  93        }
  94        return len;
  95}
  96
  97static int is_dev_null(const char *str)
  98{
  99        return !memcmp("/dev/null", str, 9) && isspace(str[9]);
 100}
 101
 102#define TERM_EXIST      1
 103#define TERM_SPACE      2
 104#define TERM_TAB        4
 105
 106static int name_terminate(const char *name, int namelen, int c, int terminate)
 107{
 108        if (c == ' ' && !(terminate & TERM_SPACE))
 109                return 0;
 110        if (c == '\t' && !(terminate & TERM_TAB))
 111                return 0;
 112
 113        /*
 114         * Do we want an existing name? Return false and
 115         * continue if it's not there.
 116         */
 117        if (terminate & TERM_EXIST)
 118                return cache_name_pos(name, namelen) >= 0;
 119
 120        return 1;
 121}
 122
 123static char * find_name(const char *line, char *def, int p_value, int terminate)
 124{
 125        int len;
 126        const char *start = line;
 127        char *name;
 128
 129        for (;;) {
 130                char c = *line;
 131
 132                if (isspace(c)) {
 133                        if (c == '\n')
 134                                break;
 135                        if (name_terminate(start, line-start, c, terminate))
 136                                break;
 137                }
 138                line++;
 139                if (c == '/' && !--p_value)
 140                        start = line;
 141        }
 142        if (!start)
 143                return def;
 144        len = line - start;
 145        if (!len)
 146                return def;
 147
 148        /*
 149         * Generally we prefer the shorter name, especially
 150         * if the other one is just a variation of that with
 151         * something else tacked on to the end (ie "file.orig"
 152         * or "file~").
 153         */
 154        if (def) {
 155                int deflen = strlen(def);
 156                if (deflen < len && !strncmp(start, def, deflen))
 157                        return def;
 158        }
 159
 160        name = xmalloc(len + 1);
 161        memcpy(name, start, len);
 162        name[len] = 0;
 163        free(def);
 164        return name;
 165}
 166
 167/*
 168 * Get the name etc info from the --/+++ lines of a traditional patch header
 169 *
 170 * NOTE! This hardcodes "-p1" behaviour in filename detection.
 171 *
 172 * FIXME! The end-of-filename heuristics are kind of screwy. For existing
 173 * files, we can happily check the index for a match, but for creating a
 174 * new file we should try to match whatever "patch" does. I have no idea.
 175 */
 176static void parse_traditional_patch(const char *first, const char *second, struct patch *patch)
 177{
 178        int p_value = 1;
 179        char *name;
 180
 181        first += 4;     // skip "--- "
 182        second += 4;    // skip "+++ "
 183        if (is_dev_null(first)) {
 184                patch->is_new = 1;
 185                patch->is_delete = 0;
 186                name = find_name(second, def_name, p_value, TERM_SPACE | TERM_TAB);
 187                patch->new_name = name;
 188        } else if (is_dev_null(second)) {
 189                patch->is_new = 0;
 190                patch->is_delete = 1;
 191                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 192                patch->old_name = name;
 193        } else {
 194                name = find_name(first, def_name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 195                name = find_name(second, name, p_value, TERM_EXIST | TERM_SPACE | TERM_TAB);
 196                patch->old_name = patch->new_name = name;
 197        }
 198        if (!name)
 199                die("unable to find filename in patch at line %d", linenr);
 200}
 201
 202static int gitdiff_hdrend(const char *line, struct patch *patch)
 203{
 204        return -1;
 205}
 206
 207/*
 208 * We're anal about diff header consistency, to make
 209 * sure that we don't end up having strange ambiguous
 210 * patches floating around.
 211 *
 212 * As a result, gitdiff_{old|new}name() will check
 213 * their names against any previous information, just
 214 * to make sure..
 215 */
 216static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew)
 217{
 218        int len;
 219        const char *name;
 220
 221        if (!orig_name && !isnull)
 222                return find_name(line, NULL, 1, 0);
 223
 224        name = "/dev/null";
 225        len = 9;
 226        if (orig_name) {
 227                name = orig_name;
 228                len = strlen(name);
 229                if (isnull)
 230                        die("git-apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr);
 231        }
 232
 233        if (*name == '/')
 234                goto absolute_path;
 235
 236        for (;;) {
 237                char c = *line++;
 238                if (c == '\n')
 239                        break;
 240                if (c != '/')
 241                        continue;
 242absolute_path:
 243                if (memcmp(line, name, len) || line[len] != '\n')
 244                        break;
 245                return orig_name;
 246        }
 247        die("git-apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr);
 248        return NULL;
 249}
 250
 251static int gitdiff_oldname(const char *line, struct patch *patch)
 252{
 253        patch->old_name = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old");
 254        return 0;
 255}
 256
 257static int gitdiff_newname(const char *line, struct patch *patch)
 258{
 259        patch->new_name = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new");
 260        return 0;
 261}
 262
 263static int gitdiff_oldmode(const char *line, struct patch *patch)
 264{
 265        patch->old_mode = strtoul(line, NULL, 8);
 266        return 0;
 267}
 268
 269static int gitdiff_newmode(const char *line, struct patch *patch)
 270{
 271        patch->new_mode = strtoul(line, NULL, 8);
 272        return 0;
 273}
 274
 275static int gitdiff_delete(const char *line, struct patch *patch)
 276{
 277        patch->is_delete = 1;
 278        return gitdiff_oldmode(line, patch);
 279}
 280
 281static int gitdiff_newfile(const char *line, struct patch *patch)
 282{
 283        patch->is_new = 1;
 284        return gitdiff_newmode(line, patch);
 285}
 286
 287static int gitdiff_copysrc(const char *line, struct patch *patch)
 288{
 289        patch->is_copy = 1;
 290        patch->old_name = find_name(line, NULL, 0, 0);
 291        return 0;
 292}
 293
 294static int gitdiff_copydst(const char *line, struct patch *patch)
 295{
 296        patch->is_copy = 1;
 297        patch->new_name = find_name(line, NULL, 0, 0);
 298        return 0;
 299}
 300
 301static int gitdiff_renamesrc(const char *line, struct patch *patch)
 302{
 303        patch->is_rename = 1;
 304        patch->old_name = find_name(line, NULL, 0, 0);
 305        return 0;
 306}
 307
 308static int gitdiff_renamedst(const char *line, struct patch *patch)
 309{
 310        patch->is_rename = 1;
 311        patch->new_name = find_name(line, NULL, 0, 0);
 312        return 0;
 313}
 314
 315static int gitdiff_similarity(const char *line, struct patch *patch)
 316{
 317        return 0;
 318}
 319
 320/*
 321 * This is normal for a diff that doesn't change anything: we'll fall through
 322 * into the next diff. Tell the parser to break out.
 323 */
 324static int gitdiff_unrecognized(const char *line, struct patch *patch)
 325{
 326        return -1;
 327}
 328
 329/* Verify that we recognize the lines following a git header */
 330static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch)
 331{
 332        unsigned long offset;
 333
 334        /* A git diff has explicit new/delete information, so we don't guess */
 335        patch->is_new = 0;
 336        patch->is_delete = 0;
 337
 338        line += len;
 339        size -= len;
 340        linenr++;
 341        for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
 342                static const struct opentry {
 343                        const char *str;
 344                        int (*fn)(const char *, struct patch *);
 345                } optable[] = {
 346                        { "@@ -", gitdiff_hdrend },
 347                        { "--- ", gitdiff_oldname },
 348                        { "+++ ", gitdiff_newname },
 349                        { "old mode ", gitdiff_oldmode },
 350                        { "new mode ", gitdiff_newmode },
 351                        { "deleted file mode ", gitdiff_delete },
 352                        { "new file mode ", gitdiff_newfile },
 353                        { "copy from ", gitdiff_copysrc },
 354                        { "copy to ", gitdiff_copydst },
 355                        { "rename from ", gitdiff_renamesrc },
 356                        { "rename to ", gitdiff_renamedst },
 357                        { "similarity index ", gitdiff_similarity },
 358                        { "", gitdiff_unrecognized },
 359                };
 360                int i;
 361
 362                len = linelen(line, size);
 363                if (!len || line[len-1] != '\n')
 364                        break;
 365                for (i = 0; i < sizeof(optable) / sizeof(optable[0]); i++) {
 366                        const struct opentry *p = optable + i;
 367                        int oplen = strlen(p->str);
 368                        if (len < oplen || memcmp(p->str, line, oplen))
 369                                continue;
 370                        if (p->fn(line + oplen, patch) < 0)
 371                                return offset;
 372                        break;
 373                }
 374        }
 375
 376        return offset;
 377}
 378
 379static int parse_num(const char *line, int len, int offset, const char *expect, unsigned long *p)
 380{
 381        char *ptr;
 382        int digits, ex;
 383
 384        if (offset < 0 || offset >= len)
 385                return -1;
 386        line += offset;
 387        len -= offset;
 388
 389        if (!isdigit(*line))
 390                return -1;
 391        *p = strtoul(line, &ptr, 10);
 392
 393        digits = ptr - line;
 394
 395        offset += digits;
 396        line += digits;
 397        len -= digits;
 398
 399        ex = strlen(expect);
 400        if (ex > len)
 401                return -1;
 402        if (memcmp(line, expect, ex))
 403                return -1;
 404
 405        return offset + ex;
 406}
 407
 408/*
 409 * Parse a unified diff fragment header of the
 410 * form "@@ -a,b +c,d @@"
 411 */
 412static int parse_fragment_header(char *line, int len, struct fragment *fragment)
 413{
 414        int offset;
 415
 416        if (!len || line[len-1] != '\n')
 417                return -1;
 418
 419        /* Figure out the number of lines in a fragment */
 420        offset = parse_num(line, len, 4, ",", &fragment->oldpos);
 421        offset = parse_num(line, len, offset, " +", &fragment->oldlines);
 422        offset = parse_num(line, len, offset, ",", &fragment->newpos);
 423        offset = parse_num(line, len, offset, " @@", &fragment->newlines);
 424
 425        return offset;
 426}
 427
 428static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch)
 429{
 430        unsigned long offset, len;
 431
 432        patch->is_rename = patch->is_copy = 0;
 433        patch->is_new = patch->is_delete = -1;
 434        patch->old_mode = patch->new_mode = 0;
 435        patch->old_name = patch->new_name = NULL;
 436        for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
 437                unsigned long nextlen;
 438
 439                len = linelen(line, size);
 440                if (!len)
 441                        break;
 442
 443                /* Testing this early allows us to take a few shortcuts.. */
 444                if (len < 6)
 445                        continue;
 446
 447                /*
 448                 * Make sure we don't find any unconnected patch fragmants.
 449                 * That's a sign that we didn't find a header, and that a
 450                 * patch has become corrupted/broken up.
 451                 */
 452                if (!memcmp("@@ -", line, 4)) {
 453                        struct fragment dummy;
 454                        if (parse_fragment_header(line, len, &dummy) < 0)
 455                                continue;
 456                        error("patch fragment without header at line %d: %.*s", linenr, len-1, line);
 457                }
 458
 459                if (size < len + 6)
 460                        break;
 461
 462                /*
 463                 * Git patch? It might not have a real patch, just a rename
 464                 * or mode change, so we handle that specially
 465                 */
 466                if (!memcmp("diff --git ", line, 11)) {
 467                        int git_hdr_len = parse_git_header(line, len, size, patch);
 468                        if (git_hdr_len < 0)
 469                                continue;
 470
 471                        *hdrsize = git_hdr_len;
 472                        return offset;
 473                }
 474
 475                /** --- followed by +++ ? */
 476                if (memcmp("--- ", line,  4) || memcmp("+++ ", line + len, 4))
 477                        continue;
 478
 479                /*
 480                 * We only accept unified patches, so we want it to
 481                 * at least have "@@ -a,b +c,d @@\n", which is 14 chars
 482                 * minimum
 483                 */
 484                nextlen = linelen(line + len, size - len);
 485                if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
 486                        continue;
 487
 488                /* Ok, we'll consider it a patch */
 489                parse_traditional_patch(line, line+len, patch);
 490                *hdrsize = len + nextlen;
 491                linenr += 2;
 492                return offset;
 493        }
 494        return -1;
 495}
 496
 497/*
 498 * Parse a unified diff. Note that this really needs
 499 * to parse each fragment separately, since the only
 500 * way to know the difference between a "---" that is
 501 * part of a patch, and a "---" that starts the next
 502 * patch is to look at the line counts..
 503 */
 504static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
 505{
 506        int len = linelen(line, size), offset;
 507        unsigned long pos[4], oldlines, newlines;
 508
 509        offset = parse_fragment_header(line, len, fragment);
 510        if (offset < 0)
 511                return -1;
 512        oldlines = fragment->oldlines;
 513        newlines = fragment->newlines;
 514
 515        if (patch->is_new < 0 && (pos[0] || oldlines))
 516                patch->is_new = 0;
 517        if (patch->is_delete < 0 && (pos[1] || newlines))
 518                patch->is_delete = 0;
 519
 520        /* Parse the thing.. */
 521        line += len;
 522        size -= len;
 523        linenr++;
 524        for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
 525                if (!oldlines && !newlines)
 526                        break;
 527                len = linelen(line, size);
 528                if (!len || line[len-1] != '\n')
 529                        return -1;
 530                switch (*line) {
 531                default:
 532                        return -1;
 533                case ' ':
 534                        oldlines--;
 535                        newlines--;
 536                        break;
 537                case '-':
 538                        oldlines--;
 539                        break;
 540                case '+':
 541                        newlines--;
 542                        break;
 543                }
 544        }
 545        return offset;
 546}
 547
 548static int parse_single_patch(char *line, unsigned long size, struct patch *patch)
 549{
 550        unsigned long offset = 0;
 551        struct fragment **fragp = &patch->fragments;
 552
 553        while (size > 4 && !memcmp(line, "@@ -", 4)) {
 554                struct fragment *fragment;
 555                int len;
 556
 557                fragment = xmalloc(sizeof(*fragment));
 558                memset(fragment, 0, sizeof(*fragment));
 559                len = parse_fragment(line, size, patch, fragment);
 560                if (len <= 0)
 561                        die("corrupt patch at line %d", linenr);
 562
 563                fragment->patch = line;
 564                fragment->size = len;
 565
 566                *fragp = fragment;
 567                fragp = &fragment->next;
 568
 569                offset += len;
 570                line += len;
 571                size -= len;
 572        }
 573        return offset;
 574}
 575
 576static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
 577{
 578        int hdrsize, patchsize;
 579        int offset = find_header(buffer, size, &hdrsize, patch);
 580
 581        if (offset < 0)
 582                return offset;
 583
 584        patchsize = parse_single_patch(buffer + offset + hdrsize, size - offset - hdrsize, patch);
 585
 586        return offset + hdrsize + patchsize;
 587}
 588
 589static void apply_patch_list(struct patch *patch)
 590{
 591        if (!patch)
 592                die("no patch found");
 593        do {
 594                const char *old_name = patch->old_name;
 595                const char *new_name = patch->new_name;
 596                struct fragment *frag;
 597
 598                if (old_name) {
 599                        if (cache_name_pos(old_name, strlen(old_name)) < 0)
 600                                die("file %s does not exist", old_name);
 601                        if (patch->is_new < 0)
 602                                patch->is_new = 0;
 603                }
 604                if (new_name && (patch->is_new | patch->is_rename | patch->is_copy)) {
 605                        if (cache_name_pos(new_name, strlen(new_name)) >= 0)
 606                                die("file %s already exists", new_name);
 607                }
 608
 609                printf("Applying patch to %s\n", new_name);
 610                printf("  new=%d delete=%d copy=%d rename=%d\n",
 611                        patch->is_new, patch->is_delete, patch->is_copy, patch->is_rename);
 612                if (patch->old_mode != patch->new_mode)
 613                        printf("  %o->%o\n", patch->old_mode, patch->new_mode);
 614                frag = patch->fragments;
 615                while (frag) {
 616                        printf("Fragment %lu,%lu -> %lu,%lu\n%.*s",
 617                                frag->oldpos, frag->oldlines,
 618                                frag->newpos, frag->newlines,
 619                                frag->size, frag->patch);
 620                        frag = frag->next;
 621                }
 622
 623        } while ((patch = patch->next) != NULL);
 624}
 625
 626static int apply_patch(int fd)
 627{
 628        unsigned long offset, size;
 629        char *buffer = read_patch_file(fd, &size);
 630        struct patch *list = NULL, **listp = &list;
 631
 632        if (!buffer)
 633                return -1;
 634        offset = 0;
 635        while (size > 0) {
 636                struct patch *patch;
 637                int nr;
 638
 639                patch = xmalloc(sizeof(*patch));
 640                memset(patch, 0, sizeof(*patch));
 641                nr = parse_chunk(buffer + offset, size, patch);
 642                if (nr < 0)
 643                        break;
 644                *listp = patch;
 645                listp = &patch->next;
 646                offset += nr;
 647                size -= nr;
 648        }
 649
 650        apply_patch_list(list);
 651
 652        free(buffer);
 653        return 0;
 654}
 655
 656int main(int argc, char **argv)
 657{
 658        int i;
 659        int read_stdin = 1;
 660
 661        if (read_cache() < 0)
 662                die("unable to read index file");
 663
 664        for (i = 1; i < argc; i++) {
 665                const char *arg = argv[i];
 666                int fd;
 667
 668                if (!strcmp(arg, "-")) {
 669                        apply_patch(0);
 670                        read_stdin = 0;
 671                        continue;
 672                }
 673                if (!strcmp(arg, "--no-merge")) {
 674                        merge_patch = 0;
 675                        continue;
 676                }
 677                fd = open(arg, O_RDONLY);
 678                if (fd < 0)
 679                        usage(apply_usage);
 680                read_stdin = 0;
 681                apply_patch(fd);
 682                close(fd);
 683        }
 684        if (read_stdin)
 685                apply_patch(0);
 686        return 0;
 687}