apply.c on commit git-apply: parse the diff headers (both traditional and new) (a4acb0e)
   1/*
   2 * apply.c
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 *
   6 * This applies patches on top of some (arbitrary) version of the SCM.
   7 *
   8 * NOTE! It does all its work in the index file, and only cares about
   9 * the files in the working directory if you tell it to "merge" the
  10 * patch apply.
  11 *
  12 * Even when merging it always takes the source from the index, and
  13 * uses the working tree as a "branch" for a 3-way merge.
  14 */
  15#include <ctype.h>
  16
  17#include "cache.h"
  18
  19// We default to the merge behaviour, since that's what most people would
  20// expect
  21static int merge_patch = 1;
  22static const char apply_usage[] = "git-apply <patch>";
  23
  24/*
  25 * Various "current state", notably line numbers and what
  26 * file (and how) we're patching right now.. The "is_xxxx"
  27 * things are flags, where -1 means "don't know yet".
  28 */
  29static int linenr = 1;
  30static int old_mode, new_mode;
  31static char *old_name, *new_name, *def_name;
  32static int is_rename, is_copy, is_new, is_delete;
  33
  34#define CHUNKSIZE (8192)
  35#define SLOP (16)
  36
  37static void *read_patch_file(int fd, unsigned long *sizep)
  38{
  39        unsigned long size = 0, alloc = CHUNKSIZE;
  40        void *buffer = xmalloc(alloc);
  41
  42        for (;;) {
  43                int nr = alloc - size;
  44                if (nr < 1024) {
  45                        alloc += CHUNKSIZE;
  46                        buffer = xrealloc(buffer, alloc);
  47                        nr = alloc - size;
  48                }
  49                nr = read(fd, buffer + size, nr);
  50                if (!nr)
  51                        break;
  52                if (nr < 0) {
  53                        if (errno == EAGAIN)
  54                                continue;
  55                        die("git-apply: read returned %s", strerror(errno));
  56                }
  57                size += nr;
  58        }
  59        *sizep = size;
  60
  61        /*
  62         * Make sure that we have some slop in the buffer
  63         * so that we can do speculative "memcmp" etc, and
  64         * see to it that it is NUL-filled.
  65         */
  66        if (alloc < size + SLOP)
  67                buffer = xrealloc(buffer, size + SLOP);
  68        memset(buffer + size, 0, SLOP);
  69        return buffer;
  70}
  71
  72static unsigned long linelen(char *buffer, unsigned long size)
  73{
  74        unsigned long len = 0;
  75        while (size--) {
  76                len++;
  77                if (*buffer++ == '\n')
  78                        break;
  79        }
  80        return len;
  81}
  82
  83static int is_dev_null(const char *str)
  84{
  85        return !memcmp("/dev/null", str, 9) && isspace(str[9]);
  86}
  87
  88static char * find_name(const char *line, char *def, int p_value)
  89{
  90        int len;
  91        const char *start = line;
  92        char *name;
  93
  94        for (;;) {
  95                char c = *line;
  96                if (isspace(c))
  97                        break;
  98                line++;
  99                if (c == '/' && !--p_value)
 100                        start = line;
 101        }
 102        if (!start)
 103                return def;
 104        len = line - start;
 105        if (!len)
 106                return def;
 107
 108        /*
 109         * Generally we prefer the shorter name, especially
 110         * if the other one is just a variation of that with
 111         * something else tacked on to the end (ie "file.orig"
 112         * or "file~").
 113         */
 114        if (def) {
 115                int deflen = strlen(def);
 116                if (deflen < len && !strncmp(start, def, deflen))
 117                        return def;
 118        }
 119
 120        name = xmalloc(len + 1);
 121        memcpy(name, start, len);
 122        name[len] = 0;
 123        free(def);
 124        return name;
 125}
 126
 127/*
 128 * Get the name etc info from the --/+++ lines of a traditional patch header
 129 *
 130 * NOTE! This hardcodes "-p1" behaviour in filename detection.
 131 */
 132static int parse_traditional_patch(const char *first, const char *second)
 133{
 134        int p_value = 1;
 135        char *name;
 136
 137        first += 4;     // skip "--- "
 138        second += 4;    // skip "+++ "
 139        if (is_dev_null(first)) {
 140                is_new = 1;
 141                name = find_name(second, def_name, p_value);
 142        } else if (is_dev_null(second)) {
 143                is_delete = 1;
 144                name = find_name(first, def_name, p_value);
 145        } else {
 146                name = find_name(first, def_name, p_value);
 147                name = find_name(second, name, p_value);
 148        }
 149        if (!name)
 150                die("unable to find filename in patch at line %d", linenr);
 151        old_name = name;
 152        new_name = name;
 153}
 154
 155static int gitdiff_hdrend(const char *line)
 156{
 157        return -1;
 158}
 159
 160static int gitdiff_oldname(const char *line)
 161{
 162        if (!old_name)
 163                old_name = find_name(line, NULL, 1);
 164        return 0;
 165}
 166
 167static int gitdiff_newname(const char *line)
 168{
 169        if (!new_name)
 170                new_name = find_name(line, NULL, 1);
 171        return 0;
 172}
 173
 174static int gitdiff_oldmode(const char *line)
 175{
 176        old_mode = strtoul(line, NULL, 8);
 177        return 0;
 178}
 179
 180static int gitdiff_newmode(const char *line)
 181{
 182        new_mode = strtoul(line, NULL, 8);
 183        return 0;
 184}
 185
 186static int gitdiff_delete(const char *line)
 187{
 188        is_delete = 1;
 189        return gitdiff_oldmode(line);
 190}
 191
 192static int gitdiff_newfile(const char *line)
 193{
 194        is_new = 1;
 195        return gitdiff_newmode(line);
 196}
 197
 198static int gitdiff_copysrc(const char *line)
 199{
 200        is_copy = 1;
 201        old_name = find_name(line, NULL, 0);
 202        return 0;
 203}
 204
 205static int gitdiff_copydst(const char *line)
 206{
 207        is_copy = 1;
 208        new_name = find_name(line, NULL, 0);
 209        return 0;
 210}
 211
 212static int gitdiff_renamesrc(const char *line)
 213{
 214        is_rename = 1;
 215        old_name = find_name(line, NULL, 0);
 216        return 0;
 217}
 218
 219static int gitdiff_renamedst(const char *line)
 220{
 221        is_rename = 1;
 222        new_name = find_name(line, NULL, 0);
 223        return 0;
 224}
 225
 226static int gitdiff_similarity(const char *line)
 227{
 228        return 0;
 229}
 230
 231/* Verify that we recognize the lines following a git header */
 232static int parse_git_header(char *line, int len, unsigned int size)
 233{
 234        unsigned long offset;
 235
 236        /* A git diff has explicit new/delete information, so we don't guess */
 237        is_new = 0;
 238        is_delete = 0;
 239
 240        line += len;
 241        size -= len;
 242        linenr++;
 243        for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
 244                static const struct opentry {
 245                        const char *str;
 246                        int (*fn)(const char *);
 247                } optable[] = {
 248                        { "@@ -", gitdiff_hdrend },
 249                        { "--- ", gitdiff_oldname },
 250                        { "+++ ", gitdiff_newname },
 251                        { "old mode ", gitdiff_oldmode },
 252                        { "new mode ", gitdiff_newmode },
 253                        { "deleted file mode ", gitdiff_delete },
 254                        { "new file mode ", gitdiff_newfile },
 255                        { "copy from ", gitdiff_copysrc },
 256                        { "copy to ", gitdiff_copydst },
 257                        { "rename from ", gitdiff_renamesrc },
 258                        { "rename to ", gitdiff_renamedst },
 259                        { "similarity index ", gitdiff_similarity },
 260                };
 261                int i;
 262
 263                len = linelen(line, size);
 264                if (!len || line[len-1] != '\n')
 265                        break;
 266                for (i = 0; i < sizeof(optable) / sizeof(optable[0]); i++) {
 267                        const struct opentry *p = optable + i;
 268                        int oplen = strlen(p->str);
 269                        if (len < oplen || memcmp(p->str, line, oplen))
 270                                continue;
 271                        if (p->fn(line + oplen) < 0)
 272                                return offset;
 273                }
 274        }
 275
 276        return offset;
 277}
 278
 279static int parse_num(const char *line, int len, int offset, const char *expect, unsigned long *p)
 280{
 281        char *ptr;
 282        int digits, ex;
 283
 284        if (offset < 0 || offset >= len)
 285                return -1;
 286        line += offset;
 287        len -= offset;
 288
 289        if (!isdigit(*line))
 290                return -1;
 291        *p = strtoul(line, &ptr, 10);
 292
 293        digits = ptr - line;
 294
 295        offset += digits;
 296        line += digits;
 297        len -= digits;
 298
 299        ex = strlen(expect);
 300        if (ex > len)
 301                return -1;
 302        if (memcmp(line, expect, ex))
 303                return -1;
 304
 305        return offset + ex;
 306}
 307
 308/*
 309 * Parse a unified diff fragment header of the
 310 * form "@@ -a,b +c,d @@"
 311 */
 312static int parse_fragment_header(char *line, int len, unsigned long *pos)
 313{
 314        int offset;
 315
 316        if (!len || line[len-1] != '\n')
 317                return -1;
 318
 319        /* Figure out the number of lines in a fragment */
 320        offset = parse_num(line, len, 4, ",", pos);
 321        offset = parse_num(line, len, offset, " +", pos+1);
 322        offset = parse_num(line, len, offset, ",", pos+2);
 323        offset = parse_num(line, len, offset, " @@", pos+3);
 324
 325        return offset;
 326}
 327
 328static int find_header(char *line, unsigned long size, int *hdrsize)
 329{
 330        unsigned long offset, len;
 331
 332        is_rename = is_copy = 0;
 333        is_new = is_delete = -1;
 334        old_mode = new_mode = -1;
 335        def_name = old_name = new_name = NULL;
 336        for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
 337                unsigned long nextlen;
 338
 339                len = linelen(line, size);
 340                if (!len)
 341                        break;
 342
 343                /* Testing this early allows us to take a few shortcuts.. */
 344                if (len < 6)
 345                        continue;
 346
 347                /*
 348                 * Make sure we don't find any unconnected patch fragmants.
 349                 * That's a sign that we didn't find a header, and that a
 350                 * patch has become corrupted/broken up.
 351                 */
 352                if (!memcmp("@@ -", line, 4)) {
 353                        unsigned long pos[4];
 354                        if (parse_fragment_header(line, len, pos) < 0)
 355                                continue;
 356                        error("patch fragment without header at line %d: %.*s", linenr, len-1, line);
 357                }
 358
 359                if (size < len + 6)
 360                        break;
 361
 362                /*
 363                 * Git patch? It might not have a real patch, just a rename
 364                 * or mode change, so we handle that specially
 365                 */
 366                if (!memcmp("diff --git ", line, 11)) {
 367                        int git_hdr_len = parse_git_header(line, len, size);
 368                        if (git_hdr_len < 0)
 369                                continue;
 370
 371                        *hdrsize = git_hdr_len;
 372                        return offset;
 373                }
 374
 375                /** --- followed by +++ ? */
 376                if (memcmp("--- ", line,  4) || memcmp("+++ ", line + len, 4))
 377                        continue;
 378
 379                /*
 380                 * We only accept unified patches, so we want it to
 381                 * at least have "@@ -a,b +c,d @@\n", which is 14 chars
 382                 * minimum
 383                 */
 384                nextlen = linelen(line + len, size - len);
 385                if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
 386                        continue;
 387
 388                /* Ok, we'll consider it a patch */
 389                parse_traditional_patch(line, line+len);
 390                *hdrsize = len + nextlen;
 391                linenr += 2;
 392                return offset;
 393        }
 394        return -1;
 395}
 396
 397/*
 398 * Parse a unified diff. Note that this really needs
 399 * to parse each fragment separately, since the only
 400 * way to know the difference between a "---" that is
 401 * part of a patch, and a "---" that starts the next
 402 * patch is to look at the line counts..
 403 */
 404static int apply_fragment(char *line, unsigned long size)
 405{
 406        int len = linelen(line, size), offset;
 407        unsigned long pos[4], oldlines, newlines;
 408
 409        offset = parse_fragment_header(line, len, pos);
 410        if (offset < 0)
 411                return -1;
 412        oldlines = pos[1];
 413        newlines = pos[3];
 414
 415        if (is_new < 0 && (pos[0] || oldlines))
 416                is_new = 0;
 417        if (is_delete < 0 && (pos[1] || newlines))
 418                is_delete = 0;
 419
 420        /* Parse the thing.. */
 421        line += len;
 422        size -= len;
 423        linenr++;
 424        for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
 425                if (!oldlines && !newlines)
 426                        break;
 427                len = linelen(line, size);
 428                if (!len || line[len-1] != '\n')
 429                        return -1;
 430                switch (*line) {
 431                default:
 432                        return -1;
 433                case ' ':
 434                        oldlines--;
 435                        newlines--;
 436                        break;
 437                case '-':
 438                        oldlines--;
 439                        break;
 440                case '+':
 441                        newlines--;
 442                        break;
 443                }
 444        }
 445        return offset;
 446}
 447
 448static int apply_single_patch(char *line, unsigned long size)
 449{
 450        unsigned long offset = 0;
 451
 452        while (size > 4 && !memcmp(line, "@@ -", 4)) {
 453                int len = apply_fragment(line, size);
 454                if (len <= 0)
 455                        die("corrupt patch at line %d", linenr);
 456
 457printf("applying fragment:\n%.*s\n\n", len, line);
 458
 459                offset += len;
 460                line += len;
 461                size -= len;
 462        }
 463        return offset;
 464}
 465
 466static int apply_chunk(char *buffer, unsigned long size)
 467{
 468        int hdrsize, patchsize;
 469        int offset = find_header(buffer, size, &hdrsize);
 470        char *header, *patch;
 471
 472        if (offset < 0)
 473                return offset;
 474        header = buffer + offset;
 475
 476printf("Found header:\n%.*s\n\n", hdrsize, header);
 477printf("Rename: %d\n", is_rename);
 478printf("Copy:   %d\n", is_copy);
 479printf("New:    %d\n", is_new);
 480printf("Delete: %d\n", is_delete);
 481printf("Mode:   %o->%o\n", old_mode, new_mode);
 482printf("Name:   '%s'->'%s'\n", old_name, new_name);
 483
 484        patch = header + hdrsize;
 485        patchsize = apply_single_patch(patch, size - offset - hdrsize);
 486
 487        return offset + hdrsize + patchsize;
 488}
 489
 490static int apply_patch(int fd)
 491{
 492        unsigned long offset, size;
 493        char *buffer = read_patch_file(fd, &size);
 494
 495        if (!buffer)
 496                return -1;
 497        offset = 0;
 498        while (size > 0) {
 499                int nr = apply_chunk(buffer + offset, size);
 500                if (nr < 0)
 501                        break;
 502                offset += nr;
 503                size -= nr;
 504        }
 505        free(buffer);
 506        return 0;
 507}
 508
 509int main(int argc, char **argv)
 510{
 511        int i;
 512
 513        if (read_cache() < 0)
 514                die("unable to read index file");
 515
 516        for (i = 1; i < argc; i++) {
 517                const char *arg = argv[i];
 518                int fd;
 519
 520                if (!strcmp(arg, "-")) {
 521                        apply_patch(0);
 522                        continue;
 523                }
 524                if (!strcmp(arg, "--no-merge")) {
 525                        merge_patch = 0;
 526                        continue;
 527                }
 528                fd = open(arg, O_RDONLY);
 529                if (fd < 0)
 530                        usage(apply_usage);
 531                apply_patch(fd);
 532                close(fd);
 533        }
 534        return 0;
 535}