apply: do not barf on patch with too large an offset
[gitweb.git] / builtin-apply.c
index 8edcc08b61e6ab41098154afe1f6741f987978d7..5ed4e918c0873b641a2f88b08bff9c66d6c4d850 100644 (file)
@@ -45,14 +45,14 @@ static const char *fake_ancestor;
 static int line_termination = '\n';
 static unsigned long p_context = ULONG_MAX;
 static const char apply_usage[] =
-"git-apply [--stat] [--numstat] [--summary] [--check] [--index] [--cached] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [--reverse] [--reject] [--verbose] [-z] [-pNUM] [-CNUM] [--whitespace=<nowarn|warn|error|error-all|strip>] <patch>...";
-
-static enum whitespace_eol {
-       nowarn_whitespace,
-       warn_on_whitespace,
-       error_on_whitespace,
-       strip_whitespace,
-} new_whitespace = warn_on_whitespace;
+"git-apply [--stat] [--numstat] [--summary] [--check] [--index] [--cached] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [--reverse] [--reject] [--verbose] [-z] [-pNUM] [-CNUM] [--whitespace=<nowarn|warn|fix|error|error-all>] <patch>...";
+
+/*
+ * What to do about whitespace errors found in the patch.  The value is
+ * chosen by parse_whitespace_option() from the --whitespace=<action>
+ * argument; warning is the initial setting.
+ */
+static enum ws_error_action {
+       nowarn_ws_error,        /* "nowarn": do not check at all */
+       warn_on_ws_error,       /* "warn" (also used when no action is given) */
+       die_on_ws_error,        /* "error" and "error-all" */
+       correct_ws_error,       /* "fix", or its older spelling "strip" */
+} ws_error_action = warn_on_ws_error;
 static int whitespace_error;
 static int squelch_whitespace_errors = 5;
 static int applied_after_fixing_ws;
@@ -61,28 +61,28 @@ static const char *patch_input_file;
 static void parse_whitespace_option(const char *option)
 {
        if (!option) {
-               new_whitespace = warn_on_whitespace;
+               ws_error_action = warn_on_ws_error;
                return;
        }
        if (!strcmp(option, "warn")) {
-               new_whitespace = warn_on_whitespace;
+               ws_error_action = warn_on_ws_error;
                return;
        }
        if (!strcmp(option, "nowarn")) {
-               new_whitespace = nowarn_whitespace;
+               ws_error_action = nowarn_ws_error;
                return;
        }
        if (!strcmp(option, "error")) {
-               new_whitespace = error_on_whitespace;
+               ws_error_action = die_on_ws_error;
                return;
        }
        if (!strcmp(option, "error-all")) {
-               new_whitespace = error_on_whitespace;
+               ws_error_action = die_on_ws_error;
                squelch_whitespace_errors = 0;
                return;
        }
-       if (!strcmp(option, "strip")) {
-               new_whitespace = strip_whitespace;
+       if (!strcmp(option, "strip") || !strcmp(option, "fix")) {
+               ws_error_action = correct_ws_error;
                return;
        }
        die("unrecognized whitespace option '%s'", option);
@@ -90,11 +90,8 @@ static void parse_whitespace_option(const char *option)
 
+/*
+ * Pick the whitespace action when none was requested: if both
+ * whitespace_option and apply_default_whitespace are unset, warn about
+ * whitespace errors when "apply" is set and stay silent otherwise.
+ * An explicitly requested action is left untouched.
+ */
 static void set_default_whitespace_mode(const char *whitespace_option)
 {
-       if (!whitespace_option && !apply_default_whitespace) {
-               new_whitespace = (apply
-                                 ? warn_on_whitespace
-                                 : nowarn_whitespace);
-       }
+       if (!whitespace_option && !apply_default_whitespace)
+               ws_error_action = (apply ? warn_on_ws_error : nowarn_ws_error);
 }
 
 /*
@@ -137,11 +134,17 @@ struct fragment {
 #define BINARY_DELTA_DEFLATED  1
 #define BINARY_LITERAL_DEFLATED 2
 
+/*
+ * This represents a "patch" to a file, both metainfo changes
+ * such as creation/deletion, filemode and content changes represented
+ * as a series of fragments.
+ */
 struct patch {
        char *new_name, *old_name, *def_name;
        unsigned int old_mode, new_mode;
        int is_new, is_delete;  /* -1 = unknown, 0 = false, 1 = true */
        int rejected;
+       unsigned ws_rule;
        unsigned long deflate_origlen;
        int lines_added, lines_deleted;
        int score;
@@ -158,7 +161,86 @@ struct patch {
        struct patch *next;
 };
 
-static void say_patch_name(FILE *output, const char *pre, struct patch *patch, const char *post)
+/*
+ * A line in a file.  It is len bytes long (including the terminating
+ * LF, except for an incomplete line at the end of the file), and its
+ * contents hash to 'hash'.  Only 24 bits of the value computed by
+ * hash_line() fit in the bitfield, and that hash skips whitespace,
+ * so equal hashes merely say two lines might match.
+ */
+struct line {
+       size_t len;
+       unsigned hash : 24;
+       unsigned flag : 8;      /* LINE_* bits; LINE_COMMON marks a context line */
+#define LINE_COMMON     1
+};
+
+/*
+ * This represents a "file", which is an array of "lines".
+ */
+struct image {
+       char *buf;                      /* the contents */
+       size_t len;                     /* number of bytes in buf */
+       size_t nr;                      /* number of entries in the line table */
+       size_t alloc;                   /* capacity of line_allocated (for ALLOC_GROW) */
+       struct line *line_allocated;    /* start of the allocated line table */
+       struct line *line;              /* first line in use; set to line_allocated
+                                        * by prepare_image() and advanced by
+                                        * remove_first_line() */
+};
+
+/*
+ * Hash the len bytes starting at cp, skipping every byte for which
+ * isspace() is true, so that lines differing only in whitespace get
+ * the same value.  Each remaining byte is folded in as h = h * 3 + byte.
+ */
+static uint32_t hash_line(const char *cp, size_t len)
+{
+       size_t i;
+       uint32_t h;
+       for (i = 0, h = 0; i < len; i++) {
+               if (!isspace(cp[i])) {
+                       h = h * 3 + (cp[i] & 0xff);
+               }
+       }
+       return h;
+}
+
+/*
+ * Append one entry to img's line table, growing it as needed.  The
+ * entry records the line length, the hash of the len bytes at bol
+ * (truncated to the width of the hash bitfield) and the given flag.
+ * Only img->line_allocated is written; img->line is not updated here.
+ */
+static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
+{
+       ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
+       img->line_allocated[img->nr].len = len;
+       img->line_allocated[img->nr].hash = hash_line(bol, len);
+       img->line_allocated[img->nr].flag = flag;
+       img->nr++;
+}
+
+/*
+ * Initialize *image to describe the len bytes at buf.  The buffer is
+ * not copied; the image points at it (and clear_image() later frees it).
+ *
+ * When prepare_linetable is non-zero, also build the line table: one
+ * entry per line with flag 0, where a line includes its trailing '\n'
+ * if it has one.  Otherwise the table is left empty (nr == 0 and
+ * line == NULL from the memset).
+ */
+static void prepare_image(struct image *image, char *buf, size_t len,
+                         int prepare_linetable)
+{
+       const char *cp, *ep;
+
+       memset(image, 0, sizeof(*image));
+       image->buf = buf;
+       image->len = len;
+
+       if (!prepare_linetable)
+               return;
+
+       ep = image->buf + image->len;
+       cp = image->buf;
+       while (cp < ep) {
+               const char *next;
+               /* find the end of this line, or the end of the buffer */
+               for (next = cp; next < ep && *next != '\n'; next++)
+                       ;
+               /* the LF, when present, belongs to this line */
+               if (next < ep)
+                       next++;
+               add_line_info(image, cp, next - cp, 0);
+               cp = next;
+       }
+       image->line = image->line_allocated;
+}
+
+/*
+ * Release the contents of the image: free buf and reset buf/len.
+ * The line table (line_allocated, line, nr, alloc) is left as is.
+ * NOTE(review): presumably callers free line_allocated themselves --
+ * confirm, otherwise the table leaks.
+ */
+static void clear_image(struct image *image)
+{
+       free(image->buf);
+       image->buf = NULL;
+       image->len = 0;
+}
+
+static void say_patch_name(FILE *output, const char *pre,
+                          struct patch *patch, const char *post)
 {
        fputs(pre, output);
        if (patch->old_name && patch->new_name &&
@@ -229,7 +311,8 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
        if (*line == '"') {
                struct strbuf name;
 
-               /* Proposed "new-style" GNU patch/diff format; see
+               /*
+                * Proposed "new-style" GNU patch/diff format; see
                 * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
                 */
                strbuf_init(&name, 0);
@@ -499,7 +582,8 @@ static int gitdiff_dissimilarity(const char *line, struct patch *patch)
 
 static int gitdiff_index(const char *line, struct patch *patch)
 {
-       /* index line is N hexadecimal, "..", N hexadecimal,
+       /*
+        * index line is N hexadecimal, "..", N hexadecimal,
         * and optional space with octal mode.
         */
        const char *ptr, *eol;
@@ -550,7 +634,8 @@ static const char *stop_at_slash(const char *line, int llen)
        return NULL;
 }
 
-/* This is to extract the same name that appears on "diff --git"
+/*
+ * This is to extract the same name that appears on "diff --git"
  * line.  We do not find and return anything if it is a rename
  * patch, and it is OK because we will find the name elsewhere.
  * We need to reliably find name only when it is mode-change only,
@@ -584,7 +669,8 @@ static char *git_header_name(char *line, int llen)
                        goto free_and_fail1;
                strbuf_remove(&first, 0, cp + 1 - first.buf);
 
-               /* second points at one past closing dq of name.
+               /*
+                * second points at one past closing dq of name.
                 * find the second name.
                 */
                while ((second < line + llen) && isspace(*second))
@@ -627,7 +713,8 @@ static char *git_header_name(char *line, int llen)
                return NULL;
        name++;
 
-       /* since the first name is unquoted, a dq if exists must be
+       /*
+        * since the first name is unquoted, a dq if exists must be
         * the beginning of the second name.
         */
        for (second = name; second < line + llen; second++) {
@@ -683,7 +770,6 @@ static char *git_header_name(char *line, int llen)
                        }
                }
        }
-       return NULL;
 }
 
 /* Verify that we recognize the lines following a git header */
@@ -759,7 +845,7 @@ static int parse_num(const char *line, unsigned long *p)
 }
 
 static int parse_range(const char *line, int len, int offset, const char *expect,
-                       unsigned long *p1, unsigned long *p2)
+                      unsigned long *p1, unsigned long *p2)
 {
        int digits, ex;
 
@@ -868,14 +954,14 @@ static int find_header(char *line, unsigned long size, int *hdrsize, struct patc
                        return offset;
                }
 
-               /** --- followed by +++ ? */
+               /* --- followed by +++ ? */
                if (memcmp("--- ", line,  4) || memcmp("+++ ", line + len, 4))
                        continue;
 
                /*
                 * We only accept unified patches, so we want it to
                 * at least have "@@ -a,b +c,d @@\n", which is 14 chars
-                * minimum
+                * minimum ("@@ -0,0 +1 @@\n" is the shortest).
                 */
                nextlen = linelen(line + len, size - len);
                if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
@@ -890,56 +976,34 @@ static int find_header(char *line, unsigned long size, int *hdrsize, struct patc
        return -1;
 }
 
+/*
+ * Check one line of the patch against ws_rule.  line[0] is the '+' or
+ * '-' marker and is not part of the checked text.  A violation is
+ * counted in whitespace_error and reported on stderr, except that
+ * when squelch_whitespace_errors is non-zero, errors beyond that
+ * count are only counted, not printed.
+ */
-static void check_whitespace(const char *line, int len)
+static void check_whitespace(const char *line, int len, unsigned ws_rule)
 {
-       const char *err = "Adds trailing whitespace";
-       int seen_space = 0;
-       int i;
-
-       /*
-        * We know len is at least two, since we have a '+' and we
-        * checked that the last character was a '\n' before calling
-        * this function.  That is, an addition of an empty line would
-        * check the '+' here.  Sneaky...
-        */
-       if (isspace(line[len-2]))
-               goto error;
-
-       /*
-        * Make sure that there is no space followed by a tab in
-        * indentation.
-        */
-       err = "Space in indent is followed by a tab";
-       for (i = 1; i < len; i++) {
-               if (line[i] == '\t') {
-                       if (seen_space)
-                               goto error;
-               }
-               else if (line[i] == ' ')
-                       seen_space = 1;
-               else
-                       break;
-       }
-       return;
+       char *err;
+       unsigned result = check_and_emit_line(line + 1, len - 1, ws_rule,
+           NULL, NULL, NULL, NULL);
+       if (!result)
+               return;
 
- error:
        whitespace_error++;
        if (squelch_whitespace_errors &&
            squelch_whitespace_errors < whitespace_error)
                ;
-       else
-               fprintf(stderr, "%s.\n%s:%d:%.*s\n",
-                       err, patch_input_file, linenr, len-2, line+1);
+       else {
+               err = whitespace_error_string(result);
+               fprintf(stderr, "%s:%d: %s.\n%.*s\n",
+                    patch_input_file, linenr, err, len - 2, line + 1);
+               free(err);
+       }
 }
 
-
 /*
  * Parse a unified diff. Note that this really needs to parse each
  * fragment separately, since the only way to know the difference
  * between a "---" that is part of a patch, and a "---" that starts
  * the next patch is to look at the line counts.
  */
-static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
+static int parse_fragment(char *line, unsigned long size,
+                         struct patch *patch, struct fragment *fragment)
 {
        int added, deleted;
        int len = linelen(line, size), offset;
@@ -980,22 +1044,23 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s
                        break;
                case '-':
                        if (apply_in_reverse &&
-                                       new_whitespace != nowarn_whitespace)
-                               check_whitespace(line, len);
+                           ws_error_action != nowarn_ws_error)
+                               check_whitespace(line, len, patch->ws_rule);
                        deleted++;
                        oldlines--;
                        trailing = 0;
                        break;
                case '+':
                        if (!apply_in_reverse &&
-                                       new_whitespace != nowarn_whitespace)
-                               check_whitespace(line, len);
+                           ws_error_action != nowarn_ws_error)
+                               check_whitespace(line, len, patch->ws_rule);
                        added++;
                        newlines--;
                        trailing = 0;
                        break;
 
-                /* We allow "\ No newline at end of file". Depending
+               /*
+                * We allow "\ No newline at end of file". Depending
                  * on locale settings when the patch was produced we
                  * don't know what this line looks like. The only
                  * thing we do know is that it begins with "\ ".
@@ -1013,7 +1078,8 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s
        fragment->leading = leading;
        fragment->trailing = trailing;
 
-       /* If a fragment ends with an incomplete line, we failed to include
+       /*
+        * If a fragment ends with an incomplete line, we failed to include
         * it in the above loop because we hit oldlines == newlines == 0
         * before seeing it.
         */
@@ -1141,7 +1207,8 @@ static struct fragment *parse_binary_hunk(char **buf_p,
                                          int *status_p,
                                          int *used_p)
 {
-       /* Expect a line that begins with binary patch method ("literal"
+       /*
+        * Expect a line that begins with binary patch method ("literal"
         * or "delta"), followed by the length of data before deflating.
         * a sequence of 'length-byte' followed by base-85 encoded data
         * should follow, terminated by a newline.
@@ -1190,7 +1257,8 @@ static struct fragment *parse_binary_hunk(char **buf_p,
                        size--;
                        break;
                }
-               /* Minimum line is "A00000\n" which is 7-byte long,
+               /*
+                * Minimum line is "A00000\n" which is 7-byte long,
                 * and the line length must be multiple of 5 plus 2.
                 */
                if ((llen < 7) || (llen-2) % 5)
@@ -1241,7 +1309,8 @@ static struct fragment *parse_binary_hunk(char **buf_p,
 
 static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
 {
-       /* We have read "GIT binary patch\n"; what follows is a line
+       /*
+        * We have read "GIT binary patch\n"; what follows is a line
         * that says the patch method (currently, either "literal" or
         * "delta") and the length of data before deflating; a
         * sequence of 'length-byte' followed by base-85 encoded data
@@ -1271,7 +1340,8 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
        if (reverse)
                used += used_1;
        else if (status) {
-               /* not having reverse hunk is not an error, but having
+               /*
+                * Not having reverse hunk is not an error, but having
                 * a corrupt reverse hunk is.
                 */
                free((void*) forward->patch);
@@ -1292,7 +1362,12 @@ static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
        if (offset < 0)
                return offset;
 
-       patchsize = parse_single_patch(buffer + offset + hdrsize, size - offset - hdrsize, patch);
+       patch->ws_rule = whitespace_rule(patch->new_name
+                                        ? patch->new_name
+                                        : patch->old_name);
+
+       patchsize = parse_single_patch(buffer + offset + hdrsize,
+                                      size - offset - hdrsize, patch);
 
        if (!patchsize) {
                static const char *binhdr[] = {
@@ -1368,8 +1443,10 @@ static void reverse_patches(struct patch *p)
        }
 }
 
-static const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
-static const char minuses[]= "----------------------------------------------------------------------";
+static const char pluses[] =
+"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
+static const char minuses[]=
+"----------------------------------------------------------------------";
 
 static void show_stats(struct patch *patch)
 {
@@ -1438,155 +1515,81 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
        }
 }
 
-static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line, int *lines)
+static int copy_wsfix(char *output, const char *patch, int plen,
+                     unsigned ws_rule, int count_error)
 {
-       int i;
-       unsigned long start, backwards, forwards;
-
-       if (fragsize > size)
-               return -1;
-
-       start = 0;
-       if (line > 1) {
-               unsigned long offset = 0;
-               i = line-1;
-               while (offset + fragsize <= size) {
-                       if (buf[offset++] == '\n') {
-                               start = offset;
-                               if (!--i)
-                                       break;
-                       }
-               }
-       }
-
-       /* Exact line number? */
-       if ((start + fragsize <= size) &&
-           !memcmp(buf + start, fragment, fragsize))
-               return start;
-
-       /*
-        * There's probably some smart way to do this, but I'll leave
-        * that to the smart and beautiful people. I'm simple and stupid.
-        */
-       backwards = start;
-       forwards = start;
-       for (i = 0; ; i++) {
-               unsigned long try;
-               int n;
-
-               /* "backward" */
-               if (i & 1) {
-                       if (!backwards) {
-                               if (forwards + fragsize > size)
-                                       break;
-                               continue;
-                       }
-                       do {
-                               --backwards;
-                       } while (backwards && buf[backwards-1] != '\n');
-                       try = backwards;
-               } else {
-                       while (forwards + fragsize <= size) {
-                               if (buf[forwards++] == '\n')
-                                       break;
-                       }
-                       try = forwards;
-               }
-
-               if (try + fragsize > size)
-                       continue;
-               if (memcmp(buf + try, fragment, fragsize))
-                       continue;
-               n = (i >> 1)+1;
-               if (i & 1)
-                       n = -n;
-               *lines = n;
-               return try;
-       }
-
        /*
-        * We should start searching forward and backward.
-        */
-       return -1;
-}
-
-static void remove_first_line(const char **rbuf, int *rsize)
-{
-       const char *buf = *rbuf;
-       int size = *rsize;
-       unsigned long offset;
-       offset = 0;
-       while (offset <= size) {
-               if (buf[offset++] == '\n')
-                       break;
-       }
-       *rsize = size - offset;
-       *rbuf = buf + offset;
-}
-
-static void remove_last_line(const char **rbuf, int *rsize)
-{
-       const char *buf = *rbuf;
-       int size = *rsize;
-       unsigned long offset;
-       offset = size - 1;
-       while (offset > 0) {
-               if (buf[--offset] == '\n')
-                       break;
-       }
-       *rsize = offset + 1;
-}
-
-static int apply_line(char *output, const char *patch, int plen)
-{
-       /* plen is number of bytes to be copied from patch,
-        * starting at patch+1 (patch[0] is '+').  Typically
-        * patch[plen] is '\n', unless this is the incomplete
-        * last line.
+        * plen is number of bytes to be copied from patch, starting
+        * at patch.  Typically patch[plen-1] is '\n', unless this is
+        * the incomplete last line.
         */
        int i;
        int add_nl_to_tail = 0;
+       int add_cr_to_tail = 0;
        int fixed = 0;
        int last_tab_in_indent = -1;
        int last_space_in_indent = -1;
        int need_fix_leading_space = 0;
        char *buf;
 
-       if ((new_whitespace != strip_whitespace) || !whitespace_error ||
-           *patch != '+') {
-               memcpy(output, patch + 1, plen);
-               return plen;
-       }
-
-       if (1 < plen && isspace(patch[plen-1])) {
-               if (patch[plen] == '\n')
+       /*
+        * Strip trailing whitespace
+        */
+       if ((ws_rule & WS_TRAILING_SPACE) &&
+           (2 < plen && isspace(patch[plen-2]))) {
+               if (patch[plen - 1] == '\n') {
                        add_nl_to_tail = 1;
-               plen--;
-               while (0 < plen && isspace(patch[plen]))
                        plen--;
-               fixed = 1;
+                       if (1 < plen && patch[plen - 1] == '\r') {
+                               add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);
+                               plen--;
+                       }
+               }
+               if (0 < plen && isspace(patch[plen - 1])) {
+                       while (0 < plen && isspace(patch[plen-1]))
+                               plen--;
+                       fixed = 1;
+               }
        }
 
-       for (i = 1; i < plen; i++) {
+       /*
+        * Check leading whitespaces (indent)
+        */
+       for (i = 0; i < plen; i++) {
                char ch = patch[i];
                if (ch == '\t') {
                        last_tab_in_indent = i;
-                       if (0 <= last_space_in_indent)
-                               need_fix_leading_space = 1;
-               }
-               else if (ch == ' ')
+                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
+                           0 <= last_space_in_indent)
+                           need_fix_leading_space = 1;
+               } else if (ch == ' ') {
                        last_space_in_indent = i;
-               else
+                       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
+                           8 <= i - last_tab_in_indent)
+                               need_fix_leading_space = 1;
+               } else
                        break;
        }
 
        buf = output;
        if (need_fix_leading_space) {
+               /* Process indent ourselves */
                int consecutive_spaces = 0;
-               /* between patch[1..last_tab_in_indent] strip the
-                * funny spaces, updating them to tab as needed.
+               int last = last_tab_in_indent + 1;
+
+               if (ws_rule & WS_INDENT_WITH_NON_TAB) {
+                       /* have "last" point at one past the indent */
+                       if (last_tab_in_indent < last_space_in_indent)
+                               last = last_space_in_indent + 1;
+                       else
+                               last = last_tab_in_indent + 1;
+               }
+
+               /*
+                * between patch[0..last-1], strip the funny spaces,
+                * updating them to tab as needed.
                 */
-               for (i = 1; i < last_tab_in_indent; i++, plen--) {
+               for (i = 0; i < last; i++) {
                        char ch = patch[i];
                        if (ch != ' ') {
                                consecutive_spaces = 0;
@@ -1599,37 +1602,355 @@ static int apply_line(char *output, const char *patch, int plen)
                                }
                        }
                }
+               while (0 < consecutive_spaces--)
+                       *output++ = ' ';
+               plen -= last;
+               patch += last;
                fixed = 1;
-               i = last_tab_in_indent;
        }
-       else
-               i = 1;
 
-       memcpy(output, patch + i, plen);
+       memcpy(output, patch, plen);
+       if (add_cr_to_tail)
+               output[plen++] = '\r';
        if (add_nl_to_tail)
                output[plen++] = '\n';
-       if (fixed)
+       if (fixed && count_error)
                applied_after_fixing_ws++;
        return output + plen - buf;
 }
 
-static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int inaccurate_eof)
+/*
+ * Replace the preimage with its whitespace-fixed version and carry the
+ * same fixes over to the context lines of the postimage.
+ *
+ * buf/len is the fixed preimage text; it must split into exactly as
+ * many lines as the current preimage.  The preimage takes over buf and
+ * gets a freshly built line table (flags copied from the old one).
+ * The postimage is rewritten in place: added lines are kept, each
+ * LINE_COMMON line is replaced by the corresponding LINE_COMMON line
+ * of the fixed preimage, and the line/total lengths are adjusted.
+ */
+static void update_pre_post_images(struct image *preimage,
+                                  struct image *postimage,
+                                  char *buf,
+                                  size_t len)
+{
+       int i, ctx;
+       char *new, *old, *fixed;
+       struct image fixed_preimage;
+
+       /*
+        * Update the preimage with whitespace fixes.  Note that we
+        * are not losing preimage->buf -- apply_one_fragment() will
+        * free "oldlines".
+        */
+       prepare_image(&fixed_preimage, buf, len, 1);
+       assert(fixed_preimage.nr == preimage->nr);
+       for (i = 0; i < preimage->nr; i++)
+               fixed_preimage.line[i].flag = preimage->line[i].flag;
+       free(preimage->line_allocated);
+       *preimage = fixed_preimage;
+
+       /*
+        * Adjust the common context lines in postimage, in place.
+        * This is possible because whitespace fixing does not make
+        * the string grow.
+        */
+       new = old = postimage->buf;
+       fixed = preimage->buf;
+       for (i = ctx = 0; i < postimage->nr; i++) {
+               /* note: this "len" shadows the parameter of the same name */
+               size_t len = postimage->line[i].len;
+               if (!(postimage->line[i].flag & LINE_COMMON)) {
+                       /* an added line -- no counterparts in preimage */
+                       memmove(new, old, len);
+                       old += len;
+                       new += len;
+                       continue;
+               }
+
+               /* a common context -- skip it in the original postimage */
+               old += len;
+
+               /* and find the corresponding one in the fixed preimage */
+               while (ctx < preimage->nr &&
+                      !(preimage->line[ctx].flag & LINE_COMMON)) {
+                       fixed += preimage->line[ctx].len;
+                       ctx++;
+               }
+               /* the postimage has more context lines than the preimage */
+               if (preimage->nr <= ctx)
+                       die("oops");
+
+               /* and copy it in, while fixing the line length */
+               len = preimage->line[ctx].len;
+               memcpy(new, fixed, len);
+               new += len;
+               fixed += len;
+               postimage->line[i].len = len;
+               ctx++;
+       }
+
+       /* Fix the length of the whole thing */
+       postimage->len = new - postimage->buf;
+}
+
+/*
+ * See if the preimage matches img at line try_lno (byte offset try).
+ *
+ * img             - the file being patched
+ * preimage        - the lines the hunk expects to find
+ * postimage       - the lines the hunk will leave behind; its context
+ *                   lines are updated when a whitespace-fuzzy match
+ *                   is taken
+ * try, try_lno    - byte offset and line index in img to test; both
+ *                   must describe the same position
+ * ws_rule         - whitespace rule used when fixing lines
+ * match_beginning - only accept a match at the first line of img
+ * match_end       - only accept a match that ends at the end of img
+ *
+ * Returns 1 on a match and 0 otherwise.  A byte-for-byte match is
+ * tried first.  Only when ws_error_action is correct_ws_error are the
+ * lines also compared after whitespace fixing; on such a match the
+ * preimage and postimage are rewritten by update_pre_post_images(),
+ * which takes over the buffer allocated here.
+ */
+static int match_fragment(struct image *img,
+                         struct image *preimage,
+                         struct image *postimage,
+                         unsigned long try,
+                         int try_lno,
+                         unsigned ws_rule,
+                         int match_beginning, int match_end)
+{
+       int i;
+       char *fixed_buf, *buf, *orig, *target;
+
+       if (preimage->nr + try_lno > img->nr)
+               return 0;
+
+       if (match_beginning && try_lno)
+               return 0;
+
+       if (match_end && preimage->nr + try_lno != img->nr)
+               return 0;
+
+       /* Quick hash check */
+       for (i = 0; i < preimage->nr; i++)
+               if (preimage->line[i].hash != img->line[try_lno + i].hash)
+                       return 0;
+
+       /*
+        * Do we have an exact match?  If we were told to match
+        * at the end, size must be exactly at try+fragsize,
+        * otherwise try+fragsize must be still within the preimage,
+        * and either case, the old piece should match the preimage
+        * exactly.
+        */
+       if ((match_end
+            ? (try + preimage->len == img->len)
+            : (try + preimage->len <= img->len)) &&
+           !memcmp(img->buf + try, preimage->buf, preimage->len))
+               return 1;
+
+       if (ws_error_action != correct_ws_error)
+               return 0;
+
+       /*
+        * The hunk does not apply byte-by-byte, but the hash says
+        * it might with whitespace fuzz.
+        */
+       fixed_buf = xmalloc(preimage->len + 1);
+       buf = fixed_buf;
+       orig = preimage->buf;
+       target = img->buf + try;
+       for (i = 0; i < preimage->nr; i++) {
+               size_t fixlen; /* length after fixing the preimage */
+               size_t oldlen = preimage->line[i].len;
+               size_t tgtlen = img->line[try_lno + i].len;
+               size_t tgtfixlen; /* length after fixing the target line */
+               char tgtfixbuf[1024], *tgtfix;
+               int match;
+
+               /* Try fixing the line in the preimage */
+               fixlen = copy_wsfix(buf, orig, oldlen, ws_rule, 0);
+
+               /*
+                * Try fixing the line in the target.  The on-stack
+                * buffer may only be used when the line fits in it;
+                * anything longer needs a heap buffer of tgtlen bytes
+                * (fixing whitespace never makes a line grow).
+                */
+               if (tgtlen <= sizeof(tgtfixbuf))
+                       tgtfix = tgtfixbuf;
+               else
+                       tgtfix = xmalloc(tgtlen);
+               tgtfixlen = copy_wsfix(tgtfix, target, tgtlen, ws_rule, 0);
+
+               /*
+                * If they match, either the preimage was based on
+                * a version before our tree fixed whitespace breakage,
+                * or we are lacking a whitespace-fix patch the tree
+                * the preimage was based on already had (i.e. target
+                * has whitespace breakage, the preimage doesn't).
+                * In either case, we are fixing the whitespace breakages
+                * so we might as well take the fix together with their
+                * real change.
+                */
+               match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen));
+
+               if (tgtfix != tgtfixbuf)
+                       free(tgtfix);
+               if (!match)
+                       goto unmatch_exit;
+
+               orig += oldlen;
+               buf += fixlen;
+               target += tgtlen;
+       }
+
+       /*
+        * Yes, the preimage is based on an older version that still
+        * has whitespace breakages unfixed, and fixing them makes the
+        * hunk match.  Update the context lines in the postimage.
+        */
+       update_pre_post_images(preimage, postimage,
+                              fixed_buf, buf - fixed_buf);
+       return 1;
+
+ unmatch_exit:
+       free(fixed_buf);
+       return 0;
+}
+
+/*
+ * Find the line in img where the preimage matches.
+ *
+ * "line" is the index of the line where the hunk is expected to
+ * apply.  It is overridden when match_beginning or match_end is set,
+ * and a value past the end of img is clamped to the number of lines
+ * rather than treated as an error.  Starting there, candidate
+ * positions are tried alternately one line further forward and one
+ * line further backward; once one direction is exhausted only the
+ * other is followed.
+ *
+ * Returns the line index of the match, or -1 when the preimage has
+ * more lines than img or no position matches.  match_fragment() may
+ * rewrite preimage and postimage when it accepts a whitespace-fuzzy
+ * match.
+ */
+static int find_pos(struct image *img,
+                   struct image *preimage,
+                   struct image *postimage,
+                   int line,
+                   unsigned ws_rule,
+                   int match_beginning, int match_end)
+{
+       int i;
+       unsigned long backwards, forwards, try;
+       int backwards_lno, forwards_lno, try_lno;
+
+       if (preimage->nr > img->nr)
+               return -1;
+
+       /*
+        * If match_beginning or match_end is specified, there is no
+        * point starting from a wrong line that will never match and
+        * wander around and wait for a match at the specified end.
+        */
+       if (match_beginning)
+               line = 0;
+       else if (match_end)
+               line = img->nr - preimage->nr;
+
+       /* do not walk off the line table with a too-large offset */
+       if (line > img->nr)
+               line = img->nr;
+
+       /* byte offset of the starting line */
+       try = 0;
+       for (i = 0; i < line; i++)
+               try += img->line[i].len;
+
+       /*
+        * There's probably some smart way to do this, but I'll leave
+        * that to the smart and beautiful people. I'm simple and stupid.
+        */
+       backwards = try;
+       backwards_lno = line;
+       forwards = try;
+       forwards_lno = line;
+       try_lno = line;
+
+       for (i = 0; ; i++) {
+               if (match_fragment(img, preimage, postimage,
+                                  try, try_lno, ws_rule,
+                                  match_beginning, match_end))
+                       return try_lno;
+
+       again:
+               /* both directions exhausted: give up */
+               if (backwards_lno == 0 && forwards_lno == img->nr)
+                       break;
+
+               /* odd i steps backward, even i steps forward */
+               if (i & 1) {
+                       if (backwards_lno == 0) {
+                               /* nothing left above; take a forward step instead */
+                               i++;
+                               goto again;
+                       }
+                       backwards_lno--;
+                       backwards -= img->line[backwards_lno].len;
+                       try = backwards;
+                       try_lno = backwards_lno;
+               } else {
+                       if (forwards_lno == img->nr) {
+                               /* nothing left below; take a backward step instead */
+                               i++;
+                               goto again;
+                       }
+                       forwards += img->line[forwards_lno].len;
+                       forwards_lno++;
+                       try = forwards;
+                       try_lno = forwards_lno;
+               }
+
+       }
+       return -1;
+}
+
+/*
+ * Drop the first line of the image by stepping the buffer pointer and
+ * the line-table pointer past it.  Nothing is copied or freed here,
+ * and img->line_allocated is not touched.
+ */
+static void remove_first_line(struct image *img)
+{
+       img->len -= img->line->len;
+       img->buf += img->line->len;
+       img->nr -= 1;
+       img->line += 1;
+}
+
+/*
+ * Drop the last line of the image: lower the line count by one and
+ * take that line's length off the image length.  The buffer contents
+ * and the line table entries themselves are left as they are.
+ */
+static void remove_last_line(struct image *img)
+{
+       img->nr--;
+       img->len -= img->line[img->nr].len;
+}
+
+/*
+ * Replace the preimage->nr lines of img that start at line applied_pos
+ * with the contents of postimage, updating both the byte buffer and
+ * the line table of img.
+ */
+static void update_image(struct image *img,
+                        int applied_pos,
+                        struct image *preimage,
+                        struct image *postimage)
+{
+       int lno, new_nr;
+       size_t head_len = 0, old_len = 0, new_len, tail_len;
+       char *merged;
+
+       /* Bytes in front of the replaced region */
+       for (lno = 0; lno < applied_pos; lno++)
+               head_len += img->line[lno].len;
+
+       /* Bytes being replaced, bytes replacing them, and what follows */
+       for (lno = 0; lno < preimage->nr; lno++)
+               old_len += img->line[applied_pos + lno].len;
+       new_len = postimage->len;
+       tail_len = img->len - (head_len + old_len);
+
+       /* Assemble head + postimage + tail in a fresh, NUL-terminated buffer */
+       merged = xmalloc(img->len + new_len - old_len + 1);
+       memcpy(merged, img->buf, head_len);
+       memcpy(merged + head_len, postimage->buf, new_len);
+       memcpy(merged + head_len + new_len,
+              img->buf + (head_len + old_len),
+              tail_len);
+       free(img->buf);
+       img->buf = merged;
+       img->len += new_len - old_len;
+       merged[img->len] = '\0';
+
+       /* Now bring the line table in step with the new contents */
+       new_nr = img->nr + postimage->nr - preimage->nr;
+       if (postimage->nr > preimage->nr) {
+               /*
+                * Growing the table through img->line is only valid
+                * while it still points at the start of the allocation,
+                * i.e. because remove_first_line() is never called on
+                * anything other than pre/post image.
+                */
+               img->line = xrealloc(img->line, new_nr * sizeof(*img->line));
+               img->line_allocated = img->line;
+       }
+       if (postimage->nr != preimage->nr)
+               /* Slide the entries that follow the replaced lines */
+               memmove(&img->line[applied_pos + postimage->nr],
+                       &img->line[applied_pos + preimage->nr],
+                       (img->nr - (applied_pos + preimage->nr)) *
+                       sizeof(*img->line));
+       memcpy(&img->line[applied_pos],
+              postimage->line,
+              postimage->nr * sizeof(*img->line));
+       img->nr = new_nr;
+}
+
+static int apply_one_fragment(struct image *img, struct fragment *frag,
+                             int inaccurate_eof, unsigned ws_rule)
 {
        int match_beginning, match_end;
        const char *patch = frag->patch;
-       int offset, size = frag->size;
-       char *old = xmalloc(size);
-       char *new = xmalloc(size);
-       const char *oldlines, *newlines;
-       int oldsize = 0, newsize = 0;
+       int size = frag->size;
+       char *old, *new, *oldlines, *newlines;
        int new_blank_lines_at_end = 0;
        unsigned long leading, trailing;
-       int pos, lines;
+       int pos, applied_pos;
+       struct image preimage;
+       struct image postimage;
 
+       memset(&preimage, 0, sizeof(preimage));
+       memset(&postimage, 0, sizeof(postimage));
+       oldlines = xmalloc(size);
+       newlines = xmalloc(size);
+
+       old = oldlines;
+       new = newlines;
        while (size > 0) {
                char first;
                int len = linelen(patch, size);
-               int plen;
+               int plen, added;
                int added_blank_line = 0;
 
                if (!len)
@@ -1642,7 +1963,7 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int ina
                 * followed by "\ No newline", then we also remove the
                 * last one (which is the newline, of course).
                 */
-               plen = len-1;
+               plen = len - 1;
                if (len < size && patch[len] == '\\')
                        plen--;
                first = *patch;
@@ -1659,25 +1980,41 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int ina
                        if (plen < 0)
                                /* ... followed by '\No newline'; nothing */
                                break;
-                       old[oldsize++] = '\n';
-                       new[newsize++] = '\n';
+                       *old++ = '\n';
+                       *new++ = '\n';
+                       add_line_info(&preimage, "\n", 1, LINE_COMMON);
+                       add_line_info(&postimage, "\n", 1, LINE_COMMON);
                        break;
                case ' ':
                case '-':
-                       memcpy(old + oldsize, patch + 1, plen);
-                       oldsize += plen;
+                       memcpy(old, patch + 1, plen);
+                       add_line_info(&preimage, old, plen,
+                                     (first == ' ' ? LINE_COMMON : 0));
+                       old += plen;
                        if (first == '-')
                                break;
                /* Fall-through for ' ' */
                case '+':
-                       if (first != '+' || !no_add) {
-                               int added = apply_line(new + newsize, patch,
-                                                      plen);
-                               newsize += added;
-                               if (first == '+' &&
-                                   added == 1 && new[newsize-1] == '\n')
-                                       added_blank_line = 1;
+                       /* --no-add does not add new lines */
+                       if (first == '+' && no_add)
+                               break;
+
+                       if (first != '+' ||
+                           !whitespace_error ||
+                           ws_error_action != correct_ws_error) {
+                               memcpy(new, patch + 1, plen);
+                               added = plen;
+                       }
+                       else {
+                               added = copy_wsfix(new, patch + 1, plen,
+                                                  ws_rule, 1);
                        }
+                       add_line_info(&postimage, new, added,
+                                     (first == '+' ? 0 : LINE_COMMON));
+                       new += added;
+                       if (first == '+' &&
+                           added == 1 && new[-1] == '\n')
+                               added_blank_line = 1;
                        break;
                case '@': case '\\':
                        /* Ignore it, we already handled it */
@@ -1694,15 +2031,13 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int ina
                patch += len;
                size -= len;
        }
-
-       if (inaccurate_eof && oldsize > 0 && old[oldsize - 1] == '\n' &&
-                       newsize > 0 && new[newsize - 1] == '\n') {
-               oldsize--;
-               newsize--;
+       if (inaccurate_eof &&
+           old > oldlines && old[-1] == '\n' &&
+           new > newlines && new[-1] == '\n') {
+               old--;
+               new--;
        }
 
-       oldlines = old;
-       newlines = new;
        leading = frag->leading;
        trailing = frag->trailing;
 
@@ -1723,33 +2058,21 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int ina
                match_end = !trailing;
        }
 
-       lines = 0;
-       pos = frag->newpos;
+       pos = frag->newpos ? (frag->newpos - 1) : 0;
+       preimage.buf = oldlines;
+       preimage.len = old - oldlines;
+       postimage.buf = newlines;
+       postimage.len = new - newlines;
+       preimage.line = preimage.line_allocated;
+       postimage.line = postimage.line_allocated;
+
        for (;;) {
-               offset = find_offset(buf->buf, buf->len,
-                                    oldlines, oldsize, pos, &lines);
-               if (match_end && offset + oldsize != buf->len)
-                       offset = -1;
-               if (match_beginning && offset)
-                       offset = -1;
-               if (offset >= 0) {
-                       if (new_whitespace == strip_whitespace &&
-                           (buf->len - oldsize - offset == 0)) /* end of file? */
-                               newsize -= new_blank_lines_at_end;
-
-                       /* Warn if it was necessary to reduce the number
-                        * of context lines.
-                        */
-                       if ((leading != frag->leading) ||
-                           (trailing != frag->trailing))
-                               fprintf(stderr, "Context reduced to (%ld/%ld)"
-                                       " to apply fragment at %d\n",
-                                       leading, trailing, pos + lines);
-
-                       strbuf_splice(buf, offset, oldsize, newlines, newsize);
-                       offset = 0;
+
+               applied_pos = find_pos(img, &preimage, &postimage, pos,
+                                      ws_rule, match_beginning, match_end);
+
+               if (applied_pos >= 0)
                        break;
-               }
 
                /* Am I at my context limits? */
                if ((leading <= p_context) && (trailing <= p_context))
@@ -1758,32 +2081,64 @@ static int apply_one_fragment(struct strbuf *buf, struct fragment *frag, int ina
                        match_beginning = match_end = 0;
                        continue;
                }
-               /* Reduce the number of context lines
-                * Reduce both leading and trailing if they are equal
-                * otherwise just reduce the larger context.
+
+               /*
+                * Reduce the number of context lines; reduce both
+                * leading and trailing if they are equal otherwise
+                * just reduce the larger context.
                 */
                if (leading >= trailing) {
-                       remove_first_line(&oldlines, &oldsize);
-                       remove_first_line(&newlines, &newsize);
+                       remove_first_line(&preimage);
+                       remove_first_line(&postimage);
                        pos--;
                        leading--;
                }
                if (trailing > leading) {
-                       remove_last_line(&oldlines, &oldsize);
-                       remove_last_line(&newlines, &newsize);
+                       remove_last_line(&preimage);
+                       remove_last_line(&postimage);
                        trailing--;
                }
        }
 
-       if (offset && apply_verbosely)
-               error("while searching for:\n%.*s", oldsize, oldlines);
+       if (applied_pos >= 0) {
+               if (ws_error_action == correct_ws_error &&
+                   new_blank_lines_at_end &&
+                   postimage.nr + applied_pos == img->nr) {
+                       /*
+                        * If the patch application adds blank lines
+                        * at the end, and if the patch applies at the
+                        * end of the image, remove those added blank
+                        * lines.
+                        */
+                       while (new_blank_lines_at_end--)
+                               remove_last_line(&postimage);
+               }
+
+               /*
+                * Warn if it was necessary to reduce the number
+                * of context lines.
+                */
+               if ((leading != frag->leading) ||
+                   (trailing != frag->trailing))
+                       fprintf(stderr, "Context reduced to (%ld/%ld)"
+                               " to apply fragment at %d\n",
+                               leading, trailing, applied_pos+1);
+               update_image(img, applied_pos, &preimage, &postimage);
+       } else {
+               if (apply_verbosely)
+                       error("while searching for:\n%.*s",
+                             (int)(old - oldlines), oldlines);
+       }
+
+       free(oldlines);
+       free(newlines);
+       free(preimage.line_allocated);
+       free(postimage.line_allocated);
 
-       free(old);
-       free(new);
-       return offset;
+       return (applied_pos < 0);
 }
 
-static int apply_binary_fragment(struct strbuf *buf, struct patch *patch)
+static int apply_binary_fragment(struct image *img, struct patch *patch)
 {
        struct fragment *fragment = patch->fragments;
        unsigned long len;
@@ -1800,27 +2155,32 @@ static int apply_binary_fragment(struct strbuf *buf, struct patch *patch)
        }
        switch (fragment->binary_patch_method) {
        case BINARY_DELTA_DEFLATED:
-               dst = patch_delta(buf->buf, buf->len, fragment->patch,
+               dst = patch_delta(img->buf, img->len, fragment->patch,
                                  fragment->size, &len);
                if (!dst)
                        return -1;
-               /* XXX patch_delta NUL-terminates */
-               strbuf_attach(buf, dst, len, len + 1);
+               clear_image(img);
+               img->buf = dst;
+               img->len = len;
                return 0;
        case BINARY_LITERAL_DEFLATED:
-               strbuf_reset(buf);
-               strbuf_add(buf, fragment->patch, fragment->size);
+               clear_image(img);
+               img->len = fragment->size;
+               img->buf = xmalloc(img->len+1);
+               memcpy(img->buf, fragment->patch, img->len);
+               img->buf[img->len] = '\0';
                return 0;
        }
        return -1;
 }
 
-static int apply_binary(struct strbuf *buf, struct patch *patch)
+static int apply_binary(struct image *img, struct patch *patch)
 {
        const char *name = patch->old_name ? patch->old_name : patch->new_name;
        unsigned char sha1[20];
 
-       /* For safety, we require patch index line to contain
+       /*
+        * For safety, we require patch index line to contain
         * full 40-byte textual SHA1 for old and new, at least for now.
         */
        if (strlen(patch->old_sha1_prefix) != 40 ||
@@ -1831,10 +2191,11 @@ static int apply_binary(struct strbuf *buf, struct patch *patch)
                             "without full index line", name);
 
        if (patch->old_name) {
-               /* See if the old one matches what the patch
+               /*
+                * See if the old one matches what the patch
                 * applies to.
                 */
-               hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
+               hash_sha1_file(img->buf, img->len, blob_type, sha1);
                if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix))
                        return error("the patch applies to '%s' (%s), "
                                     "which does not match the "
@@ -1843,14 +2204,14 @@ static int apply_binary(struct strbuf *buf, struct patch *patch)
        }
        else {
                /* Otherwise, the old one must be empty. */
-               if (buf->len)
+               if (img->len)
                        return error("the patch applies to an empty "
                                     "'%s' but it is not empty", name);
        }
 
        get_sha1_hex(patch->new_sha1_prefix, sha1);
        if (is_null_sha1(sha1)) {
-               strbuf_release(buf);
+               clear_image(img);
                return 0; /* deletion patch */
        }
 
@@ -1865,19 +2226,21 @@ static int apply_binary(struct strbuf *buf, struct patch *patch)
                        return error("the necessary postimage %s for "
                                     "'%s' cannot be read",
                                     patch->new_sha1_prefix, name);
-               /* XXX read_sha1_file NUL-terminates */
-               strbuf_attach(buf, result, size, size + 1);
+               clear_image(img);
+               img->buf = result;
+               img->len = size;
        } else {
-               /* We have verified buf matches the preimage;
+               /*
+                * We have verified img matches the preimage;
                 * apply the patch data to it, which is stored
                 * in the patch->fragments->{patch,size}.
                 */
-               if (apply_binary_fragment(buf, patch))
+               if (apply_binary_fragment(img, patch))
                        return error("binary patch does not apply to '%s'",
                                     name);
 
                /* verify that the result matches */
-               hash_sha1_file(buf->buf, buf->len, blob_type, sha1);
+               hash_sha1_file(img->buf, img->len, blob_type, sha1);
                if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix))
                        return error("binary patch to '%s' creates incorrect result (expecting %s, got %s)",
                                name, patch->new_sha1_prefix, sha1_to_hex(sha1));
@@ -1886,16 +2249,18 @@ static int apply_binary(struct strbuf *buf, struct patch *patch)
        return 0;
 }
 
-static int apply_fragments(struct strbuf *buf, struct patch *patch)
+static int apply_fragments(struct image *img, struct patch *patch)
 {
        struct fragment *frag = patch->fragments;
        const char *name = patch->old_name ? patch->old_name : patch->new_name;
+       unsigned ws_rule = patch->ws_rule;
+       unsigned inaccurate_eof = patch->inaccurate_eof;
 
        if (patch->is_binary)
-               return apply_binary(buf, patch);
+               return apply_binary(img, patch);
 
        while (frag) {
-               if (apply_one_fragment(buf, frag, patch->inaccurate_eof)) {
+               if (apply_one_fragment(img, frag, inaccurate_eof, ws_rule)) {
                        error("patch failed: %s:%ld", name, frag->oldpos);
                        if (!apply_with_reject)
                                return -1;
@@ -1931,6 +2296,9 @@ static int read_file_or_gitlink(struct cache_entry *ce, struct strbuf *buf)
 static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *ce)
 {
        struct strbuf buf;
+       struct image image;
+       size_t len;
+       char *img;
 
        strbuf_init(&buf, 0);
        if (cached) {
@@ -1953,9 +2321,14 @@ static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *
                }
        }
 
-       if (apply_fragments(&buf, patch) < 0)
+       img = strbuf_detach(&buf, &len);
+       prepare_image(&image, img, len, !patch->is_binary);
+
+       if (apply_fragments(&image, patch) < 0)
                return -1; /* note with --reject this succeeds. */
-       patch->result = strbuf_detach(&buf, &patch->resultsize);
+       patch->result = image.buf;
+       patch->resultsize = image.len;
+       free(image.line_allocated);
 
        if (0 < patch->is_delete && patch->resultsize)
                return error("removal patch leaves file contents");
@@ -2067,7 +2440,8 @@ static int check_patch(struct patch *patch, struct patch *prev_patch)
 
        if (new_name && prev_patch && 0 < prev_patch->is_delete &&
            !strcmp(prev_patch->old_name, new_name))
-               /* A type-change diff is always split into a patch to
+               /*
+                * A type-change diff is always split into a patch to
                 * delete old, immediately followed by a patch to
                 * create new (see diff.c::run_diff()); in such a case
                 * it is Ok that the entry to be deleted by the
@@ -2671,7 +3045,7 @@ static int apply_patch(int fd, const char *filename, int inaccurate_eof)
                offset += nr;
        }
 
-       if (whitespace_error && (new_whitespace == error_on_whitespace))
+       if (whitespace_error && (ws_error_action == die_on_ws_error))
                apply = 0;
 
        update_index = check_index && apply;
@@ -2866,12 +3240,12 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix)
                                squelched,
                                squelched == 1 ? "" : "s");
                }
-               if (new_whitespace == error_on_whitespace)
+               if (ws_error_action == die_on_ws_error)
                        die("%d line%s add%s whitespace errors.",
                            whitespace_error,
                            whitespace_error == 1 ? "" : "s",
                            whitespace_error == 1 ? "s" : "");
-               if (applied_after_fixing_ws)
+               if (applied_after_fixing_ws && apply)
                        fprintf(stderr, "warning: %d line%s applied after"
                                " fixing whitespace errors.\n",
                                applied_after_fixing_ws,
@@ -2885,7 +3259,7 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix)
 
        if (update_index) {
                if (write_cache(newfd, active_cache, active_nr) ||
-                   close(newfd) || commit_locked_index(&lock_file))
+                   commit_locked_index(&lock_file))
                        die("Unable to write new index file");
        }