apply: do not barf on patch with too large an offset
[gitweb.git] / builtin-apply.c
index 7fb330541fe62a52cef375ef78b8764b4361fe06..5ed4e918c0873b641a2f88b08bff9c66d6c4d850 100644 (file)
@@ -1515,14 +1515,183 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
        }
 }
 
+static int copy_wsfix(char *output, const char *patch, int plen,
+                     unsigned ws_rule, int count_error)
+{
+       /*
+        * Copy the plen bytes at patch (one line; patch[plen-1] is '\n'
+        * unless it is the incomplete last line) to output, fixing the
+        * whitespace breakage selected by ws_rule.  Returns the output
+        * length; a fix bumps applied_after_fixing_ws if count_error.
+        */
+       int i;
+       int add_nl_to_tail = 0;
+       int add_cr_to_tail = 0;
+       int fixed = 0;
+       int last_tab_in_indent = -1;
+       int last_space_in_indent = -1;
+       int need_fix_leading_space = 0;
+       char *buf;
+
+       /*
+        * Strip trailing whitespace; a line as short as " \n" counts too
+        */
+       if ((ws_rule & WS_TRAILING_SPACE) &&
+           (2 <= plen && isspace(patch[plen-2]))) {
+               if (patch[plen - 1] == '\n') {
+                       add_nl_to_tail = 1;
+                       plen--;
+                       if (0 < plen && patch[plen - 1] == '\r') {
+                               add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);
+                               plen--;
+                       }
+               }
+               if (0 < plen && isspace(patch[plen - 1])) {
+                       while (0 < plen && isspace(patch[plen-1]))
+                               plen--;
+                       fixed = 1;
+               }
+       }
+
+       /*
+        * Check leading whitespaces (indent)
+        */
+       for (i = 0; i < plen; i++) {
+               char ch = patch[i];
+               if (ch == '\t') {
+                       last_tab_in_indent = i;
+                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
+                           0 <= last_space_in_indent)
+                               need_fix_leading_space = 1;
+               } else if (ch == ' ') {
+                       last_space_in_indent = i;
+                       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
+                           8 <= i - last_tab_in_indent)
+                               need_fix_leading_space = 1;
+               } else
+                       break;
+       }
+
+       buf = output;
+       if (need_fix_leading_space) {
+               /* Process indent ourselves */
+               int consecutive_spaces = 0;
+               int last = last_tab_in_indent + 1;
+
+               if (ws_rule & WS_INDENT_WITH_NON_TAB) {
+                       /* have "last" point at one past the indent */
+                       if (last_tab_in_indent < last_space_in_indent)
+                               last = last_space_in_indent + 1;
+               }
+
+               /*
+                * between patch[0..last-1], strip the funny spaces,
+                * updating them to tab as needed.
+                */
+               for (i = 0; i < last; i++) {
+                       char ch = patch[i];
+                       if (ch != ' ') {
+                               consecutive_spaces = 0;
+                               *output++ = ch;
+                       } else {
+                               consecutive_spaces++;
+                               if (consecutive_spaces == 8) {
+                                       *output++ = '\t';
+                                       consecutive_spaces = 0;
+                               }
+                       }
+               }
+               /* a trailing run shorter than 8 spaces is kept as spaces */
+               while (0 < consecutive_spaces--)
+                       *output++ = ' ';
+               plen -= last;
+               patch += last;
+               fixed = 1;
+       }
+
+       memcpy(output, patch, plen);
+       if (add_cr_to_tail)
+               output[plen++] = '\r';
+       if (add_nl_to_tail)
+               output[plen++] = '\n';
+       if (fixed && count_error)
+               applied_after_fixing_ws++;
+       return output + plen - buf;
+}
+
+static void update_pre_post_images(struct image *preimage,
+                                  struct image *postimage,
+                                  char *buf,
+                                  size_t len)
+{
+       struct image fixed_preimage;
+       char *dst, *src, *fixed;
+       int n, pre_lno, post_lno;
+
+       /*
+        * Swap the whitespace-fixed text in as the new preimage, keeping
+        * the per-line flags.  The old preimage->buf is not lost here;
+        * apply_one_fragment() will free "oldlines".
+        */
+       prepare_image(&fixed_preimage, buf, len, 1);
+       assert(fixed_preimage.nr == preimage->nr);
+       for (n = 0; n < preimage->nr; n++)
+               fixed_preimage.line[n].flag = preimage->line[n].flag;
+       free(preimage->line_allocated);
+       *preimage = fixed_preimage;
+
+       /*
+        * Rewrite the postimage in place, replacing each common context
+        * line with its fixed counterpart from the preimage.  In-place is
+        * safe because whitespace fixing does not make the string grow.
+        */
+       dst = src = postimage->buf;
+       fixed = preimage->buf;
+       pre_lno = 0;
+       for (post_lno = 0; post_lno < postimage->nr; post_lno++) {
+               size_t post_len = postimage->line[post_lno].len;
+               size_t ctx_len;
+
+               if (postimage->line[post_lno].flag & LINE_COMMON) {
+                       /* context: drop the original text ... */
+                       src += post_len;
+                       /* ... skip non-common preimage lines to its twin */
+                       for (; pre_lno < preimage->nr; pre_lno++) {
+                               if (preimage->line[pre_lno].flag & LINE_COMMON)
+                                       break;
+                               fixed += preimage->line[pre_lno].len;
+                       }
+                       if (preimage->nr <= pre_lno)
+                               die("oops");
+                       /* ... and copy the fixed text, recording its length */
+                       ctx_len = preimage->line[pre_lno].len;
+                       memcpy(dst, fixed, ctx_len);
+                       dst += ctx_len;
+                       fixed += ctx_len;
+                       postimage->line[post_lno].len = ctx_len;
+                       pre_lno++;
+               } else {
+                       /* an added line: just slide it down into place */
+                       memmove(dst, src, post_len);
+                       src += post_len;
+                       dst += post_len;
+               }
+       }
+
+       /* the postimage may have shrunk; record its new total length */
+       postimage->len = dst - postimage->buf;
+}
+
 static int match_fragment(struct image *img,
                          struct image *preimage,
                          struct image *postimage,
                          unsigned long try,
                          int try_lno,
+                         unsigned ws_rule,
                          int match_beginning, int match_end)
 {
        int i;
+       char *fixed_buf, *buf, *orig, *target;
 
        if (preimage->nr + try_lno > img->nr)
                return 0;
@@ -1551,10 +1720,68 @@ static int match_fragment(struct image *img,
            !memcmp(img->buf + try, preimage->buf, preimage->len))
                return 1;
 
+       if (ws_error_action != correct_ws_error)
+               return 0;
+
        /*
-        * NEEDSWORK: We can optionally match fuzzily here, but
-        * that is for a later round.
+        * The hunk does not apply byte-by-byte, but the hash says
+        * it might with whitespace fuzz.
         */
+       fixed_buf = xmalloc(preimage->len + 1);
+       buf = fixed_buf;
+       orig = preimage->buf;
+       target = img->buf + try;
+       for (i = 0; i < preimage->nr; i++) {
+               size_t fixlen; /* length after fixing the preimage */
+               size_t oldlen = preimage->line[i].len;
+               size_t tgtlen = img->line[try_lno + i].len;
+               size_t tgtfixlen; /* length after fixing the target line */
+               char tgtfixbuf[1024], *tgtfix;
+               int match;
+
+               /* Try fixing the line in the preimage */
+               fixlen = copy_wsfix(buf, orig, oldlen, ws_rule, 0);
+
+               /* Fix the target line too; use the stack buffer only if it fits */
+               if (tgtlen <= sizeof(tgtfixbuf))
+                       tgtfix = tgtfixbuf;
+               else
+                       tgtfix = xmalloc(tgtlen);
+               tgtfixlen = copy_wsfix(tgtfix, target, tgtlen, ws_rule, 0);
+
+               /*
+                * If they match, either the preimage was based on
+                * a version before our tree fixed whitespace breakage,
+                * or we are lacking a whitespace-fix patch the tree
+                * the preimage was based on already had (i.e. target
+                * has whitespace breakage, the preimage doesn't).
+                * In either case, we are fixing the whitespace breakages
+                * so we might as well take the fix together with their
+                * real change.
+                */
+               match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen));
+
+               if (tgtfix != tgtfixbuf)
+                       free(tgtfix);
+               if (!match)
+                       goto unmatch_exit;
+
+               orig += oldlen;
+               buf += fixlen;
+               target += tgtlen;
+       }
+
+       /*
+        * Yes, the preimage is based on an older version that still
+        * has whitespace breakages unfixed, and fixing them makes the
+        * hunk match.  Update the context lines in the postimage.
+        */
+       update_pre_post_images(preimage, postimage,
+                              fixed_buf, buf - fixed_buf);
+       return 1;
+
+ unmatch_exit:
+       free(fixed_buf);
        return 0;
 }
 
@@ -1562,6 +1789,7 @@ static int find_pos(struct image *img,
                    struct image *preimage,
                    struct image *postimage,
                    int line,
+                   unsigned ws_rule,
                    int match_beginning, int match_end)
 {
        int i;
@@ -1581,6 +1809,9 @@ static int find_pos(struct image *img,
        else if (match_end)
                line = img->nr - preimage->nr;
 
+       if (line > img->nr)
+               line = img->nr;
+
        try = 0;
        for (i = 0; i < line; i++)
                try += img->line[i].len;
@@ -1597,7 +1828,7 @@ static int find_pos(struct image *img,
 
        for (i = 0; ; i++) {
                if (match_fragment(img, preimage, postimage,
-                                  try, try_lno,
+                                  try, try_lno, ws_rule,
                                   match_beginning, match_end))
                        return try_lno;
 
@@ -1642,108 +1873,6 @@ static void remove_last_line(struct image *img)
        img->len -= img->line[--img->nr].len;
 }
 
-static int apply_line(char *output, const char *patch, int plen,
-                     unsigned ws_rule)
-{
-       /*
-        * plen is number of bytes to be copied from patch,
-        * starting at patch+1 (patch[0] is '+').  Typically
-        * patch[plen] is '\n', unless this is the incomplete
-        * last line.
-        */
-       int i;
-       int add_nl_to_tail = 0;
-       int fixed = 0;
-       int last_tab_in_indent = 0;
-       int last_space_in_indent = 0;
-       int need_fix_leading_space = 0;
-       char *buf;
-
-       if ((ws_error_action != correct_ws_error) || !whitespace_error ||
-           *patch != '+') {
-               memcpy(output, patch + 1, plen);
-               return plen;
-       }
-
-       /*
-        * Strip trailing whitespace
-        */
-       if ((ws_rule & WS_TRAILING_SPACE) &&
-           (1 < plen && isspace(patch[plen-1]))) {
-               if (patch[plen] == '\n')
-                       add_nl_to_tail = 1;
-               plen--;
-               while (0 < plen && isspace(patch[plen]))
-                       plen--;
-               fixed = 1;
-       }
-
-       /*
-        * Check leading whitespaces (indent)
-        */
-       for (i = 1; i < plen; i++) {
-               char ch = patch[i];
-               if (ch == '\t') {
-                       last_tab_in_indent = i;
-                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
-                           0 < last_space_in_indent)
-                           need_fix_leading_space = 1;
-               } else if (ch == ' ') {
-                       last_space_in_indent = i;
-                       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
-                           8 <= i - last_tab_in_indent)
-                               need_fix_leading_space = 1;
-               }
-               else
-                       break;
-       }
-
-       buf = output;
-       if (need_fix_leading_space) {
-               int consecutive_spaces = 0;
-               int last = last_tab_in_indent + 1;
-
-               if (ws_rule & WS_INDENT_WITH_NON_TAB) {
-                       /* have "last" point at one past the indent */
-                       if (last_tab_in_indent < last_space_in_indent)
-                               last = last_space_in_indent + 1;
-                       else
-                               last = last_tab_in_indent + 1;
-               }
-
-               /*
-                * between patch[1..last], strip the funny spaces,
-                * updating them to tab as needed.
-                */
-               for (i = 1; i < last; i++, plen--) {
-                       char ch = patch[i];
-                       if (ch != ' ') {
-                               consecutive_spaces = 0;
-                               *output++ = ch;
-                       } else {
-                               consecutive_spaces++;
-                               if (consecutive_spaces == 8) {
-                                       *output++ = '\t';
-                                       consecutive_spaces = 0;
-                               }
-                       }
-               }
-               while (0 < consecutive_spaces--)
-                       *output++ = ' ';
-               fixed = 1;
-               i = last;
-       }
-       else
-               i = 1;
-
-       memcpy(output, patch + i, plen);
-       if (add_nl_to_tail)
-               output[plen++] = '\n';
-       if (fixed)
-               applied_after_fixing_ws++;
-       return output + plen - buf;
-}
-
 static void update_image(struct image *img,
                         int applied_pos,
                         struct image *preimage,
@@ -1821,7 +1950,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
        while (size > 0) {
                char first;
                int len = linelen(patch, size);
-               int plen;
+               int plen, added;
                int added_blank_line = 0;
 
                if (!len)
@@ -1866,17 +1995,26 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
                                break;
                /* Fall-through for ' ' */
                case '+':
-                       if (first != '+' || !no_add) {
-                               int added = apply_line(new, patch,
-                                                      plen, ws_rule);
-                               add_line_info(&postimage, new, added,
-                                             (first == '+' ? 0 : LINE_COMMON));
-
-                               new += added;
-                               if (first == '+' &&
-                                   added == 1 && new[-1] == '\n')
-                                       added_blank_line = 1;
+                       /* --no-add does not add new lines */
+                       if (first == '+' && no_add)
+                               break;
+
+                       if (first != '+' ||
+                           !whitespace_error ||
+                           ws_error_action != correct_ws_error) {
+                               memcpy(new, patch + 1, plen);
+                               added = plen;
+                       }
+                       else {
+                               added = copy_wsfix(new, patch + 1, plen,
+                                                  ws_rule, 1);
                        }
+                       add_line_info(&postimage, new, added,
+                                     (first == '+' ? 0 : LINE_COMMON));
+                       new += added;
+                       if (first == '+' &&
+                           added == 1 && new[-1] == '\n')
+                               added_blank_line = 1;
                        break;
                case '@': case '\\':
                        /* Ignore it, we already handled it */
@@ -1930,8 +2068,8 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
 
        for (;;) {
 
-               applied_pos = find_pos(img, &preimage, &postimage,
-                                      pos, match_beginning, match_end);
+               applied_pos = find_pos(img, &preimage, &postimage, pos,
+                                      ws_rule, match_beginning, match_end);
 
                if (applied_pos >= 0)
                        break;