ws_fix_copy(): move the whitespace fixing function to ws.c
[gitweb.git] / builtin-apply.c
index e046e87ad28f47becf5b66abc68476ab2f49e0f2..64471a27e7651046ec3843087cc73803cca2502a 100644 (file)
@@ -170,6 +170,7 @@ struct line {
        size_t len;
        unsigned hash : 24;
        unsigned flag : 8;
+#define LINE_COMMON     1
 };
 
 /*
@@ -179,6 +180,7 @@ struct image {
        char *buf;
        size_t len;
        size_t nr;
+       size_t alloc;
        struct line *line_allocated;
        struct line *line;
 };
@@ -195,49 +197,39 @@ static uint32_t hash_line(const char *cp, size_t len)
        return h;
 }
 
+static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
+{
+       ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
+       img->line_allocated[img->nr].len = len;
+       img->line_allocated[img->nr].hash = hash_line(bol, len);
+       img->line_allocated[img->nr].flag = flag;
+       img->nr++;
+}
+
 static void prepare_image(struct image *image, char *buf, size_t len,
                          int prepare_linetable)
 {
        const char *cp, *ep;
-       int n;
 
+       memset(image, 0, sizeof(*image));
        image->buf = buf;
        image->len = len;
 
-       if (!prepare_linetable) {
-               image->line = NULL;
-               image->line_allocated = NULL;
-               image->nr = 0;
+       if (!prepare_linetable)
                return;
-       }
 
        ep = image->buf + image->len;
-
-       /* First count lines */
-       cp = image->buf;
-       n = 0;
-       while (cp < ep) {
-               cp = strchrnul(cp, '\n');
-               n++;
-               cp++;
-       }
-
-       image->line_allocated = xcalloc(n, sizeof(struct line));
-       image->line = image->line_allocated;
-       image->nr = n;
        cp = image->buf;
-       n = 0;
        while (cp < ep) {
                const char *next;
                for (next = cp; next < ep && *next != '\n'; next++)
                        ;
                if (next < ep)
                        next++;
-               image->line[n].len = next - cp;
-               image->line[n].hash = hash_line(cp, next - cp);
+               add_line_info(image, cp, next - cp, 0);
                cp = next;
-               n++;
        }
+       image->line = image->line_allocated;
 }
 
 static void clear_image(struct image *image)
@@ -1523,14 +1515,79 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
        }
 }
 
+static void update_pre_post_images(struct image *preimage,
+                                  struct image *postimage,
+                                  char *buf,
+                                  size_t len)
+{
+       int i, ctx;
+       char *new, *old, *fixed;
+       struct image fixed_preimage;
+
+       /*
+        * Update the preimage with whitespace fixes.  Note that we
+        * are not losing preimage->buf -- apply_one_fragment() will
+        * free "oldlines".
+        */
+       prepare_image(&fixed_preimage, buf, len, 1);
+       assert(fixed_preimage.nr == preimage->nr);
+       for (i = 0; i < preimage->nr; i++)
+               fixed_preimage.line[i].flag = preimage->line[i].flag;
+       free(preimage->line_allocated);
+       *preimage = fixed_preimage;
+
+       /*
+        * Adjust the common context lines in postimage, in place.
+        * This is possible because whitespace fixing does not make
+        * the string grow.
+        */
+       new = old = postimage->buf;
+       fixed = preimage->buf;
+       for (i = ctx = 0; i < postimage->nr; i++) {
+               size_t len = postimage->line[i].len;
+               if (!(postimage->line[i].flag & LINE_COMMON)) {
+                       /* an added line -- no counterparts in preimage */
+                       memmove(new, old, len);
+                       old += len;
+                       new += len;
+                       continue;
+               }
+
+               /* a common context -- skip it in the original postimage */
+               old += len;
+
+               /* and find the corresponding one in the fixed preimage */
+               while (ctx < preimage->nr &&
+                      !(preimage->line[ctx].flag & LINE_COMMON)) {
+                       fixed += preimage->line[ctx].len;
+                       ctx++;
+               }
+               if (preimage->nr <= ctx)
+                       die("oops");
+
+               /* and copy it in, while fixing the line length */
+               len = preimage->line[ctx].len;
+               memcpy(new, fixed, len);
+               new += len;
+               fixed += len;
+               postimage->line[i].len = len;
+               ctx++;
+       }
+
+       /* Fix the length of the whole thing */
+       postimage->len = new - postimage->buf;
+}
+
 static int match_fragment(struct image *img,
                          struct image *preimage,
                          struct image *postimage,
                          unsigned long try,
                          int try_lno,
+                         unsigned ws_rule,
                          int match_beginning, int match_end)
 {
        int i;
+       char *fixed_buf, *buf, *orig, *target;
 
        if (preimage->nr + try_lno > img->nr)
                return 0;
@@ -1559,10 +1616,68 @@ static int match_fragment(struct image *img,
            !memcmp(img->buf + try, preimage->buf, preimage->len))
                return 1;
 
+       if (ws_error_action != correct_ws_error)
+               return 0;
+
+       /*
+        * The hunk does not apply byte-by-byte, but the hash says
+        * it might with whitespace fuzz.
+        */
+       fixed_buf = xmalloc(preimage->len + 1);
+       buf = fixed_buf;
+       orig = preimage->buf;
+       target = img->buf + try;
+       for (i = 0; i < preimage->nr; i++) {
+               size_t fixlen; /* length after fixing the preimage */
+               size_t oldlen = preimage->line[i].len;
+               size_t tgtlen = img->line[try_lno + i].len;
+               size_t tgtfixlen; /* length after fixing the target line */
+               char tgtfixbuf[1024], *tgtfix;
+               int match;
+
+               /* Try fixing the line in the preimage */
+               fixlen = ws_fix_copy(buf, orig, oldlen, ws_rule, NULL);
+
+               /* Try fixing the line in the target; heap-allocate if too long */
+               if (sizeof(tgtfixbuf) > tgtlen)
+                       tgtfix = tgtfixbuf;
+               else
+                       tgtfix = xmalloc(tgtlen);
+               tgtfixlen = ws_fix_copy(tgtfix, target, tgtlen, ws_rule, NULL);
+
+               /*
+                * If they match, either the preimage was based on
+                * a version before our tree fixed whitespace breakage,
+                * or we are lacking a whitespace-fix patch the tree
+                * the preimage was based on already had (i.e. target
+                * has whitespace breakage, the preimage doesn't).
+                * In either case, we are fixing the whitespace breakages
+                * so we might as well take the fix together with their
+                * real change.
+                */
+               match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen));
+
+               if (tgtfix != tgtfixbuf)
+                       free(tgtfix);
+               if (!match)
+                       goto unmatch_exit;
+
+               orig += oldlen;
+               buf += fixlen;
+               target += tgtlen;
+       }
+
        /*
-        * NEEDSWORK: We can optionally match fuzzily here, but
-        * that is for a later round.
+        * Yes, the preimage is based on an older version that still
+        * has whitespace breakages unfixed, and fixing them makes the
+        * hunk match.  Update the context lines in the postimage.
         */
+       update_pre_post_images(preimage, postimage,
+                              fixed_buf, buf - fixed_buf);
+       return 1;
+
+ unmatch_exit:
+       free(fixed_buf);
        return 0;
 }
 
@@ -1570,6 +1685,7 @@ static int find_pos(struct image *img,
                    struct image *preimage,
                    struct image *postimage,
                    int line,
+                   unsigned ws_rule,
                    int match_beginning, int match_end)
 {
        int i;
@@ -1579,6 +1695,19 @@ static int find_pos(struct image *img,
        if (preimage->nr > img->nr)
                return -1;
 
+       /*
+        * If match_beginning or match_end is specified, there is no
+        * point starting from a wrong line that will never match and
+        * wander around and wait for a match at the specified end.
+        */
+       if (match_beginning)
+               line = 0;
+       else if (match_end)
+               line = img->nr - preimage->nr;
+
+       if (line > img->nr)
+               line = img->nr;
+
        try = 0;
        for (i = 0; i < line; i++)
                try += img->line[i].len;
@@ -1595,7 +1724,7 @@ static int find_pos(struct image *img,
 
        for (i = 0; ; i++) {
                if (match_fragment(img, preimage, postimage,
-                                  try, try_lno,
+                                  try, try_lno, ws_rule,
                                   match_beginning, match_end))
                        return try_lno;
 
@@ -1640,108 +1769,6 @@ static void remove_last_line(struct image *img)
        img->len -= img->line[--img->nr].len;
 }
 
-static int apply_line(char *output, const char *patch, int plen,
-                     unsigned ws_rule)
-{
-       /*
-        * plen is number of bytes to be copied from patch,
-        * starting at patch+1 (patch[0] is '+').  Typically
-        * patch[plen] is '\n', unless this is the incomplete
-        * last line.
-        */
-       int i;
-       int add_nl_to_tail = 0;
-       int fixed = 0;
-       int last_tab_in_indent = 0;
-       int last_space_in_indent = 0;
-       int need_fix_leading_space = 0;
-       char *buf;
-
-       if ((ws_error_action != correct_ws_error) || !whitespace_error ||
-           *patch != '+') {
-               memcpy(output, patch + 1, plen);
-               return plen;
-       }
-
-       /*
-        * Strip trailing whitespace
-        */
-       if ((ws_rule & WS_TRAILING_SPACE) &&
-           (1 < plen && isspace(patch[plen-1]))) {
-               if (patch[plen] == '\n')
-                       add_nl_to_tail = 1;
-               plen--;
-               while (0 < plen && isspace(patch[plen]))
-                       plen--;
-               fixed = 1;
-       }
-
-       /*
-        * Check leading whitespaces (indent)
-        */
-       for (i = 1; i < plen; i++) {
-               char ch = patch[i];
-               if (ch == '\t') {
-                       last_tab_in_indent = i;
-                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
-                           0 < last_space_in_indent)
-                           need_fix_leading_space = 1;
-               } else if (ch == ' ') {
-                       last_space_in_indent = i;
-                       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
-                           8 <= i - last_tab_in_indent)
-                               need_fix_leading_space = 1;
-               }
-               else
-                       break;
-       }
-
-       buf = output;
-       if (need_fix_leading_space) {
-               int consecutive_spaces = 0;
-               int last = last_tab_in_indent + 1;
-
-               if (ws_rule & WS_INDENT_WITH_NON_TAB) {
-                       /* have "last" point at one past the indent */
-                       if (last_tab_in_indent < last_space_in_indent)
-                               last = last_space_in_indent + 1;
-                       else
-                               last = last_tab_in_indent + 1;
-               }
-
-               /*
-                * between patch[1..last], strip the funny spaces,
-                * updating them to tab as needed.
-                */
-               for (i = 1; i < last; i++, plen--) {
-                       char ch = patch[i];
-                       if (ch != ' ') {
-                               consecutive_spaces = 0;
-                               *output++ = ch;
-                       } else {
-                               consecutive_spaces++;
-                               if (consecutive_spaces == 8) {
-                                       *output++ = '\t';
-                                       consecutive_spaces = 0;
-                               }
-                       }
-               }
-               while (0 < consecutive_spaces--)
-                       *output++ = ' ';
-               fixed = 1;
-               i = last;
-       }
-       else
-               i = 1;
-
-       memcpy(output, patch + i, plen);
-       if (add_nl_to_tail)
-               output[plen++] = '\n';
-       if (fixed)
-               applied_after_fixing_ws++;
-       return output + plen - buf;
-}
-
 static void update_image(struct image *img,
                         int applied_pos,
                         struct image *preimage,
@@ -1802,20 +1829,24 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
        int match_beginning, match_end;
        const char *patch = frag->patch;
        int size = frag->size;
-       char *old = xmalloc(size);
-       char *new = xmalloc(size);
-       char *oldlines, *newlines;
-       int oldsize = 0, newsize = 0;
+       char *old, *new, *oldlines, *newlines;
        int new_blank_lines_at_end = 0;
        unsigned long leading, trailing;
        int pos, applied_pos;
        struct image preimage;
        struct image postimage;
 
+       memset(&preimage, 0, sizeof(preimage));
+       memset(&postimage, 0, sizeof(postimage));
+       oldlines = xmalloc(size);
+       newlines = xmalloc(size);
+
+       old = oldlines;
+       new = newlines;
        while (size > 0) {
                char first;
                int len = linelen(patch, size);
-               int plen;
+               int plen, added;
                int added_blank_line = 0;
 
                if (!len)
@@ -1828,7 +1859,7 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
                 * followed by "\ No newline", then we also remove the
                 * last one (which is the newline, of course).
                 */
-               plen = len-1;
+               plen = len - 1;
                if (len < size && patch[len] == '\\')
                        plen--;
                first = *patch;
@@ -1845,25 +1876,40 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
                        if (plen < 0)
                                /* ... followed by '\No newline'; nothing */
                                break;
-                       old[oldsize++] = '\n';
-                       new[newsize++] = '\n';
+                       *old++ = '\n';
+                       *new++ = '\n';
+                       add_line_info(&preimage, "\n", 1, LINE_COMMON);
+                       add_line_info(&postimage, "\n", 1, LINE_COMMON);
                        break;
                case ' ':
                case '-':
-                       memcpy(old + oldsize, patch + 1, plen);
-                       oldsize += plen;
+                       memcpy(old, patch + 1, plen);
+                       add_line_info(&preimage, old, plen,
+                                     (first == ' ' ? LINE_COMMON : 0));
+                       old += plen;
                        if (first == '-')
                                break;
                /* Fall-through for ' ' */
                case '+':
-                       if (first != '+' || !no_add) {
-                               int added = apply_line(new + newsize, patch,
-                                                      plen, ws_rule);
-                               newsize += added;
-                               if (first == '+' &&
-                                   added == 1 && new[newsize-1] == '\n')
-                                       added_blank_line = 1;
+                       /* --no-add does not add new lines */
+                       if (first == '+' && no_add)
+                               break;
+
+                       if (first != '+' ||
+                           !whitespace_error ||
+                           ws_error_action != correct_ws_error) {
+                               memcpy(new, patch + 1, plen);
+                               added = plen;
+                       }
+                       else {
+                               added = ws_fix_copy(new, patch + 1, plen, ws_rule, &applied_after_fixing_ws);
                        }
+                       add_line_info(&postimage, new, added,
+                                     (first == '+' ? 0 : LINE_COMMON));
+                       new += added;
+                       if (first == '+' &&
+                           added == 1 && new[-1] == '\n')
+                               added_blank_line = 1;
                        break;
                case '@': case '\\':
                        /* Ignore it, we already handled it */
@@ -1880,16 +1926,13 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
                patch += len;
                size -= len;
        }
-
        if (inaccurate_eof &&
-           oldsize > 0 && old[oldsize - 1] == '\n' &&
-           newsize > 0 && new[newsize - 1] == '\n') {
-               oldsize--;
-               newsize--;
+           old > oldlines && old[-1] == '\n' &&
+           new > newlines && new[-1] == '\n') {
+               old--;
+               new--;
        }
 
-       oldlines = old;
-       newlines = new;
        leading = frag->leading;
        trailing = frag->trailing;
 
@@ -1911,12 +1954,17 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
        }
 
        pos = frag->newpos ? (frag->newpos - 1) : 0;
-       prepare_image(&preimage, oldlines, oldsize, 1);
-       prepare_image(&postimage, newlines, newsize, 1);
+       preimage.buf = oldlines;
+       preimage.len = old - oldlines;
+       postimage.buf = newlines;
+       postimage.len = new - newlines;
+       preimage.line = preimage.line_allocated;
+       postimage.line = postimage.line_allocated;
+
        for (;;) {
 
-               applied_pos = find_pos(img, &preimage, &postimage,
-                                      pos, match_beginning, match_end);
+               applied_pos = find_pos(img, &preimage, &postimage, pos,
+                                      ws_rule, match_beginning, match_end);
 
                if (applied_pos >= 0)
                        break;
@@ -1973,11 +2021,12 @@ static int apply_one_fragment(struct image *img, struct fragment *frag,
                update_image(img, applied_pos, &preimage, &postimage);
        } else {
                if (apply_verbosely)
-                       error("while searching for:\n%.*s", oldsize, oldlines);
+                       error("while searching for:\n%.*s",
+                             (int)(old - oldlines), oldlines);
        }
 
-       free(old);
-       free(new);
+       free(oldlines);
+       free(newlines);
        free(preimage.line_allocated);
        free(postimage.line_allocated);