git-apply --binary: clean up and prepare for --reverse
authorJunio C Hamano <junkio@cox.net>
Tue, 15 Aug 2006 09:23:06 +0000 (02:23 -0700)
committerJunio C Hamano <junkio@cox.net>
Tue, 15 Aug 2006 10:11:52 +0000 (03:11 -0700)
This cleans up the implementation of "git-apply --binary", and
implements reverse application of binary patches (when git-diff
is converted to emit reversible binary patches).

Earlier, the types of encoding (either deflated literal or
deflated delta) were stored in is_binary field in struct patch,
which meant that we cannot store more than one fragment that
differ in the encoding for a patch. This moves the information
to a field in struct fragment that is otherwise unused for
binary patches, and makes it possible to hang two (or more, but
two is enough) hunks for a binary patch.

The original "binary patch" output from git-diff is internally
parsed into an "is_binary" patch with one fragment. Upcoming
reversible binary patch output will have two fragments, the
first one being the forward patch and the second one the reverse
patch.

Signed-off-by: Junio C Hamano <junkio@cox.net>
builtin-apply.c
index 267aab0e84da11cf3b708acd46eb65319c515cdb..4573c9abb214c6f7b3119b6e7f594c5ddf09162b 100644 (file)
@@ -109,6 +109,13 @@ static int max_change, max_len;
  */
 static int linenr = 1;
 
+/*
+ * This represents one "hunk" from a patch, starting with
+ * "@@ -oldpos,oldlines +newpos,newlines @@" marker.  The
+ * patch text is pointed at by patch, and its byte length
+ * is stored in size.  leading and trailing are the number
+ * of context lines.
+ */
 struct fragment {
        unsigned long leading, trailing;
        unsigned long oldpos, oldlines;
@@ -118,12 +125,19 @@ struct fragment {
        struct fragment *next;
 };
 
+/*
+ * When dealing with a binary patch, we reuse "leading" field
+ * to store the type of the binary hunk, either deflated "delta"
+ * or deflated "literal".
+ */
+#define binary_patch_method leading
+#define BINARY_DELTA_DEFLATED  1
+#define BINARY_LITERAL_DEFLATED 2
+
 struct patch {
        char *new_name, *old_name, *def_name;
        unsigned int old_mode, new_mode;
        int is_rename, is_copy, is_new, is_delete, is_binary;
-#define BINARY_DELTA_DEFLATED 1
-#define BINARY_LITERAL_DEFLATED 2
        unsigned long deflate_origlen;
        int lines_added, lines_deleted;
        int score;
@@ -979,43 +993,70 @@ static inline int metadata_changes(struct patch *patch)
                 patch->old_mode != patch->new_mode);
 }
 
-static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
+static char *inflate_it(const void *data, unsigned long size,
+                       unsigned long inflated_size)
 {
-       /* We have read "GIT binary patch\n"; what follows is a line
-        * that says the patch method (currently, either "deflated
-        * literal" or "deflated delta") and the length of data before
-        * deflating; a sequence of 'length-byte' followed by base-85
-        * encoded data follows.
+       z_stream stream;
+       void *out;
+       int st;
+
+       memset(&stream, 0, sizeof(stream));
+
+       stream.next_in = (unsigned char *)data;
+       stream.avail_in = size;
+       stream.next_out = out = xmalloc(inflated_size);
+       stream.avail_out = inflated_size;
+       inflateInit(&stream);
+       st = inflate(&stream, Z_FINISH);
+       if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
+               free(out);
+               return NULL;
+       }
+       return out;
+}
+
+static struct fragment *parse_binary_hunk(char **buf_p,
+                                         unsigned long *sz_p,
+                                         int *status_p,
+                                         int *used_p)
+{
+       /* Expect a line that begins with binary patch method ("literal"
+        * or "delta"), followed by the length of data before deflating.
+        * a sequence of 'length-byte' followed by base-85 encoded data
+        * should follow, terminated by a newline.
         *
         * Each 5-byte sequence of base-85 encodes up to 4 bytes,
         * and we would limit the patch line to 66 characters,
         * so one line can fit up to 13 groups that would decode
         * to 52 bytes max.  The length byte 'A'-'Z' corresponds
         * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes.
-        * The end of binary is signaled with an empty line.
         */
        int llen, used;
-       struct fragment *fragment;
+       unsigned long size = *sz_p;
+       char *buffer = *buf_p;
+       int patch_method;
+       unsigned long origlen;
        char *data = NULL;
+       int hunk_size = 0;
+       struct fragment *frag;
 
-       patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
-
-       /* Grab the type of patch */
        llen = linelen(buffer, size);
        used = llen;
-       linenr++;
+
+       *status_p = 0;
 
        if (!strncmp(buffer, "delta ", 6)) {
-               patch->is_binary = BINARY_DELTA_DEFLATED;
-               patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
+               patch_method = BINARY_DELTA_DEFLATED;
+               origlen = strtoul(buffer + 6, NULL, 10);
        }
        else if (!strncmp(buffer, "literal ", 8)) {
-               patch->is_binary = BINARY_LITERAL_DEFLATED;
-               patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
+               patch_method = BINARY_LITERAL_DEFLATED;
+               origlen = strtoul(buffer + 8, NULL, 10);
        }
        else
-               return error("unrecognized binary patch at line %d: %.*s",
-                            linenr-1, llen-1, buffer);
+               return NULL;
+
+       linenr++;
        buffer += llen;
        while (1) {
                int byte_length, max_byte_length, newsize;
@@ -1044,21 +1085,79 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
                if (max_byte_length < byte_length ||
                    byte_length <= max_byte_length - 4)
                        goto corrupt;
-               newsize = fragment->size + byte_length;
+               newsize = hunk_size + byte_length;
                data = xrealloc(data, newsize);
-               if (decode_85(data + fragment->size,
-                             buffer + 1,
-                             byte_length))
+               if (decode_85(data + hunk_size, buffer + 1, byte_length))
                        goto corrupt;
-               fragment->size = newsize;
+               hunk_size = newsize;
                buffer += llen;
                size -= llen;
        }
-       fragment->patch = data;
-       return used;
+
+       frag = xcalloc(1, sizeof(*frag));
+       frag->patch = inflate_it(data, hunk_size, origlen);
+       if (!frag->patch)
+               goto corrupt;
+       free(data);
+       frag->size = origlen;
+       *buf_p = buffer;
+       *sz_p = size;
+       *used_p = used;
+       frag->binary_patch_method = patch_method;
+       return frag;
+
  corrupt:
-       return error("corrupt binary patch at line %d: %.*s",
-                    linenr-1, llen-1, buffer);
+       if (data)
+               free(data);
+       *status_p = -1;
+       error("corrupt binary patch at line %d: %.*s",
+             linenr-1, llen-1, buffer);
+       return NULL;
+}
+
+static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
+{
+       /* We have read "GIT binary patch\n"; what follows is a line
+        * that says the patch method (currently, either "literal" or
+        * "delta") and the length of data before deflating; a
+        * sequence of 'length-byte' followed by base-85 encoded data
+        * follows.
+        *
+        * When a binary patch is reversible, there is another binary
+        * hunk in the same format, starting with patch method (either
+        * "literal" or "delta") with the length of data, and a sequence
+        * of length-byte + base-85 encoded data, terminated with another
+        * empty line.  This data, when applied to the postimage, produces
+        * the preimage.
+        */
+       struct fragment *forward;
+       struct fragment *reverse;
+       int status;
+       int used, used_1;
+
+       forward = parse_binary_hunk(&buffer, &size, &status, &used);
+       if (!forward && !status)
+               /* there has to be one hunk (forward hunk) */
+               return error("unrecognized binary patch at line %d", linenr-1);
+       if (status)
+               /* otherwise we already gave an error message */
+               return status;
+
+       reverse = parse_binary_hunk(&buffer, &size, &status, &used_1);
+       if (reverse)
+               used += used_1;
+       else if (status) {
+               /* not having reverse hunk is not an error, but having
+                * a corrupt reverse hunk is.
+                */
+               free((void*) forward->patch);
+               free(forward);
+               return status;
+       }
+       forward->next = reverse;
+       patch->fragments = forward;
+       patch->is_binary = 1;
+       return used;
 }
 
 static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
@@ -1505,28 +1604,6 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
        return offset;
 }
 
-static char *inflate_it(const void *data, unsigned long size,
-                       unsigned long inflated_size)
-{
-       z_stream stream;
-       void *out;
-       int st;
-
-       memset(&stream, 0, sizeof(stream));
-
-       stream.next_in = (unsigned char *)data;
-       stream.avail_in = size;
-       stream.next_out = out = xmalloc(inflated_size);
-       stream.avail_out = inflated_size;
-       inflateInit(&stream);
-       st = inflate(&stream, Z_FINISH);
-       if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
-               free(out);
-               return NULL;
-       }
-       return out;
-}
-
 static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
 {
        unsigned long dst_size;
@@ -1534,30 +1611,29 @@ static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
        void *data;
        void *result;
 
-       /* Binary patch is irreversible */
-       if (apply_in_reverse)
-               return error("cannot reverse-apply a binary patch to '%s'",
-                            patch->new_name
-                            ? patch->new_name : patch->old_name);
-
-       data = inflate_it(fragment->patch, fragment->size,
-                         patch->deflate_origlen);
-       if (!data)
-               return error("corrupt patch data");
-       switch (patch->is_binary) {
+       /* Binary patch is irreversible without the optional second hunk */
+       if (apply_in_reverse) {
+               if (!fragment->next)
+                       return error("cannot reverse-apply a binary patch "
+                                    "without the reverse hunk to '%s'",
+                                    patch->new_name
+                                    ? patch->new_name : patch->old_name);
+               fragment = fragment;
+       }
+       data = (void*) fragment->patch;
+       switch (fragment->binary_patch_method) {
        case BINARY_DELTA_DEFLATED:
                result = patch_delta(desc->buffer, desc->size,
                                     data,
-                                    patch->deflate_origlen,
+                                    fragment->size,
                                     &dst_size);
                free(desc->buffer);
                desc->buffer = result;
-               free(data);
                break;
        case BINARY_LITERAL_DEFLATED:
                free(desc->buffer);
                desc->buffer = data;
-               dst_size = patch->deflate_origlen;
+               dst_size = fragment->size;
                break;
        }
        if (!desc->buffer)