From: Junio C Hamano Date: Tue, 9 May 2006 21:16:56 +0000 (-0700) Subject: Merge branch 'jc/bindiff' X-Git-Tag: v1.4.0-rc1~159 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/2fc240a7b21c060529c1d2e19d6b483361f81f2a?hp=-c Merge branch 'jc/bindiff' * jc/bindiff: improve base85 generated assembly code binary diff and apply: testsuite. binary diff: further updates. binary patch. --- 2fc240a7b21c060529c1d2e19d6b483361f81f2a diff --combined apply.c index ca36391bb2,1b93aab8af..7c8146a7f3 --- a/apply.c +++ b/apply.c @@@ -10,6 -10,7 +10,7 @@@ #include "cache.h" #include "quote.h" #include "blob.h" + #include "delta.h" // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch @@@ -19,7 -20,6 +20,7 @@@ // static const char *prefix; static int prefix_length = -1; +static int newfd = -1; static int p_value = 1; static int allow_binary_replacement = 0; @@@ -114,6 -114,9 +115,9 @@@ struct patch char *new_name, *old_name, *def_name; unsigned int old_mode, new_mode; int is_rename, is_copy, is_new, is_delete, is_binary; + #define BINARY_DELTA_DEFLATED 1 + #define BINARY_LITERAL_DEFLATED 2 + unsigned long deflate_origlen; int lines_added, lines_deleted; int score; struct fragment *fragments; @@@ -967,6 -970,88 +971,88 @@@ static inline int metadata_changes(stru patch->old_mode != patch->new_mode); } + static int parse_binary(char *buffer, unsigned long size, struct patch *patch) + { + /* We have read "GIT binary patch\n"; what follows is a line + * that says the patch method (currently, either "deflated + * literal" or "deflated delta") and the length of data before + * deflating; a sequence of 'length-byte' followed by base-85 + * encoded data follows. + * + * Each 5-byte sequence of base-85 encodes up to 4 bytes, + * and we would limit the patch line to 66 characters, + * so one line can fit up to 13 groups that would decode + * to 52 bytes max. The length byte 'A'-'Z' corresponds + * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes. + * The end of binary is signalled with an empty line. + */ + int llen, used; + struct fragment *fragment; + char *data = NULL; + + patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); + + /* Grab the type of patch */ + llen = linelen(buffer, size); + used = llen; + linenr++; + + if (!strncmp(buffer, "delta ", 6)) { + patch->is_binary = BINARY_DELTA_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 6, NULL, 10); + } + else if (!strncmp(buffer, "literal ", 8)) { + patch->is_binary = BINARY_LITERAL_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 8, NULL, 10); + } + else + return error("unrecognized binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); + buffer += llen; + while (1) { + int byte_length, max_byte_length, newsize; + llen = linelen(buffer, size); + used += llen; + linenr++; + if (llen == 1) + break; + /* Minimum line is "A00000\n" which is 7-byte long, + * and the line length must be multiple of 5 plus 2. + */ + if ((llen < 7) || (llen-2) % 5) + goto corrupt; + max_byte_length = (llen - 2) / 5 * 4; + byte_length = *buffer; + if ('A' <= byte_length && byte_length <= 'Z') + byte_length = byte_length - 'A' + 1; + else if ('a' <= byte_length && byte_length <= 'z') + byte_length = byte_length - 'a' + 27; + else + goto corrupt; + /* if the input length was not multiple of 4, we would + * have filler at the end but the filler should never + * exceed 3 bytes + */ + if (max_byte_length < byte_length || + byte_length <= max_byte_length - 4) + goto corrupt; + newsize = fragment->size + byte_length; + data = xrealloc(data, newsize); + if (decode_85(data + fragment->size, + buffer + 1, + byte_length)) + goto corrupt; + fragment->size = newsize; + buffer += llen; + size -= llen; + } + fragment->patch = data; + return used; + corrupt: + return error("corrupt binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); + } + static int parse_chunk(char *buffer, unsigned long size, struct patch *patch) { int hdrsize, patchsize; @@@ -983,19 -1068,34 +1069,34 @@@ "Files ", NULL, }; + static const char git_binary[] = "GIT binary patch\n"; int i; int hd = hdrsize + offset; unsigned long llen = linelen(buffer + hd, size - hd); - if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) + if (llen == sizeof(git_binary) - 1 && + !memcmp(git_binary, buffer + hd, llen)) { + int used; + linenr++; + used = parse_binary(buffer + hd + llen, + size - hd - llen, patch); + if (used) + patchsize = used + llen; + else + patchsize = 0; + } + else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) { for (i = 0; binhdr[i]; i++) { int len = strlen(binhdr[i]); if (len < size - hd && !memcmp(binhdr[i], buffer + hd, len)) { + linenr++; patch->is_binary = 1; + patchsize = llen; break; } } + } /* Empty patch cannot be applied if: * - it is a binary patch and we do not do binary_replace, or @@@ -1346,76 -1446,150 +1447,150 @@@ static int apply_one_fragment(struct bu return offset; } - static int apply_fragments(struct buffer_desc *desc, struct patch *patch) + static char *inflate_it(const void *data, unsigned long size, + unsigned long inflated_size) + { + z_stream stream; + void *out; + int st; + + memset(&stream, 0, sizeof(stream)); + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + stream.next_out = out = xmalloc(inflated_size); + stream.avail_out = inflated_size; + inflateInit(&stream); + st = inflate(&stream, Z_FINISH); + if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { + free(out); + return NULL; + } + return out; + } + + static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch) + { + unsigned long dst_size; + struct fragment *fragment = patch->fragments; + void *data; + void *result; + + data = inflate_it(fragment->patch, fragment->size, + patch->deflate_origlen); + if (!data) + return error("corrupt patch data"); + switch (patch->is_binary) { + case BINARY_DELTA_DEFLATED: + result = patch_delta(desc->buffer, desc->size, + data, + patch->deflate_origlen, + &dst_size); + free(desc->buffer); + desc->buffer = result; + free(data); + break; + case BINARY_LITERAL_DEFLATED: + free(desc->buffer); + desc->buffer = data; + dst_size = patch->deflate_origlen; + break; + } + if (!desc->buffer) + return -1; + desc->size = desc->alloc = dst_size; + return 0; + } + + static int apply_binary(struct buffer_desc *desc, struct patch *patch) { - struct fragment *frag = patch->fragments; const char *name = patch->old_name ? patch->old_name : patch->new_name; + unsigned char sha1[20]; + unsigned char hdr[50]; + int hdrlen; - if (patch->is_binary) { - unsigned char sha1[20]; + if (!allow_binary_replacement) + return error("cannot apply binary patch to '%s' " + "without --allow-binary-replacement", + name); - if (!allow_binary_replacement) - return error("cannot apply binary patch to '%s' " - "without --allow-binary-replacement", - name); + /* For safety, we require patch index line to contain + * full 40-byte textual SHA1 for old and new, at least for now. + */ + if (strlen(patch->old_sha1_prefix) != 40 || + strlen(patch->new_sha1_prefix) != 40 || + get_sha1_hex(patch->old_sha1_prefix, sha1) || + get_sha1_hex(patch->new_sha1_prefix, sha1)) + return error("cannot apply binary patch to '%s' " + "without full index line", name); - /* For safety, we require patch index line to contain - * full 40-byte textual SHA1 for old and new, at least for now. + if (patch->old_name) { + /* See if the old one matches what the patch + * applies to. */ - if (strlen(patch->old_sha1_prefix) != 40 || - strlen(patch->new_sha1_prefix) != 40 || - get_sha1_hex(patch->old_sha1_prefix, sha1) || - get_sha1_hex(patch->new_sha1_prefix, sha1)) - return error("cannot apply binary patch to '%s' " - "without full index line", name); - - if (patch->old_name) { - unsigned char hdr[50]; - int hdrlen; - - /* See if the old one matches what the patch - * applies to. - */ - write_sha1_file_prepare(desc->buffer, desc->size, - blob_type, sha1, hdr, &hdrlen); - if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) - return error("the patch applies to '%s' (%s), " - "which does not match the " - "current contents.", - name, sha1_to_hex(sha1)); - } - else { - /* Otherwise, the old one must be empty. */ - if (desc->size) - return error("the patch applies to an empty " - "'%s' but it is not empty", name); - } + write_sha1_file_prepare(desc->buffer, desc->size, + blob_type, sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) + return error("the patch applies to '%s' (%s), " + "which does not match the " + "current contents.", + name, sha1_to_hex(sha1)); + } + else { + /* Otherwise, the old one must be empty. */ + if (desc->size) + return error("the patch applies to an empty " + "'%s' but it is not empty", name); + } + + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) { + free(desc->buffer); + desc->alloc = desc->size = 0; + desc->buffer = NULL; + return 0; /* deletion patch */ + } - /* For now, we do not record post-image data in the patch, - * and require the object already present in the recipient's - * object database. + if (has_sha1_file(sha1)) { + /* We already have the postimage */ + char type[10]; + unsigned long size; + + free(desc->buffer); + desc->buffer = read_sha1_file(sha1, type, &size); + if (!desc->buffer) + return error("the necessary postimage %s for " + "'%s' cannot be read", + patch->new_sha1_prefix, name); + desc->alloc = desc->size = size; + } + else { + /* We have verified desc matches the preimage; + * apply the patch data to it, which is stored + * in the patch->fragments->{patch,size}. */ - if (desc->buffer) { - free(desc->buffer); - desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - - if (memcmp(sha1, null_sha1, 20)) { - char type[10]; - unsigned long size; - - desc->buffer = read_sha1_file(sha1, type, &size); - if (!desc->buffer) - return error("the necessary postimage %s for " - "'%s' does not exist", - patch->new_sha1_prefix, name); - desc->alloc = desc->size = size; - } + if (apply_binary_fragment(desc, patch)) + return error("binary patch does not apply to '%s'", + name); - return 0; + /* verify that the result matches */ + write_sha1_file_prepare(desc->buffer, desc->size, blob_type, + sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix)) + return error("binary patch to '%s' creates incorrect result", name); } + return 0; + } + + static int apply_fragments(struct buffer_desc *desc, struct patch *patch) + { + struct fragment *frag = patch->fragments; + const char *name = patch->old_name ? patch->old_name : patch->new_name; + + if (patch->is_binary) + return apply_binary(desc, patch); + while (frag) { if (apply_one_fragment(desc, frag) < 0) return error("patch failed: %s:%ld", @@@ -1874,6 -2048,7 +2049,6 @@@ static int use_patch(struct patch *p static int apply_patch(int fd, const char *filename) { - int newfd; unsigned long offset, size; char *buffer = read_patch_file(fd, &size); struct patch *list = NULL, **listp = &list; @@@ -1904,11 -2079,12 +2079,11 @@@ size -= nr; } - newfd = -1; if (whitespace_error && (new_whitespace == error_on_whitespace)) apply = 0; write_index = check_index && apply; - if (write_index) + if (write_index && newfd < 0) newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (check_index) { if (read_cache() < 0) @@@ -1921,6 -2097,12 +2096,6 @@@ if (apply) write_out_results(list, skipped_patch); - if (write_index) { - if (write_cache(newfd, active_cache, active_nr) || - commit_index_file(&cache_file)) - die("Unable to write new cachefile"); - } - if (show_index_info) show_index_list(list); @@@ -1983,7 -2165,8 +2158,8 @@@ int main(int argc, char **argv diffstat = 1; continue; } - if (!strcmp(arg, "--allow-binary-replacement")) { + if (!strcmp(arg, "--allow-binary-replacement") || + !strcmp(arg, "--binary")) { allow_binary_replacement = 1; continue; } @@@ -2078,12 -2261,5 +2254,12 @@@ whitespace_error == 1 ? "" : "s", whitespace_error == 1 ? "s" : ""); } + + if (write_index) { + if (write_cache(newfd, active_cache, active_nr) || + commit_index_file(&cache_file)) + die("Unable to write new cachefile"); + } + return 0; } diff --combined diff.c index 5315270601,bfe54c3e09..7a7b839e56 --- a/diff.c +++ b/diff.c @@@ -8,6 -8,7 +8,7 @@@ #include "quote.h" #include "diff.h" #include "diffcore.h" + #include "delta.h" #include "xdiff-interface.h" static int use_size_cache; @@@ -296,6 -297,7 +297,6 @@@ static const char minuses[]= "--------- static void show_stats(struct diffstat_t* data) { - char *prefix = ""; int i, len, add, del, total, adds = 0, dels = 0; int max, max_change = 0, max_len = 0; int total_files = data->nr; @@@ -317,7 -319,6 +318,7 @@@ } for (i = 0; i < data->nr; i++) { + char *prefix = ""; char *name = data->files[i]->name; int added = data->files[i]->added; int deleted = data->files[i]->deleted; @@@ -391,6 -392,90 +392,90 @@@ total_files, adds, dels); } + static unsigned char *deflate_it(char *data, + unsigned long size, + unsigned long *result_size) + { + int bound; + unsigned char *deflated; + z_stream stream; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, Z_BEST_COMPRESSION); + bound = deflateBound(&stream, size); + deflated = xmalloc(bound); + stream.next_out = deflated; + stream.avail_out = bound; + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + *result_size = stream.total_out; + return deflated; + } + + static void emit_binary_diff(mmfile_t *one, mmfile_t *two) + { + void *cp; + void *delta; + void *deflated; + void *data; + unsigned long orig_size; + unsigned long delta_size; + unsigned long deflate_size; + unsigned long data_size; + + printf("GIT binary patch\n"); + /* We could do deflated delta, or we could do just deflated two, + * whichever is smaller. + */ + delta = NULL; + deflated = deflate_it(two->ptr, two->size, &deflate_size); + if (one->size && two->size) { + delta = diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size, deflate_size); + if (delta) { + void *to_free = delta; + orig_size = delta_size; + delta = deflate_it(delta, delta_size, &delta_size); + free(to_free); + } + } + + if (delta && delta_size < deflate_size) { + printf("delta %lu\n", orig_size); + free(deflated); + data = delta; + data_size = delta_size; + } + else { + printf("literal %lu\n", two->size); + free(delta); + data = deflated; + data_size = deflate_size; + } + + /* emit data encoded in base85 */ + cp = data; + while (data_size) { + int bytes = (52 < data_size) ? 52 : data_size; + char line[70]; + data_size -= bytes; + if (bytes <= 26) + line[0] = bytes + 'A' - 1; + else + line[0] = bytes - 26 + 'a' - 1; + encode_85(line + 1, cp, bytes); + cp += bytes; + puts(line); + } + printf("\n"); + free(data); + } + #define FIRST_FEW_BYTES 8000 static int mmfile_is_binary(mmfile_t *mf) { @@@ -407,6 -492,7 +492,7 @@@ static void builtin_diff(const char *na struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { mmfile_t mf1, mf2; @@@ -451,8 -537,17 +537,17 @@@ if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) - printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { + /* Quite common confusing case */ + if (mf1.size == mf2.size && + !memcmp(mf1.ptr, mf2.ptr, mf1.size)) + goto free_ab_and_return; + if (o->binary) + emit_binary_diff(&mf1, &mf2); + else + printf("Binary files %s and %s differ\n", + lbl[0], lbl[1]); + } else { /* Crazy xdl interfaces.. */ const char *diffopts = getenv("GIT_DIFF_OPTS"); @@@ -928,6 -1023,7 +1023,7 @@@ static void run_diff_cmd(const char *pg struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { if (pgm) { @@@ -937,7 -1033,7 +1033,7 @@@ } if (one && two) builtin_diff(name, other ? other : name, - one, two, xfrm_msg, complete_rewrite); + one, two, xfrm_msg, o, complete_rewrite); else printf("* Unmerged path %s\n", name); } @@@ -971,7 -1067,7 +1067,7 @@@ static void run_diff(struct diff_filepa if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ - run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, o, 0); return; } @@@ -1041,14 -1137,14 +1137,14 @@@ * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); - run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, o, 0); free(null); null = alloc_filespec(one->path); - run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, o, 0); free(null); } else - run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, o, complete_rewrite); free(name_munged); @@@ -1147,6 -1243,10 +1243,10 @@@ int diff_opt_parse(struct diff_options options->rename_limit = strtoul(arg+2, NULL, 10); else if (!strcmp(arg, "--full-index")) options->full_index = 1; + else if (!strcmp(arg, "--binary")) { + options->output_format = DIFF_FORMAT_PATCH; + options->full_index = options->binary = 1; + } else if (!strcmp(arg, "--name-only")) options->output_format = DIFF_FORMAT_NAME; else if (!strcmp(arg, "--name-status"))