diff-delta.con commit count-delta: tweak counting of copied source material. (907380e)
   1/*
   2 * diff-delta.c: generate a delta between two buffers
   3 *
   4 *  Many parts of this file have been lifted from LibXDiff version 0.10.
   5 *  http://www.xmailserver.org/xdiff-lib.html
   6 *
   7 *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
   8 *  Copyright (C) 2003  Davide Libenzi
   9 *
  10 *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
  11 *
  12 *  This file is free software; you can redistribute it and/or
  13 *  modify it under the terms of the GNU Lesser General Public
  14 *  License as published by the Free Software Foundation; either
  15 *  version 2.1 of the License, or (at your option) any later version.
  16 *
  17 *  Use of this within git automatically means that the LGPL
  18 *  licensing gets turned into GPLv2 within this project.
  19 */
  20
  21#include <stdlib.h>
  22#include <string.h>
  23#include "delta.h"
  24
  25
  26struct index {
  27        const unsigned char *ptr;
  28        struct index *next;
  29};
  30
  31static struct index ** delta_index(const unsigned char *buf,
  32                                   unsigned long bufsize,
  33                                   unsigned int *hash_shift)
  34{
  35        unsigned long hsize;
  36        unsigned int hshift, i;
  37        const unsigned char *data;
  38        struct index *entry, **hash;
  39        void *mem;
  40
  41        /* determine index hash size */
  42        hsize = bufsize / 4;
  43        for (i = 8; (1 << i) < hsize && i < 16; i++);
  44        hsize = 1 << i;
  45        hshift = i - 8;
  46        *hash_shift = hshift;
  47
  48        /* allocate lookup index */
  49        mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
  50        if (!mem)
  51                return NULL;
  52        hash = mem;
  53        entry = mem + hsize * sizeof(*hash);
  54        memset(hash, 0, hsize * sizeof(*hash));
  55
  56        /* then populate it */
  57        data = buf + bufsize - 2;
  58        while (data > buf) {
  59                entry->ptr = --data;
  60                i = data[0] ^ data[1] ^ (data[2] << hshift);
  61                entry->next = hash[i];
  62                hash[i] = entry++;
  63        }
  64
  65        return hash;
  66}
  67
  68/* provide the size of the copy opcode given the block offset and size */
  69#define COPYOP_SIZE(o, s) \
  70    (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
  71     !!(s & 0xff) + !!(s & 0xff00) + 1)
  72
  73/* the maximum size for any opcode */
  74#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
  75
  76void *diff_delta(void *from_buf, unsigned long from_size,
  77                 void *to_buf, unsigned long to_size,
  78                 unsigned long *delta_size,
  79                 unsigned long max_size)
  80{
  81        unsigned int i, outpos, outsize, inscnt, hash_shift;
  82        const unsigned char *ref_data, *ref_top, *data, *top;
  83        unsigned char *out;
  84        struct index *entry, **hash;
  85
  86        if (!from_size || !to_size)
  87                return NULL;
  88        hash = delta_index(from_buf, from_size, &hash_shift);
  89        if (!hash)
  90                return NULL;
  91
  92        outpos = 0;
  93        outsize = 8192;
  94        if (max_size && outsize >= max_size)
  95                outsize = max_size + MAX_OP_SIZE + 1;
  96        out = malloc(outsize);
  97        if (!out) {
  98                free(hash);
  99                return NULL;
 100        }
 101
 102        ref_data = from_buf;
 103        ref_top = from_buf + from_size;
 104        data = to_buf;
 105        top = to_buf + to_size;
 106
 107        /* store reference buffer size */
 108        out[outpos++] = from_size;
 109        from_size >>= 7;
 110        while (from_size) {
 111                out[outpos - 1] |= 0x80;
 112                out[outpos++] = from_size;
 113                from_size >>= 7;
 114        }
 115
 116        /* store target buffer size */
 117        out[outpos++] = to_size;
 118        to_size >>= 7;
 119        while (to_size) {
 120                out[outpos - 1] |= 0x80;
 121                out[outpos++] = to_size;
 122                to_size >>= 7;
 123        }
 124
 125        inscnt = 0;
 126
 127        while (data < top) {
 128                unsigned int moff = 0, msize = 0;
 129                if (data + 2 < top) {
 130                        i = data[0] ^ data[1] ^ (data[2] << hash_shift);
 131                        for (entry = hash[i]; entry; entry = entry->next) {
 132                                const unsigned char *ref = entry->ptr;
 133                                const unsigned char *src = data;
 134                                unsigned int ref_size = ref_top - ref;
 135                                if (ref_size > top - src)
 136                                        ref_size = top - src;
 137                                if (ref_size > 0x10000)
 138                                        ref_size = 0x10000;
 139                                if (ref_size <= msize)
 140                                        break;
 141                                while (ref_size && *src++ == *ref) {
 142                                        ref++;
 143                                        ref_size--;
 144                                }
 145                                ref_size = ref - entry->ptr;
 146                                if (msize < ref - entry->ptr) {
 147                                        /* this is our best match so far */
 148                                        msize = ref - entry->ptr;
 149                                        moff = entry->ptr - ref_data;
 150                                }
 151                        }
 152                }
 153
 154                if (!msize || msize < COPYOP_SIZE(moff, msize)) {
 155                        if (!inscnt)
 156                                outpos++;
 157                        out[outpos++] = *data++;
 158                        inscnt++;
 159                        if (inscnt == 0x7f) {
 160                                out[outpos - inscnt - 1] = inscnt;
 161                                inscnt = 0;
 162                        }
 163                } else {
 164                        unsigned char *op;
 165
 166                        if (inscnt) {
 167                                out[outpos - inscnt - 1] = inscnt;
 168                                inscnt = 0;
 169                        }
 170
 171                        data += msize;
 172                        op = out + outpos++;
 173                        i = 0x80;
 174
 175                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
 176                        moff >>= 8;
 177                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
 178                        moff >>= 8;
 179                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
 180                        moff >>= 8;
 181                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
 182
 183                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
 184                        msize >>= 8;
 185                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
 186
 187                        *op = i;
 188                }
 189
 190                if (outpos >= outsize - MAX_OP_SIZE) {
 191                        void *tmp = out;
 192                        outsize = outsize * 3 / 2;
 193                        if (max_size && outsize >= max_size)
 194                                outsize = max_size + MAX_OP_SIZE + 1;
 195                        if (max_size && outpos > max_size)
 196                                out = NULL;
 197                        else
 198                                out = realloc(out, outsize);
 199                        if (!out) {
 200                                free(tmp);
 201                                free(hash);
 202                                return NULL;
 203                        }
 204                }
 205        }
 206
 207        if (inscnt)
 208                out[outpos - inscnt - 1] = inscnt;
 209
 210        free(hash);
 211        *delta_size = outpos;
 212        return out;
 213}