diff-delta.con commit Merge fixes up to GIT 1.0.4 (2414721)
   1/*
   2 * diff-delta.c: generate a delta between two buffers
   3 *
   4 *  Many parts of this file have been lifted from LibXDiff version 0.10.
   5 *  http://www.xmailserver.org/xdiff-lib.html
   6 *
   7 *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
   8 *  Copyright (C) 2003  Davide Libenzi
   9 *
  10 *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
  11 *
  12 *  This file is free software; you can redistribute it and/or
  13 *  modify it under the terms of the GNU Lesser General Public
  14 *  License as published by the Free Software Foundation; either
  15 *  version 2.1 of the License, or (at your option) any later version.
  16 *
  17 *  Use of this within git automatically means that the LGPL
  18 *  licensing gets turned into GPLv2 within this project.
  19 */
  20
  21#include <stdlib.h>
  22#include "delta.h"
  23
  24
  25/* block size: min = 16, max = 64k, power of 2 */
  26#define BLK_SIZE 16
  27
  28#define MIN(a, b) ((a) < (b) ? (a) : (b))
  29
  30#define GR_PRIME 0x9e370001
  31#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
  32        
  33/* largest prime smaller than 65536 */
  34#define BASE 65521
  35
  36/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
  37#define NMAX 5552
  38
  39#define DO1(buf, i)  { s1 += buf[i]; s2 += s1; }
  40#define DO2(buf, i)  DO1(buf, i); DO1(buf, i + 1);
  41#define DO4(buf, i)  DO2(buf, i); DO2(buf, i + 2);
  42#define DO8(buf, i)  DO4(buf, i); DO4(buf, i + 4);
  43#define DO16(buf)    DO8(buf, 0); DO8(buf, 8);
  44
  45static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
  46{
  47        int k;
  48        unsigned int s1 = adler & 0xffff;
  49        unsigned int s2 = adler >> 16;
  50
  51        while (len > 0) {
  52                k = MIN(len, NMAX);
  53                len -= k;
  54                while (k >= 16) {
  55                        DO16(buf);
  56                        buf += 16;
  57                        k -= 16;
  58                }
  59                if (k != 0)
  60                        do {
  61                                s1 += *buf++;
  62                                s2 += s1;
  63                        } while (--k);
  64                s1 %= BASE;
  65                s2 %= BASE;
  66        }
  67
  68        return (s2 << 16) | s1;
  69}
  70
  71static unsigned int hashbits(unsigned int size)
  72{
  73        unsigned int val = 1, bits = 0;
  74        while (val < size && bits < 32) {
  75                val <<= 1;
  76                bits++;
  77        }
  78        return bits ? bits: 1;
  79}
  80
  81typedef struct s_chanode {
  82        struct s_chanode *next;
  83        int icurr;
  84} chanode_t;
  85
  86typedef struct s_chastore {
  87        int isize, nsize;
  88        chanode_t *ancur;
  89} chastore_t;
  90
  91static void cha_init(chastore_t *cha, int isize, int icount)
  92{
  93        cha->isize = isize;
  94        cha->nsize = icount * isize;
  95        cha->ancur = NULL;
  96}
  97
  98static void *cha_alloc(chastore_t *cha)
  99{
 100        chanode_t *ancur;
 101        void *data;
 102
 103        ancur = cha->ancur;
 104        if (!ancur || ancur->icurr == cha->nsize) {
 105                ancur = malloc(sizeof(chanode_t) + cha->nsize);
 106                if (!ancur)
 107                        return NULL;
 108                ancur->icurr = 0;
 109                ancur->next = cha->ancur;
 110                cha->ancur = ancur;
 111        }
 112
 113        data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
 114        ancur->icurr += cha->isize;
 115        return data;
 116}
 117
 118static void cha_free(chastore_t *cha)
 119{
 120        chanode_t *cur = cha->ancur;
 121        while (cur) {
 122                chanode_t *tmp = cur;
 123                cur = cur->next;
 124                free(tmp);
 125        }
 126}
 127
 128typedef struct s_bdrecord {
 129        struct s_bdrecord *next;
 130        unsigned int fp;
 131        const unsigned char *ptr;
 132} bdrecord_t;
 133
 134typedef struct s_bdfile {
 135        chastore_t cha;
 136        unsigned int fphbits;
 137        bdrecord_t **fphash;
 138} bdfile_t;
 139
 140static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
 141{
 142        unsigned int fphbits;
 143        int i, hsize;
 144        const unsigned char *data, *top;
 145        bdrecord_t *brec;
 146        bdrecord_t **fphash;
 147
 148        fphbits = hashbits(bufsize / BLK_SIZE + 1);
 149        hsize = 1 << fphbits;
 150        fphash = malloc(hsize * sizeof(bdrecord_t *));
 151        if (!fphash)
 152                return -1;
 153        for (i = 0; i < hsize; i++)
 154                fphash[i] = NULL;
 155        cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
 156
 157        top = buf + bufsize;
 158        data = buf + (bufsize / BLK_SIZE) * BLK_SIZE;
 159        if (data == top)
 160                data -= BLK_SIZE;
 161
 162        for ( ; data >= buf; data -= BLK_SIZE) {
 163                brec = cha_alloc(&bdf->cha);
 164                if (!brec) {
 165                        cha_free(&bdf->cha);
 166                        free(fphash);
 167                        return -1;
 168                }
 169                brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
 170                brec->ptr = data;
 171                i = HASH(brec->fp, fphbits);
 172                brec->next = fphash[i];
 173                fphash[i] = brec;
 174        }
 175
 176        bdf->fphbits = fphbits;
 177        bdf->fphash = fphash;
 178
 179        return 0;
 180}
 181
 182static void delta_cleanup(bdfile_t *bdf)
 183{
 184        free(bdf->fphash);
 185        cha_free(&bdf->cha);
 186}
 187
 188#define COPYOP_SIZE(o, s) \
 189    (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
 190     !!(s & 0xff) + !!(s & 0xff00) + 1)
 191
 192void *diff_delta(void *from_buf, unsigned long from_size,
 193                 void *to_buf, unsigned long to_size,
 194                 unsigned long *delta_size,
 195                 unsigned long max_size)
 196{
 197        int i, outpos, outsize, inscnt, csize, msize, moff;
 198        unsigned int fp;
 199        const unsigned char *ref_data, *ref_top, *data, *top, *ptr1, *ptr2;
 200        unsigned char *out, *orig;
 201        bdrecord_t *brec;
 202        bdfile_t bdf;
 203
 204        if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
 205                return NULL;
 206        
 207        outpos = 0;
 208        outsize = 8192;
 209        out = malloc(outsize);
 210        if (!out) {
 211                delta_cleanup(&bdf);
 212                return NULL;
 213        }
 214
 215        ref_data = from_buf;
 216        ref_top = from_buf + from_size;
 217        data = to_buf;
 218        top = to_buf + to_size;
 219
 220        /* store reference buffer size */
 221        out[outpos++] = from_size;
 222        from_size >>= 7;
 223        while (from_size) {
 224                out[outpos - 1] |= 0x80;
 225                out[outpos++] = from_size;
 226                from_size >>= 7;
 227        }
 228
 229        /* store target buffer size */
 230        out[outpos++] = to_size;
 231        to_size >>= 7;
 232        while (to_size) {
 233                out[outpos - 1] |= 0x80;
 234                out[outpos++] = to_size;
 235                to_size >>= 7;
 236        }
 237
 238        inscnt = 0;
 239        moff = 0;
 240        while (data < top) {
 241                msize = 0;
 242                fp = adler32(0, data, MIN(top - data, BLK_SIZE));
 243                i = HASH(fp, bdf.fphbits);
 244                for (brec = bdf.fphash[i]; brec; brec = brec->next) {
 245                        if (brec->fp == fp) {
 246                                csize = ref_top - brec->ptr;
 247                                if (csize > top - data)
 248                                        csize = top - data;
 249                                for (ptr1 = brec->ptr, ptr2 = data; 
 250                                     csize && *ptr1 == *ptr2;
 251                                     csize--, ptr1++, ptr2++);
 252
 253                                csize = ptr1 - brec->ptr;
 254                                if (csize > msize) {
 255                                        moff = brec->ptr - ref_data;
 256                                        msize = csize;
 257                                        if (msize >= 0x10000) {
 258                                                msize = 0x10000;
 259                                                break;
 260                                        }
 261                                }
 262                        }
 263                }
 264
 265                if (!msize || msize < COPYOP_SIZE(moff, msize)) {
 266                        if (!inscnt)
 267                                outpos++;
 268                        out[outpos++] = *data++;
 269                        inscnt++;
 270                        if (inscnt == 0x7f) {
 271                                out[outpos - inscnt - 1] = inscnt;
 272                                inscnt = 0;
 273                        }
 274                } else {
 275                        if (inscnt) {
 276                                out[outpos - inscnt - 1] = inscnt;
 277                                inscnt = 0;
 278                        }
 279
 280                        data += msize;
 281                        orig = out + outpos++;
 282                        i = 0x80;
 283
 284                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
 285                        moff >>= 8;
 286                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
 287                        moff >>= 8;
 288                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
 289                        moff >>= 8;
 290                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
 291
 292                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
 293                        msize >>= 8;
 294                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
 295
 296                        *orig = i;
 297                }
 298
 299                if (max_size && outpos > max_size) {
 300                        free(out);
 301                        delta_cleanup(&bdf);
 302                        return NULL;
 303                }
 304
 305                /* next time around the largest possible output is 1 + 4 + 3 */
 306                if (outpos > outsize - 8) {
 307                        void *tmp = out;
 308                        outsize = outsize * 3 / 2;
 309                        out = realloc(out, outsize);
 310                        if (!out) {
 311                                free(tmp);
 312                                delta_cleanup(&bdf);
 313                                return NULL;
 314                        }
 315                }
 316        }
 317
 318        if (inscnt)
 319                out[outpos - inscnt - 1] = inscnt;
 320
 321        delta_cleanup(&bdf);
 322        *delta_size = outpos;
 323        return out;
 324}