diff-delta.con commit Prepare 0.99.6 branch. (09661fd)
   1/*
   2 * diff-delta.c: generate a delta between two buffers
   3 *
   4 *  Many parts of this file have been lifted from LibXDiff version 0.10.
   5 *  http://www.xmailserver.org/xdiff-lib.html
   6 *
   7 *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
   8 *  Copyright (C) 2003  Davide Libenzi
   9 *
  10 *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
  11 *
  12 *  This file is free software; you can redistribute it and/or
  13 *  modify it under the terms of the GNU Lesser General Public
  14 *  License as published by the Free Software Foundation; either
  15 *  version 2.1 of the License, or (at your option) any later version.
  16 *
  17 *  Use of this within git automatically means that the LGPL
  18 *  licensing gets turned into GPLv2 within this project.
  19 */
  20
  21#include <stdlib.h>
  22#include "delta.h"
  23
  24
  25/* block size: min = 16, max = 64k, power of 2 */
  26#define BLK_SIZE 16
  27
  28#define MIN(a, b) ((a) < (b) ? (a) : (b))
  29
  30#define GR_PRIME 0x9e370001
  31#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
  32        
  33/* largest prime smaller than 65536 */
  34#define BASE 65521
  35
  36/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
  37#define NMAX 5552
  38
  39#define DO1(buf, i)  { s1 += buf[i]; s2 += s1; }
  40#define DO2(buf, i)  DO1(buf, i); DO1(buf, i + 1);
  41#define DO4(buf, i)  DO2(buf, i); DO2(buf, i + 2);
  42#define DO8(buf, i)  DO4(buf, i); DO4(buf, i + 4);
  43#define DO16(buf)    DO8(buf, 0); DO8(buf, 8);
  44
  45static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
  46{
  47        int k;
  48        unsigned int s1 = adler & 0xffff;
  49        unsigned int s2 = adler >> 16;
  50
  51        while (len > 0) {
  52                k = MIN(len, NMAX);
  53                len -= k;
  54                while (k >= 16) {
  55                        DO16(buf);
  56                        buf += 16;
  57                        k -= 16;
  58                }
  59                if (k != 0)
  60                        do {
  61                                s1 += *buf++;
  62                                s2 += s1;
  63                        } while (--k);
  64                s1 %= BASE;
  65                s2 %= BASE;
  66        }
  67
  68        return (s2 << 16) | s1;
  69}
  70
  71static unsigned int hashbits(unsigned int size)
  72{
  73        unsigned int val = 1, bits = 0;
  74        while (val < size && bits < 32) {
  75                val <<= 1;
  76                bits++;
  77        }
  78        return bits ? bits: 1;
  79}
  80
  81typedef struct s_chanode {
  82        struct s_chanode *next;
  83        int icurr;
  84} chanode_t;
  85
  86typedef struct s_chastore {
  87        chanode_t *head, *tail;
  88        int isize, nsize;
  89        chanode_t *ancur;
  90        chanode_t *sncur;
  91        int scurr;
  92} chastore_t;
  93
  94static void cha_init(chastore_t *cha, int isize, int icount)
  95{
  96        cha->head = cha->tail = NULL;
  97        cha->isize = isize;
  98        cha->nsize = icount * isize;
  99        cha->ancur = cha->sncur = NULL;
 100        cha->scurr = 0;
 101}
 102
 103static void *cha_alloc(chastore_t *cha)
 104{
 105        chanode_t *ancur;
 106        void *data;
 107
 108        ancur = cha->ancur;
 109        if (!ancur || ancur->icurr == cha->nsize) {
 110                ancur = malloc(sizeof(chanode_t) + cha->nsize);
 111                if (!ancur)
 112                        return NULL;
 113                ancur->icurr = 0;
 114                ancur->next = NULL;
 115                if (cha->tail)
 116                        cha->tail->next = ancur;
 117                if (!cha->head)
 118                        cha->head = ancur;
 119                cha->tail = ancur;
 120                cha->ancur = ancur;
 121        }
 122
 123        data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
 124        ancur->icurr += cha->isize;
 125        return data;
 126}
 127
 128static void cha_free(chastore_t *cha)
 129{
 130        chanode_t *cur = cha->head;
 131        while (cur) {
 132                chanode_t *tmp = cur;
 133                cur = cur->next;
 134                free(tmp);
 135        }
 136}
 137
 138typedef struct s_bdrecord {
 139        struct s_bdrecord *next;
 140        unsigned int fp;
 141        const unsigned char *ptr;
 142} bdrecord_t;
 143
 144typedef struct s_bdfile {
 145        const unsigned char *data, *top;
 146        chastore_t cha;
 147        unsigned int fphbits;
 148        bdrecord_t **fphash;
 149} bdfile_t;
 150
 151static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
 152{
 153        unsigned int fphbits;
 154        int i, hsize;
 155        const unsigned char *base, *data, *top;
 156        bdrecord_t *brec;
 157        bdrecord_t **fphash;
 158
 159        fphbits = hashbits(bufsize / BLK_SIZE + 1);
 160        hsize = 1 << fphbits;
 161        fphash = malloc(hsize * sizeof(bdrecord_t *));
 162        if (!fphash)
 163                return -1;
 164        for (i = 0; i < hsize; i++)
 165                fphash[i] = NULL;
 166        cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
 167
 168        bdf->data = data = base = buf;
 169        bdf->top = top = buf + bufsize;
 170        data += (bufsize / BLK_SIZE) * BLK_SIZE;
 171        if (data == top)
 172                data -= BLK_SIZE;
 173
 174        for ( ; data >= base; data -= BLK_SIZE) {
 175                brec = cha_alloc(&bdf->cha);
 176                if (!brec) {
 177                        cha_free(&bdf->cha);
 178                        free(fphash);
 179                        return -1;
 180                }
 181                brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
 182                brec->ptr = data;
 183                i = HASH(brec->fp, fphbits);
 184                brec->next = fphash[i];
 185                fphash[i] = brec;
 186        }
 187
 188        bdf->fphbits = fphbits;
 189        bdf->fphash = fphash;
 190
 191        return 0;
 192}
 193
 194static void delta_cleanup(bdfile_t *bdf)
 195{
 196        free(bdf->fphash);
 197        cha_free(&bdf->cha);
 198}
 199
 200#define COPYOP_SIZE(o, s) \
 201    (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
 202     !!(s & 0xff) + !!(s & 0xff00) + 1)
 203
 204void *diff_delta(void *from_buf, unsigned long from_size,
 205                 void *to_buf, unsigned long to_size,
 206                 unsigned long *delta_size,
 207                 unsigned long max_size)
 208{
 209        int i, outpos, outsize, inscnt, csize, msize, moff;
 210        unsigned int fp;
 211        const unsigned char *data, *top, *ptr1, *ptr2;
 212        unsigned char *out, *orig;
 213        bdrecord_t *brec;
 214        bdfile_t bdf;
 215
 216        if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
 217                return NULL;
 218        
 219        outpos = 0;
 220        outsize = 8192;
 221        out = malloc(outsize);
 222        if (!out) {
 223                delta_cleanup(&bdf);
 224                return NULL;
 225        }
 226
 227        data = to_buf;
 228        top = to_buf + to_size;
 229
 230        /* store reference buffer size */
 231        out[outpos++] = from_size;
 232        from_size >>= 7;
 233        while (from_size) {
 234                out[outpos - 1] |= 0x80;
 235                out[outpos++] = from_size;
 236                from_size >>= 7;
 237        }
 238
 239        /* store target buffer size */
 240        out[outpos++] = to_size;
 241        to_size >>= 7;
 242        while (to_size) {
 243                out[outpos - 1] |= 0x80;
 244                out[outpos++] = to_size;
 245                to_size >>= 7;
 246        }
 247
 248        inscnt = 0;
 249        moff = 0;
 250        while (data < top) {
 251                msize = 0;
 252                fp = adler32(0, data, MIN(top - data, BLK_SIZE));
 253                i = HASH(fp, bdf.fphbits);
 254                for (brec = bdf.fphash[i]; brec; brec = brec->next) {
 255                        if (brec->fp == fp) {
 256                                csize = bdf.top - brec->ptr;
 257                                if (csize > top - data)
 258                                        csize = top - data;
 259                                for (ptr1 = brec->ptr, ptr2 = data; 
 260                                     csize && *ptr1 == *ptr2;
 261                                     csize--, ptr1++, ptr2++);
 262
 263                                csize = ptr1 - brec->ptr;
 264                                if (csize > msize) {
 265                                        moff = brec->ptr - bdf.data;
 266                                        msize = csize;
 267                                        if (msize >= 0x10000) {
 268                                                msize = 0x10000;
 269                                                break;
 270                                        }
 271                                }
 272                        }
 273                }
 274
 275                if (!msize || msize < COPYOP_SIZE(moff, msize)) {
 276                        if (!inscnt)
 277                                outpos++;
 278                        out[outpos++] = *data++;
 279                        inscnt++;
 280                        if (inscnt == 0x7f) {
 281                                out[outpos - inscnt - 1] = inscnt;
 282                                inscnt = 0;
 283                        }
 284                } else {
 285                        if (inscnt) {
 286                                out[outpos - inscnt - 1] = inscnt;
 287                                inscnt = 0;
 288                        }
 289
 290                        data += msize;
 291                        orig = out + outpos++;
 292                        i = 0x80;
 293
 294                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
 295                        moff >>= 8;
 296                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
 297                        moff >>= 8;
 298                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
 299                        moff >>= 8;
 300                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
 301
 302                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
 303                        msize >>= 8;
 304                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
 305
 306                        *orig = i;
 307                }
 308
 309                if (max_size && outpos > max_size) {
 310                        free(out);
 311                        delta_cleanup(&bdf);
 312                        return NULL;
 313                }
 314
 315                /* next time around the largest possible output is 1 + 4 + 3 */
 316                if (outpos > outsize - 8) {
 317                        void *tmp = out;
 318                        outsize = outsize * 3 / 2;
 319                        out = realloc(out, outsize);
 320                        if (!out) {
 321                                free(tmp);
 322                                delta_cleanup(&bdf);
 323                                return NULL;
 324                        }
 325                }
 326        }
 327
 328        if (inscnt)
 329                out[outpos - inscnt - 1] = inscnt;
 330
 331        delta_cleanup(&bdf);
 332        *delta_size = outpos;
 333        return out;
 334}