diff-delta.con commit git-rev-tree: teach it about tag references (ea84480)
   1/*
   2 * diff-delta.c: generate a delta between two buffers
   3 *
   4 *  Many parts of this file have been lifted from LibXDiff version 0.10.
   5 *  http://www.xmailserver.org/xdiff-lib.html
   6 *
   7 *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
   8 *  Copyright (C) 2003  Davide Libenzi
   9 *
  10 *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
  11 *
  12 *  This file is free software; you can redistribute it and/or
  13 *  modify it under the terms of the GNU Lesser General Public
  14 *  License as published by the Free Software Foundation; either
  15 *  version 2.1 of the License, or (at your option) any later version.
  16 *
  17 *  Use of this within git automatically means that the LGPL
  18 *  licensing gets turned into GPLv2 within this project.
  19 */
  20
  21#include <stdlib.h>
  22#include "delta.h"
  23
  24
  25/* block size: min = 16, max = 64k, power of 2 */
  26#define BLK_SIZE 16
  27
  28#define MIN(a, b) ((a) < (b) ? (a) : (b))
  29
  30#define GR_PRIME 0x9e370001
  31#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
  32        
  33/* largest prime smaller than 65536 */
  34#define BASE 65521
  35
  36/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
  37#define NMAX 5552
  38
  39#define DO1(buf, i)  { s1 += buf[i]; s2 += s1; }
  40#define DO2(buf, i)  DO1(buf, i); DO1(buf, i + 1);
  41#define DO4(buf, i)  DO2(buf, i); DO2(buf, i + 2);
  42#define DO8(buf, i)  DO4(buf, i); DO4(buf, i + 4);
  43#define DO16(buf)    DO8(buf, 0); DO8(buf, 8);
  44
  45static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
  46{
  47        int k;
  48        unsigned int s1 = adler & 0xffff;
  49        unsigned int s2 = adler >> 16;
  50
  51        while (len > 0) {
  52                k = MIN(len, NMAX);
  53                len -= k;
  54                while (k >= 16) {
  55                        DO16(buf);
  56                        buf += 16;
  57                        k -= 16;
  58                }
  59                if (k != 0)
  60                        do {
  61                                s1 += *buf++;
  62                                s2 += s1;
  63                        } while (--k);
  64                s1 %= BASE;
  65                s2 %= BASE;
  66        }
  67
  68        return (s2 << 16) | s1;
  69}
  70
  71static unsigned int hashbits(unsigned int size)
  72{
  73        unsigned int val = 1, bits = 0;
  74        while (val < size && bits < 32) {
  75                val <<= 1;
  76                bits++;
  77        }
  78        return bits ? bits: 1;
  79}
  80
  81typedef struct s_chanode {
  82        struct s_chanode *next;
  83        int icurr;
  84} chanode_t;
  85
  86typedef struct s_chastore {
  87        chanode_t *head, *tail;
  88        int isize, nsize;
  89        chanode_t *ancur;
  90        chanode_t *sncur;
  91        int scurr;
  92} chastore_t;
  93
  94static void cha_init(chastore_t *cha, int isize, int icount)
  95{
  96        cha->head = cha->tail = NULL;
  97        cha->isize = isize;
  98        cha->nsize = icount * isize;
  99        cha->ancur = cha->sncur = NULL;
 100        cha->scurr = 0;
 101}
 102
 103static void *cha_alloc(chastore_t *cha)
 104{
 105        chanode_t *ancur;
 106        void *data;
 107
 108        ancur = cha->ancur;
 109        if (!ancur || ancur->icurr == cha->nsize) {
 110                ancur = malloc(sizeof(chanode_t) + cha->nsize);
 111                if (!ancur)
 112                        return NULL;
 113                ancur->icurr = 0;
 114                ancur->next = NULL;
 115                if (cha->tail)
 116                        cha->tail->next = ancur;
 117                if (!cha->head)
 118                        cha->head = ancur;
 119                cha->tail = ancur;
 120                cha->ancur = ancur;
 121        }
 122
 123        data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
 124        ancur->icurr += cha->isize;
 125        return data;
 126}
 127
 128static void cha_free(chastore_t *cha)
 129{
 130        chanode_t *cur = cha->head;
 131        while (cur) {
 132                chanode_t *tmp = cur;
 133                cur = cur->next;
 134                free(tmp);
 135        }
 136}
 137
 138typedef struct s_bdrecord {
 139        struct s_bdrecord *next;
 140        unsigned int fp;
 141        const unsigned char *ptr;
 142} bdrecord_t;
 143
 144typedef struct s_bdfile {
 145        const unsigned char *data, *top;
 146        chastore_t cha;
 147        unsigned int fphbits;
 148        bdrecord_t **fphash;
 149} bdfile_t;
 150
 151static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
 152{
 153        unsigned int fphbits;
 154        int i, hsize;
 155        const unsigned char *base, *data, *top;
 156        bdrecord_t *brec;
 157        bdrecord_t **fphash;
 158
 159        fphbits = hashbits(bufsize / BLK_SIZE + 1);
 160        hsize = 1 << fphbits;
 161        fphash = malloc(hsize * sizeof(bdrecord_t *));
 162        if (!fphash)
 163                return -1;
 164        for (i = 0; i < hsize; i++)
 165                fphash[i] = NULL;
 166        cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
 167
 168        bdf->data = data = base = buf;
 169        bdf->top = top = buf + bufsize;
 170        data += (bufsize / BLK_SIZE) * BLK_SIZE;
 171        if (data == top)
 172                data -= BLK_SIZE;
 173
 174        for ( ; data >= base; data -= BLK_SIZE) {
 175                brec = cha_alloc(&bdf->cha);
 176                if (!brec) {
 177                        cha_free(&bdf->cha);
 178                        free(fphash);
 179                        return -1;
 180                }
 181                brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
 182                brec->ptr = data;
 183                i = HASH(brec->fp, fphbits);
 184                brec->next = fphash[i];
 185                fphash[i] = brec;
 186        }
 187
 188        bdf->fphbits = fphbits;
 189        bdf->fphash = fphash;
 190
 191        return 0;
 192}
 193
 194static void delta_cleanup(bdfile_t *bdf)
 195{
 196        free(bdf->fphash);
 197        cha_free(&bdf->cha);
 198}
 199
 200#define COPYOP_SIZE(o, s) \
 201    (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
 202     !!(s & 0xff) + !!(s & 0xff00) + 1)
 203
 204void *diff_delta(void *from_buf, unsigned long from_size,
 205                 void *to_buf, unsigned long to_size,
 206                 unsigned long *delta_size)
 207{
 208        int i, outpos, outsize, inscnt, csize, msize, moff;
 209        unsigned int fp;
 210        const unsigned char *data, *top, *ptr1, *ptr2;
 211        unsigned char *out, *orig;
 212        bdrecord_t *brec;
 213        bdfile_t bdf;
 214
 215        if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
 216                return NULL;
 217        
 218        outpos = 0;
 219        outsize = 8192;
 220        out = malloc(outsize);
 221        if (!out) {
 222                delta_cleanup(&bdf);
 223                return NULL;
 224        }
 225
 226        data = to_buf;
 227        top = to_buf + to_size;
 228
 229        /* store reference buffer size */
 230        orig = out + outpos++;
 231        *orig = i = 0;
 232        do {
 233                if (from_size & 0xff) {
 234                        *orig |= (1 << i);
 235                        out[outpos++] = from_size;
 236                }
 237                i++;
 238                from_size >>= 8;
 239        } while (from_size);
 240
 241        /* store target buffer size */
 242        orig = out + outpos++;
 243        *orig = i = 0;
 244        do {
 245                if (to_size & 0xff) {
 246                        *orig |= (1 << i);
 247                        out[outpos++] = to_size;
 248                }
 249                i++;
 250                to_size >>= 8;
 251        } while (to_size);
 252
 253        inscnt = 0;
 254        moff = 0;
 255        while (data < top) {
 256                msize = 0;
 257                fp = adler32(0, data, MIN(top - data, BLK_SIZE));
 258                i = HASH(fp, bdf.fphbits);
 259                for (brec = bdf.fphash[i]; brec; brec = brec->next) {
 260                        if (brec->fp == fp) {
 261                                csize = bdf.top - brec->ptr;
 262                                if (csize > top - data)
 263                                        csize = top - data;
 264                                for (ptr1 = brec->ptr, ptr2 = data; 
 265                                     csize && *ptr1 == *ptr2;
 266                                     csize--, ptr1++, ptr2++);
 267
 268                                csize = ptr1 - brec->ptr;
 269                                if (csize > msize) {
 270                                        moff = brec->ptr - bdf.data;
 271                                        msize = csize;
 272                                        if (msize >= 0x10000) {
 273                                                msize = 0x10000;
 274                                                break;
 275                                        }
 276                                }
 277                        }
 278                }
 279
 280                if (!msize || msize < COPYOP_SIZE(moff, msize)) {
 281                        if (!inscnt)
 282                                outpos++;
 283                        out[outpos++] = *data++;
 284                        inscnt++;
 285                        if (inscnt == 0x7f) {
 286                                out[outpos - inscnt - 1] = inscnt;
 287                                inscnt = 0;
 288                        }
 289                } else {
 290                        if (inscnt) {
 291                                out[outpos - inscnt - 1] = inscnt;
 292                                inscnt = 0;
 293                        }
 294
 295                        data += msize;
 296                        orig = out + outpos++;
 297                        i = 0x80;
 298
 299                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
 300                        moff >>= 8;
 301                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
 302                        moff >>= 8;
 303                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
 304                        moff >>= 8;
 305                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
 306
 307                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
 308                        msize >>= 8;
 309                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
 310
 311                        *orig = i;
 312                }
 313
 314                /* next time around the largest possible output is 1 + 4 + 3 */
 315                if (outpos > outsize - 8) {
 316                        void *tmp = out;
 317                        outsize = outsize * 3 / 2;
 318                        out = realloc(out, outsize);
 319                        if (!out) {
 320                                free(tmp);
 321                                delta_cleanup(&bdf);
 322                                return NULL;
 323                        }
 324                }
 325        }
 326
 327        if (inscnt)
 328                out[outpos - inscnt - 1] = inscnt;
 329
 330        delta_cleanup(&bdf);
 331        *delta_size = outpos;
 332        return out;
 333}