0d9511849993f01e5886ba2bd5a529a66e00281f
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "delta.h"
   6#include "pack.h"
   7#include "csum-file.h"
   8
   9static int max_depth = 10;
  10static unsigned long object_count;
  11static unsigned long duplicate_count;
  12static unsigned long packoff;
  13static unsigned long overflow_count;
  14static int packfd;
  15static int current_depth;
  16static void *lastdat;
  17static unsigned long lastdatlen;
  18static unsigned char lastsha1[20];
  19static unsigned char packsha1[20];
  20
  21struct object_entry
  22{
  23        struct object_entry *next;
  24        unsigned long offset;
  25        unsigned char sha1[20];
  26};
  27
  28struct overflow_object_entry
  29{
  30        struct overflow_object_entry *next;
  31        struct object_entry oe;
  32};
  33
  34struct object_entry *pool_start;
  35struct object_entry *pool_next;
  36struct object_entry *pool_end;
  37struct overflow_object_entry *overflow;
  38struct object_entry *table[1 << 16];
  39
  40static struct object_entry* new_object(unsigned char *sha1)
  41{
  42        if (pool_next != pool_end) {
  43                struct object_entry *e = pool_next++;
  44                memcpy(e->sha1, sha1, sizeof(e->sha1));
  45                return e;
  46        } else {
  47                struct overflow_object_entry *e;
  48
  49                e = xmalloc(sizeof(struct overflow_object_entry));
  50                e->next = overflow;
  51                memcpy(e->oe.sha1, sha1, sizeof(e->oe.sha1));
  52                overflow = e;
  53                overflow_count++;
  54                return &e->oe;
  55        }
  56}
  57
  58static struct object_entry* insert_object(unsigned char *sha1)
  59{
  60        unsigned int h = sha1[0] << 8 | sha1[1];
  61        struct object_entry *e = table[h];
  62        struct object_entry *p = 0;
  63
  64        while (e) {
  65                if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
  66                        return e;
  67                p = e;
  68                e = e->next;
  69        }
  70
  71        e = new_object(sha1);
  72        e->next = 0;
  73        e->offset = 0;
  74        if (p)
  75                p->next = e;
  76        else
  77                table[h] = e;
  78        return e;
  79}
  80
  81static ssize_t yread(int fd, void *buffer, size_t length)
  82{
  83        ssize_t ret = 0;
  84        while (ret < length) {
  85                ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
  86                if (size < 0) {
  87                        return size;
  88                }
  89                if (size == 0) {
  90                        return ret;
  91                }
  92                ret += size;
  93        }
  94        return ret;
  95}
  96
  97static ssize_t ywrite(int fd, void *buffer, size_t length)
  98{
  99        ssize_t ret = 0;
 100        while (ret < length) {
 101                ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
 102                if (size < 0) {
 103                        return size;
 104                }
 105                if (size == 0) {
 106                        return ret;
 107                }
 108                ret += size;
 109        }
 110        return ret;
 111}
 112
 113static unsigned long encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
 114{
 115        int n = 1;
 116        unsigned char c;
 117
 118        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 119                die("bad type %d", type);
 120
 121        c = (type << 4) | (size & 15);
 122        size >>= 4;
 123        while (size) {
 124                *hdr++ = c | 0x80;
 125                c = size & 0x7f;
 126                size >>= 7;
 127                n++;
 128        }
 129        *hdr = c;
 130        return n;
 131}
 132
 133static void write_blob(void *dat, unsigned long datlen)
 134{
 135        z_stream s;
 136        void *out, *delta;
 137        unsigned char hdr[64];
 138        unsigned long hdrlen, deltalen;
 139
 140        if (lastdat && current_depth < max_depth) {
 141                delta = diff_delta(lastdat, lastdatlen,
 142                        dat, datlen,
 143                        &deltalen, 0);
 144        } else
 145                delta = 0;
 146
 147        memset(&s, 0, sizeof(s));
 148        deflateInit(&s, zlib_compression_level);
 149
 150        if (delta) {
 151                current_depth++;
 152                s.next_in = delta;
 153                s.avail_in = deltalen;
 154                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
 155                if (ywrite(packfd, hdr, hdrlen) != hdrlen)
 156                        die("Can't write object header: %s", strerror(errno));
 157                if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1))
 158                        die("Can't write object base: %s", strerror(errno));
 159                packoff += hdrlen + sizeof(lastsha1);
 160        } else {
 161                current_depth = 0;
 162                s.next_in = dat;
 163                s.avail_in = datlen;
 164                hdrlen = encode_header(OBJ_BLOB, datlen, hdr);
 165                if (ywrite(packfd, hdr, hdrlen) != hdrlen)
 166                        die("Can't write object header: %s", strerror(errno));
 167                packoff += hdrlen;
 168        }
 169
 170        s.avail_out = deflateBound(&s, s.avail_in);
 171        s.next_out = out = xmalloc(s.avail_out);
 172        while (deflate(&s, Z_FINISH) == Z_OK)
 173                /* nothing */;
 174        deflateEnd(&s);
 175
 176        if (ywrite(packfd, out, s.total_out) != s.total_out)
 177                die("Failed writing compressed data %s", strerror(errno));
 178        packoff += s.total_out;
 179
 180        free(out);
 181        if (delta)
 182                free(delta);
 183}
 184
 185static void init_pack_header()
 186{
 187        const char* magic = "PACK";
 188        unsigned long version = 2;
 189        unsigned long zero = 0;
 190
 191        version = htonl(version);
 192
 193        if (ywrite(packfd, (char*)magic, 4) != 4)
 194                die("Can't write pack magic: %s", strerror(errno));
 195        if (ywrite(packfd, &version, 4) != 4)
 196                die("Can't write pack version: %s", strerror(errno));
 197        if (ywrite(packfd, &zero, 4) != 4)
 198                die("Can't write 0 object count: %s", strerror(errno));
 199        packoff = 4 * 3;
 200}
 201
 202static void fixup_header_footer()
 203{
 204        SHA_CTX c;
 205        char hdr[8];
 206        unsigned long cnt;
 207        char *buf;
 208        size_t n;
 209
 210        if (lseek(packfd, 0, SEEK_SET) != 0)
 211                die("Failed seeking to start: %s", strerror(errno));
 212
 213        SHA1_Init(&c);
 214        if (yread(packfd, hdr, 8) != 8)
 215                die("Failed reading header: %s", strerror(errno));
 216        SHA1_Update(&c, hdr, 8);
 217
 218        cnt = htonl(object_count);
 219        SHA1_Update(&c, &cnt, 4);
 220        if (ywrite(packfd, &cnt, 4) != 4)
 221                die("Failed writing object count: %s", strerror(errno));
 222
 223        buf = xmalloc(128 * 1024);
 224        for (;;) {
 225                n = xread(packfd, buf, 128 * 1024);
 226                if (n <= 0)
 227                        break;
 228                SHA1_Update(&c, buf, n);
 229        }
 230        free(buf);
 231
 232        SHA1_Final(packsha1, &c);
 233        if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1))
 234                die("Failed writing pack checksum: %s", strerror(errno));
 235}
 236
 237static int oecmp (const void *_a, const void *_b)
 238{
 239        struct object_entry *a = *((struct object_entry**)_a);
 240        struct object_entry *b = *((struct object_entry**)_b);
 241        return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
 242}
 243
 244static void write_index(const char *idx_name)
 245{
 246        struct sha1file *f;
 247        struct object_entry **idx, **c, **last;
 248        struct object_entry *e;
 249        struct overflow_object_entry *o;
 250        unsigned int array[256];
 251        int i;
 252
 253        /* Build the sorted table of object IDs. */
 254        idx = xmalloc(object_count * sizeof(struct object_entry*));
 255        c = idx;
 256        for (e = pool_start; e != pool_next; e++)
 257                *c++ = e;
 258        for (o = overflow; o; o = o->next)
 259                *c++ = &o->oe;
 260        last = idx + object_count;
 261        qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
 262
 263        /* Generate the fan-out array. */
 264        c = idx;
 265        for (i = 0; i < 256; i++) {
 266                struct object_entry **next = c;;
 267                while (next < last) {
 268                        if ((*next)->sha1[0] != i)
 269                                break;
 270                        next++;
 271                }
 272                array[i] = htonl(next - idx);
 273                c = next;
 274        }
 275
 276        f = sha1create("%s", idx_name);
 277        sha1write(f, array, 256 * sizeof(int));
 278        for (c = idx; c != last; c++) {
 279                unsigned int offset = htonl((*c)->offset);
 280                sha1write(f, &offset, 4);
 281                sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
 282        }
 283        sha1write(f, packsha1, sizeof(packsha1));
 284        sha1close(f, NULL, 1);
 285        free(idx);
 286}
 287
 288int main(int argc, const char **argv)
 289{
 290        const char *base_name = argv[1];
 291        int est_obj_cnt = atoi(argv[2]);
 292        char *pack_name;
 293        char *idx_name;
 294
 295        pack_name = xmalloc(strlen(base_name) + 6);
 296        sprintf(pack_name, "%s.pack", base_name);
 297        idx_name = xmalloc(strlen(base_name) + 5);
 298        sprintf(idx_name, "%s.idx", base_name);
 299
 300        packfd = open(pack_name, O_RDWR|O_CREAT|O_TRUNC, 0666);
 301        if (packfd < 0)
 302                die("Can't create pack file %s: %s", pack_name, strerror(errno));
 303
 304        pool_start = xmalloc(est_obj_cnt * sizeof(struct object_entry));
 305        pool_next = pool_start;
 306        pool_end = pool_start + est_obj_cnt;
 307
 308        init_pack_header();
 309        for (;;) {
 310                unsigned long datlen;
 311                int hdrlen;
 312                void *dat;
 313                char hdr[128];
 314                unsigned char sha1[20];
 315                SHA_CTX c;
 316                struct object_entry *e;
 317
 318                if (yread(0, &datlen, 4) != 4)
 319
 320                        break;
 321
 322                dat = xmalloc(datlen);
 323                if (yread(0, dat, datlen) != datlen)
 324                        break;
 325
 326                hdrlen = sprintf(hdr, "blob %lu", datlen) + 1;
 327                SHA1_Init(&c);
 328                SHA1_Update(&c, hdr, hdrlen);
 329                SHA1_Update(&c, dat, datlen);
 330                SHA1_Final(sha1, &c);
 331
 332                e = insert_object(sha1);
 333                if (!e->offset) {
 334                        e->offset = packoff;
 335                        write_blob(dat, datlen);
 336                        object_count++;
 337                        printf("%s\n", sha1_to_hex(sha1));
 338                        fflush(stdout);
 339
 340                        if (lastdat)
 341                                free(lastdat);
 342                        lastdat = dat;
 343                        lastdatlen = datlen;
 344                        memcpy(lastsha1, sha1, sizeof(sha1));
 345                } else {
 346                        duplicate_count++;
 347                        free(dat);
 348                }
 349        }
 350        fixup_header_footer();
 351        close(packfd);
 352        write_index(idx_name);
 353
 354        fprintf(stderr, "%lu objects, %lu duplicates, %lu pool overflow\n",
 355                object_count, duplicate_count, overflow_count);
 356
 357        return 0;
 358}