c9c48c5869a7726b0ceb4954385a330fd7653ebd
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "delta.h"
   6#include "pack.h"
   7#include "csum-file.h"
   8
   9struct object_entry
  10{
  11        struct object_entry *next;
  12        unsigned long offset;
  13        unsigned char sha1[20];
  14};
  15
  16struct object_entry_block
  17{
  18        struct object_entry_block *next_block;
  19        struct object_entry *next_free;
  20        struct object_entry *end;
  21        struct object_entry entries[FLEX_ARRAY]; /* more */
  22};
  23
  24struct last_object
  25{
  26        void *data;
  27        unsigned long len;
  28        int depth;
  29        unsigned char sha1[20];
  30};
  31
  32/* Stats and misc. counters. */
  33static int max_depth = 10;
  34static unsigned long alloc_count;
  35static unsigned long object_count;
  36static unsigned long duplicate_count;
  37static unsigned long object_count_by_type[9];
  38static unsigned long duplicate_count_by_type[9];
  39
  40/* The .pack file */
  41static int pack_fd;
  42static unsigned long pack_offset;
  43static unsigned char pack_sha1[20];
  44
  45/* Table of objects we've written. */
  46struct object_entry_block *blocks;
  47struct object_entry *object_table[1 << 16];
  48
  49/* Our last blob */
  50struct last_object last_blob;
  51
  52static void alloc_objects(int cnt)
  53{
  54        struct object_entry_block *b;
  55
  56        b = xmalloc(sizeof(struct object_entry_block)
  57                + cnt * sizeof(struct object_entry));
  58        b->next_block = blocks;
  59        b->next_free = b->entries;
  60        b->end = b->entries + cnt;
  61        blocks = b;
  62        alloc_count += cnt;
  63}
  64
  65static struct object_entry* new_object(unsigned char *sha1)
  66{
  67        struct object_entry *e;
  68
  69        if (blocks->next_free == blocks->end)
  70                alloc_objects(1000);
  71
  72        e = blocks->next_free++;
  73        memcpy(e->sha1, sha1, sizeof(e->sha1));
  74        return e;
  75}
  76
  77static struct object_entry* insert_object(unsigned char *sha1)
  78{
  79        unsigned int h = sha1[0] << 8 | sha1[1];
  80        struct object_entry *e = object_table[h];
  81        struct object_entry *p = 0;
  82
  83        while (e) {
  84                if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
  85                        return e;
  86                p = e;
  87                e = e->next;
  88        }
  89
  90        e = new_object(sha1);
  91        e->next = 0;
  92        e->offset = 0;
  93        if (p)
  94                p->next = e;
  95        else
  96                object_table[h] = e;
  97        return e;
  98}
  99
 100static ssize_t yread(int fd, void *buffer, size_t length)
 101{
 102        ssize_t ret = 0;
 103        while (ret < length) {
 104                ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
 105                if (size < 0) {
 106                        return size;
 107                }
 108                if (size == 0) {
 109                        return ret;
 110                }
 111                ret += size;
 112        }
 113        return ret;
 114}
 115
 116static ssize_t ywrite(int fd, void *buffer, size_t length)
 117{
 118        ssize_t ret = 0;
 119        while (ret < length) {
 120                ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
 121                if (size < 0) {
 122                        return size;
 123                }
 124                if (size == 0) {
 125                        return ret;
 126                }
 127                ret += size;
 128        }
 129        return ret;
 130}
 131
 132static unsigned long encode_header(
 133        enum object_type type,
 134        unsigned long size,
 135        unsigned char *hdr)
 136{
 137        int n = 1;
 138        unsigned char c;
 139
 140        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 141                die("bad type %d", type);
 142
 143        c = (type << 4) | (size & 15);
 144        size >>= 4;
 145        while (size) {
 146                *hdr++ = c | 0x80;
 147                c = size & 0x7f;
 148                size >>= 7;
 149                n++;
 150        }
 151        *hdr = c;
 152        return n;
 153}
 154
 155static int store_object(
 156        enum object_type type,
 157        void *dat,
 158        unsigned long datlen,
 159        struct last_object *last)
 160{
 161        void *out, *delta;
 162        struct object_entry *e;
 163        unsigned char hdr[96];
 164        unsigned char sha1[20];
 165        unsigned long hdrlen, deltalen;
 166        SHA_CTX c;
 167        z_stream s;
 168
 169        hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
 170        SHA1_Init(&c);
 171        SHA1_Update(&c, hdr, hdrlen);
 172        SHA1_Update(&c, dat, datlen);
 173        SHA1_Final(sha1, &c);
 174
 175        e = insert_object(sha1);
 176        if (e->offset) {
 177                duplicate_count++;
 178                duplicate_count_by_type[type]++;
 179                return 0;
 180        }
 181        e->offset = pack_offset;
 182        object_count++;
 183        object_count_by_type[type]++;
 184
 185        if (last->data && last->depth < max_depth)
 186                delta = diff_delta(last->data, last->len,
 187                        dat, datlen,
 188                        &deltalen, 0);
 189        else
 190                delta = 0;
 191
 192        memset(&s, 0, sizeof(s));
 193        deflateInit(&s, zlib_compression_level);
 194
 195        if (delta) {
 196                last->depth++;
 197                s.next_in = delta;
 198                s.avail_in = deltalen;
 199                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
 200                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 201                        die("Can't write object header: %s", strerror(errno));
 202                if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
 203                        die("Can't write object base: %s", strerror(errno));
 204                pack_offset += hdrlen + sizeof(sha1);
 205        } else {
 206                last->depth = 0;
 207                s.next_in = dat;
 208                s.avail_in = datlen;
 209                hdrlen = encode_header(type, datlen, hdr);
 210                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 211                        die("Can't write object header: %s", strerror(errno));
 212                pack_offset += hdrlen;
 213        }
 214
 215        s.avail_out = deflateBound(&s, s.avail_in);
 216        s.next_out = out = xmalloc(s.avail_out);
 217        while (deflate(&s, Z_FINISH) == Z_OK)
 218                /* nothing */;
 219        deflateEnd(&s);
 220
 221        if (ywrite(pack_fd, out, s.total_out) != s.total_out)
 222                die("Failed writing compressed data %s", strerror(errno));
 223        pack_offset += s.total_out;
 224
 225        free(out);
 226        if (delta)
 227                free(delta);
 228        if (last->data)
 229                free(last->data);
 230        last->data = dat;
 231        last->len = datlen;
 232        memcpy(last->sha1, sha1, sizeof(sha1));
 233        return 1;
 234}
 235
 236static void init_pack_header()
 237{
 238        const char* magic = "PACK";
 239        unsigned long version = 3;
 240        unsigned long zero = 0;
 241
 242        version = htonl(version);
 243
 244        if (ywrite(pack_fd, (char*)magic, 4) != 4)
 245                die("Can't write pack magic: %s", strerror(errno));
 246        if (ywrite(pack_fd, &version, 4) != 4)
 247                die("Can't write pack version: %s", strerror(errno));
 248        if (ywrite(pack_fd, &zero, 4) != 4)
 249                die("Can't write 0 object count: %s", strerror(errno));
 250        pack_offset = 4 * 3;
 251}
 252
 253static void fixup_header_footer()
 254{
 255        SHA_CTX c;
 256        char hdr[8];
 257        unsigned long cnt;
 258        char *buf;
 259        size_t n;
 260
 261        if (lseek(pack_fd, 0, SEEK_SET) != 0)
 262                die("Failed seeking to start: %s", strerror(errno));
 263
 264        SHA1_Init(&c);
 265        if (yread(pack_fd, hdr, 8) != 8)
 266                die("Failed reading header: %s", strerror(errno));
 267        SHA1_Update(&c, hdr, 8);
 268
 269        cnt = htonl(object_count);
 270        SHA1_Update(&c, &cnt, 4);
 271        if (ywrite(pack_fd, &cnt, 4) != 4)
 272                die("Failed writing object count: %s", strerror(errno));
 273
 274        buf = xmalloc(128 * 1024);
 275        for (;;) {
 276                n = xread(pack_fd, buf, 128 * 1024);
 277                if (n <= 0)
 278                        break;
 279                SHA1_Update(&c, buf, n);
 280        }
 281        free(buf);
 282
 283        SHA1_Final(pack_sha1, &c);
 284        if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
 285                die("Failed writing pack checksum: %s", strerror(errno));
 286}
 287
 288static int oecmp (const void *_a, const void *_b)
 289{
 290        struct object_entry *a = *((struct object_entry**)_a);
 291        struct object_entry *b = *((struct object_entry**)_b);
 292        return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
 293}
 294
 295static void write_index(const char *idx_name)
 296{
 297        struct sha1file *f;
 298        struct object_entry **idx, **c, **last;
 299        struct object_entry *e;
 300        struct object_entry_block *o;
 301        unsigned int array[256];
 302        int i;
 303
 304        /* Build the sorted table of object IDs. */
 305        idx = xmalloc(object_count * sizeof(struct object_entry*));
 306        c = idx;
 307        for (o = blocks; o; o = o->next_block)
 308                for (e = o->entries; e != o->next_free; e++)
 309                        *c++ = e;
 310        last = idx + object_count;
 311        qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
 312
 313        /* Generate the fan-out array. */
 314        c = idx;
 315        for (i = 0; i < 256; i++) {
 316                struct object_entry **next = c;;
 317                while (next < last) {
 318                        if ((*next)->sha1[0] != i)
 319                                break;
 320                        next++;
 321                }
 322                array[i] = htonl(next - idx);
 323                c = next;
 324        }
 325
 326        f = sha1create("%s", idx_name);
 327        sha1write(f, array, 256 * sizeof(int));
 328        for (c = idx; c != last; c++) {
 329                unsigned int offset = htonl((*c)->offset);
 330                sha1write(f, &offset, 4);
 331                sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
 332        }
 333        sha1write(f, pack_sha1, sizeof(pack_sha1));
 334        sha1close(f, NULL, 1);
 335        free(idx);
 336}
 337
 338static void new_blob()
 339{
 340        unsigned long datlen;
 341        void *dat;
 342
 343        if (yread(0, &datlen, 4) != 4)
 344                die("Can't obtain blob length");
 345
 346        dat = xmalloc(datlen);
 347        if (yread(0, dat, datlen) != datlen)
 348                die("Con't obtain %lu bytes of blob data", datlen);
 349
 350        if (!store_object(OBJ_BLOB, dat, datlen, &last_blob))
 351                free(dat);
 352}
 353
 354int main(int argc, const char **argv)
 355{
 356        const char *base_name = argv[1];
 357        int est_obj_cnt = atoi(argv[2]);
 358        char *pack_name;
 359        char *idx_name;
 360        struct stat sb;
 361
 362        pack_name = xmalloc(strlen(base_name) + 6);
 363        sprintf(pack_name, "%s.pack", base_name);
 364        idx_name = xmalloc(strlen(base_name) + 5);
 365        sprintf(idx_name, "%s.idx", base_name);
 366
 367        pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
 368        if (pack_fd < 0)
 369                die("Can't create %s: %s", pack_name, strerror(errno));
 370
 371        alloc_objects(est_obj_cnt);
 372        init_pack_header();
 373        for (;;) {
 374                unsigned long cmd;
 375                if (yread(0, &cmd, 4) != 4)
 376                        break;
 377
 378                switch (cmd) {
 379                case 'blob': new_blob(); break;
 380                default:
 381                        die("Invalid command %lu", cmd);
 382                }
 383        }
 384        fixup_header_footer();
 385        close(pack_fd);
 386        write_index(idx_name);
 387
 388        fprintf(stderr, "%s statistics:\n", argv[0]);
 389        fprintf(stderr, "---------------------------------------------------\n");
 390        fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
 391        fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
 392        fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
 393        fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
 394        fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
 395        fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
 396        fprintf(stderr, "---------------------------------------------------\n");
 397
 398        stat(pack_name, &sb);
 399        fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
 400        stat(idx_name, &sb);
 401        fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
 402
 403        fprintf(stderr, "\n");
 404
 405        return 0;
 406}