8b4be28f606b812e27d513e7907f3e8032a96056
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "delta.h"
   6#include "pack.h"
   7#include "csum-file.h"
   8
   9struct object_entry
  10{
  11        struct object_entry *next;
  12        unsigned long offset;
  13        unsigned char sha1[20];
  14};
  15
  16struct object_entry_block
  17{
  18        struct object_entry_block *next_block;
  19        struct object_entry *next_free;
  20        struct object_entry *end;
  21        struct object_entry entries[FLEX_ARRAY]; /* more */
  22};
  23
  24struct last_object
  25{
  26        void *data;
  27        unsigned long len;
  28        int depth;
  29        unsigned char sha1[20];
  30};
  31
  32/* Stats and misc. counters. */
  33static int max_depth = 10;
  34static unsigned long alloc_count;
  35static unsigned long object_count;
  36static unsigned long duplicate_count;
  37
  38/* The .pack file */
  39static int pack_fd;
  40static unsigned long pack_offset;
  41static unsigned char pack_sha1[20];
  42
  43/* Table of objects we've written. */
  44struct object_entry_block *blocks;
  45struct object_entry *object_table[1 << 16];
  46
  47/* Our last blob */
  48struct last_object last_blob;
  49
  50static void alloc_objects(int cnt)
  51{
  52        struct object_entry_block *b;
  53
  54        b = xmalloc(sizeof(struct object_entry_block)
  55                + cnt * sizeof(struct object_entry));
  56        b->next_block = blocks;
  57        b->next_free = b->entries;
  58        b->end = b->entries + cnt;
  59        blocks = b;
  60        alloc_count += cnt;
  61}
  62
  63static struct object_entry* new_object(unsigned char *sha1)
  64{
  65        struct object_entry *e;
  66
  67        if (blocks->next_free == blocks->end)
  68                alloc_objects(1000);
  69
  70        e = blocks->next_free++;
  71        memcpy(e->sha1, sha1, sizeof(e->sha1));
  72        return e;
  73}
  74
  75static struct object_entry* insert_object(unsigned char *sha1)
  76{
  77        unsigned int h = sha1[0] << 8 | sha1[1];
  78        struct object_entry *e = object_table[h];
  79        struct object_entry *p = 0;
  80
  81        while (e) {
  82                if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
  83                        return e;
  84                p = e;
  85                e = e->next;
  86        }
  87
  88        e = new_object(sha1);
  89        e->next = 0;
  90        e->offset = 0;
  91        if (p)
  92                p->next = e;
  93        else
  94                object_table[h] = e;
  95        return e;
  96}
  97
  98static ssize_t yread(int fd, void *buffer, size_t length)
  99{
 100        ssize_t ret = 0;
 101        while (ret < length) {
 102                ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
 103                if (size < 0) {
 104                        return size;
 105                }
 106                if (size == 0) {
 107                        return ret;
 108                }
 109                ret += size;
 110        }
 111        return ret;
 112}
 113
 114static ssize_t ywrite(int fd, void *buffer, size_t length)
 115{
 116        ssize_t ret = 0;
 117        while (ret < length) {
 118                ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
 119                if (size < 0) {
 120                        return size;
 121                }
 122                if (size == 0) {
 123                        return ret;
 124                }
 125                ret += size;
 126        }
 127        return ret;
 128}
 129
 130static unsigned long encode_header(
 131        enum object_type type,
 132        unsigned long size,
 133        unsigned char *hdr)
 134{
 135        int n = 1;
 136        unsigned char c;
 137
 138        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 139                die("bad type %d", type);
 140
 141        c = (type << 4) | (size & 15);
 142        size >>= 4;
 143        while (size) {
 144                *hdr++ = c | 0x80;
 145                c = size & 0x7f;
 146                size >>= 7;
 147                n++;
 148        }
 149        *hdr = c;
 150        return n;
 151}
 152
 153static int store_object(
 154        enum object_type type,
 155        void *dat,
 156        unsigned long datlen,
 157        struct last_object *last)
 158{
 159        void *out, *delta;
 160        struct object_entry *e;
 161        unsigned char hdr[96];
 162        unsigned char sha1[20];
 163        unsigned long hdrlen, deltalen;
 164        SHA_CTX c;
 165        z_stream s;
 166
 167        hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
 168        SHA1_Init(&c);
 169        SHA1_Update(&c, hdr, hdrlen);
 170        SHA1_Update(&c, dat, datlen);
 171        SHA1_Final(sha1, &c);
 172
 173        e = insert_object(sha1);
 174        if (e->offset) {
 175                duplicate_count++;
 176                return 0;
 177        }
 178        e->offset = pack_offset;
 179        object_count++;
 180
 181        if (last->data && last->depth < max_depth)
 182                delta = diff_delta(last->data, last->len,
 183                        dat, datlen,
 184                        &deltalen, 0);
 185        else
 186                delta = 0;
 187
 188        memset(&s, 0, sizeof(s));
 189        deflateInit(&s, zlib_compression_level);
 190
 191        if (delta) {
 192                last->depth++;
 193                s.next_in = delta;
 194                s.avail_in = deltalen;
 195                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
 196                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 197                        die("Can't write object header: %s", strerror(errno));
 198                if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
 199                        die("Can't write object base: %s", strerror(errno));
 200                pack_offset += hdrlen + sizeof(sha1);
 201        } else {
 202                last->depth = 0;
 203                s.next_in = dat;
 204                s.avail_in = datlen;
 205                hdrlen = encode_header(type, datlen, hdr);
 206                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 207                        die("Can't write object header: %s", strerror(errno));
 208                pack_offset += hdrlen;
 209        }
 210
 211        s.avail_out = deflateBound(&s, s.avail_in);
 212        s.next_out = out = xmalloc(s.avail_out);
 213        while (deflate(&s, Z_FINISH) == Z_OK)
 214                /* nothing */;
 215        deflateEnd(&s);
 216
 217        if (ywrite(pack_fd, out, s.total_out) != s.total_out)
 218                die("Failed writing compressed data %s", strerror(errno));
 219        pack_offset += s.total_out;
 220
 221        free(out);
 222        if (delta)
 223                free(delta);
 224        if (last->data)
 225                free(last->data);
 226        last->data = dat;
 227        last->len = datlen;
 228        memcpy(last->sha1, sha1, sizeof(sha1));
 229        return 1;
 230}
 231
 232static void init_pack_header()
 233{
 234        const char* magic = "PACK";
 235        unsigned long version = 2;
 236        unsigned long zero = 0;
 237
 238        version = htonl(version);
 239
 240        if (ywrite(pack_fd, (char*)magic, 4) != 4)
 241                die("Can't write pack magic: %s", strerror(errno));
 242        if (ywrite(pack_fd, &version, 4) != 4)
 243                die("Can't write pack version: %s", strerror(errno));
 244        if (ywrite(pack_fd, &zero, 4) != 4)
 245                die("Can't write 0 object count: %s", strerror(errno));
 246        pack_offset = 4 * 3;
 247}
 248
 249static void fixup_header_footer()
 250{
 251        SHA_CTX c;
 252        char hdr[8];
 253        unsigned long cnt;
 254        char *buf;
 255        size_t n;
 256
 257        if (lseek(pack_fd, 0, SEEK_SET) != 0)
 258                die("Failed seeking to start: %s", strerror(errno));
 259
 260        SHA1_Init(&c);
 261        if (yread(pack_fd, hdr, 8) != 8)
 262                die("Failed reading header: %s", strerror(errno));
 263        SHA1_Update(&c, hdr, 8);
 264
 265        cnt = htonl(object_count);
 266        SHA1_Update(&c, &cnt, 4);
 267        if (ywrite(pack_fd, &cnt, 4) != 4)
 268                die("Failed writing object count: %s", strerror(errno));
 269
 270        buf = xmalloc(128 * 1024);
 271        for (;;) {
 272                n = xread(pack_fd, buf, 128 * 1024);
 273                if (n <= 0)
 274                        break;
 275                SHA1_Update(&c, buf, n);
 276        }
 277        free(buf);
 278
 279        SHA1_Final(pack_sha1, &c);
 280        if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
 281                die("Failed writing pack checksum: %s", strerror(errno));
 282}
 283
 284static int oecmp (const void *_a, const void *_b)
 285{
 286        struct object_entry *a = *((struct object_entry**)_a);
 287        struct object_entry *b = *((struct object_entry**)_b);
 288        return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
 289}
 290
 291static void write_index(const char *idx_name)
 292{
 293        struct sha1file *f;
 294        struct object_entry **idx, **c, **last;
 295        struct object_entry *e;
 296        struct object_entry_block *o;
 297        unsigned int array[256];
 298        int i;
 299
 300        /* Build the sorted table of object IDs. */
 301        idx = xmalloc(object_count * sizeof(struct object_entry*));
 302        c = idx;
 303        for (o = blocks; o; o = o->next_block)
 304                for (e = o->entries; e != o->next_free; e++)
 305                        *c++ = e;
 306        last = idx + object_count;
 307        qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
 308
 309        /* Generate the fan-out array. */
 310        c = idx;
 311        for (i = 0; i < 256; i++) {
 312                struct object_entry **next = c;;
 313                while (next < last) {
 314                        if ((*next)->sha1[0] != i)
 315                                break;
 316                        next++;
 317                }
 318                array[i] = htonl(next - idx);
 319                c = next;
 320        }
 321
 322        f = sha1create("%s", idx_name);
 323        sha1write(f, array, 256 * sizeof(int));
 324        for (c = idx; c != last; c++) {
 325                unsigned int offset = htonl((*c)->offset);
 326                sha1write(f, &offset, 4);
 327                sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
 328        }
 329        sha1write(f, pack_sha1, sizeof(pack_sha1));
 330        sha1close(f, NULL, 1);
 331        free(idx);
 332}
 333
 334int main(int argc, const char **argv)
 335{
 336        const char *base_name = argv[1];
 337        int est_obj_cnt = atoi(argv[2]);
 338        char *pack_name;
 339        char *idx_name;
 340
 341        pack_name = xmalloc(strlen(base_name) + 6);
 342        sprintf(pack_name, "%s.pack", base_name);
 343        idx_name = xmalloc(strlen(base_name) + 5);
 344        sprintf(idx_name, "%s.idx", base_name);
 345
 346        pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
 347        if (pack_fd < 0)
 348                die("Can't create pack file %s: %s", pack_name, strerror(errno));
 349
 350        alloc_objects(est_obj_cnt);
 351        init_pack_header();
 352        for (;;) {
 353                unsigned long datlen;
 354                void *dat;
 355
 356                if (yread(0, &datlen, 4) != 4)
 357                        break;
 358
 359                dat = xmalloc(datlen);
 360                if (yread(0, dat, datlen) != datlen)
 361                        break;
 362
 363                if (!store_object(OBJ_BLOB, dat, datlen, &last_blob))
 364                        free(dat);
 365        }
 366        fixup_header_footer();
 367        close(pack_fd);
 368        write_index(idx_name);
 369
 370        fprintf(stderr, "%lu objects, %lu duplicates, %lu allocated (%lu overflow)\n",
 371                object_count, duplicate_count, alloc_count, alloc_count - est_obj_cnt);
 372
 373        return 0;
 374}