98c5d1cbdd1c8cfe73123f4b1df5749e1bdfa84a
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "delta.h"
   6#include "pack.h"
   7#include "csum-file.h"
   8
   9struct object_entry
  10{
  11        struct object_entry *next;
  12        unsigned long offset;
  13        unsigned char sha1[20];
  14};
  15
  16struct object_entry_block
  17{
  18        struct object_entry_block *next_block;
  19        struct object_entry *next_free;
  20        struct object_entry *end;
  21        struct object_entry entries[FLEX_ARRAY]; /* more */
  22};
  23
  24struct last_object
  25{
  26        void *data;
  27        unsigned int len;
  28        unsigned int depth;
  29        unsigned char sha1[20];
  30};
  31
  32struct tree;
  33struct tree_entry
  34{
  35        struct tree *tree;
  36        mode_t mode;
  37        unsigned char sha1[20];
  38        char name[FLEX_ARRAY]; /* more */
  39};
  40
  41struct tree
  42{
  43        struct last_object last_tree;
  44        unsigned long entry_count;
  45        struct tree_entry **entries;
  46};
  47
  48struct branch
  49{
  50        struct branch *next_branch;
  51        struct tree_entry tree;
  52        unsigned char sha1[20];
  53        const char *name;
  54};
  55
  56/* Stats and misc. counters. */
  57static int max_depth = 10;
  58static unsigned long alloc_count;
  59static unsigned long branch_count;
  60static unsigned long object_count;
  61static unsigned long duplicate_count;
  62static unsigned long object_count_by_type[9];
  63static unsigned long duplicate_count_by_type[9];
  64
  65/* The .pack file */
  66static int pack_fd;
  67static unsigned long pack_offset;
  68static unsigned char pack_sha1[20];
  69
  70/* Table of objects we've written. */
  71struct object_entry_block *blocks;
  72struct object_entry *object_table[1 << 16];
  73
  74/* Our last blob */
  75struct last_object last_blob;
  76
  77/* Branch data */
  78struct branch *branches;
  79struct branch *current_branch;
  80
  81static void alloc_objects(int cnt)
  82{
  83        struct object_entry_block *b;
  84
  85        b = xmalloc(sizeof(struct object_entry_block)
  86                + cnt * sizeof(struct object_entry));
  87        b->next_block = blocks;
  88        b->next_free = b->entries;
  89        b->end = b->entries + cnt;
  90        blocks = b;
  91        alloc_count += cnt;
  92}
  93
  94static struct object_entry* new_object(unsigned char *sha1)
  95{
  96        struct object_entry *e;
  97
  98        if (blocks->next_free == blocks->end)
  99                alloc_objects(1000);
 100
 101        e = blocks->next_free++;
 102        memcpy(e->sha1, sha1, sizeof(e->sha1));
 103        return e;
 104}
 105
 106static struct object_entry* insert_object(unsigned char *sha1)
 107{
 108        unsigned int h = sha1[0] << 8 | sha1[1];
 109        struct object_entry *e = object_table[h];
 110        struct object_entry *p = 0;
 111
 112        while (e) {
 113                if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
 114                        return e;
 115                p = e;
 116                e = e->next;
 117        }
 118
 119        e = new_object(sha1);
 120        e->next = 0;
 121        e->offset = 0;
 122        if (p)
 123                p->next = e;
 124        else
 125                object_table[h] = e;
 126        return e;
 127}
 128
 129static ssize_t yread(int fd, void *buffer, size_t length)
 130{
 131        ssize_t ret = 0;
 132        while (ret < length) {
 133                ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
 134                if (size < 0) {
 135                        return size;
 136                }
 137                if (size == 0) {
 138                        return ret;
 139                }
 140                ret += size;
 141        }
 142        return ret;
 143}
 144
 145static ssize_t ywrite(int fd, void *buffer, size_t length)
 146{
 147        ssize_t ret = 0;
 148        while (ret < length) {
 149                ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
 150                if (size < 0) {
 151                        return size;
 152                }
 153                if (size == 0) {
 154                        return ret;
 155                }
 156                ret += size;
 157        }
 158        return ret;
 159}
 160
 161static const char* read_string()
 162{
 163        static char sn[PATH_MAX];
 164        unsigned long slen;
 165
 166        if (yread(0, &slen, 4) != 4)
 167                die("Can't obtain string");
 168        if (!slen)
 169                return 0;
 170        if (slen > (PATH_MAX - 1))
 171                die("Can't handle excessive string length %lu", slen);
 172
 173        if (yread(0, sn, slen) != slen)
 174                die("Can't obtain string of length %lu", slen);
 175        sn[slen] = 0;
 176        return sn;
 177}
 178
 179static const char* read_required_string()
 180{
 181        const char *r = read_string();
 182        if (!r)
 183                die("Expected string command parameter, didn't find one");
 184        return r;
 185}
 186
 187static unsigned long encode_header(
 188        enum object_type type,
 189        unsigned long size,
 190        unsigned char *hdr)
 191{
 192        int n = 1;
 193        unsigned char c;
 194
 195        if (type < OBJ_COMMIT || type > OBJ_DELTA)
 196                die("bad type %d", type);
 197
 198        c = (type << 4) | (size & 15);
 199        size >>= 4;
 200        while (size) {
 201                *hdr++ = c | 0x80;
 202                c = size & 0x7f;
 203                size >>= 7;
 204                n++;
 205        }
 206        *hdr = c;
 207        return n;
 208}
 209
 210static int store_object(
 211        enum object_type type,
 212        void *dat,
 213        unsigned long datlen,
 214        struct last_object *last,
 215        unsigned char *sha1out)
 216{
 217        void *out, *delta;
 218        struct object_entry *e;
 219        unsigned char hdr[96];
 220        unsigned char sha1[20];
 221        unsigned long hdrlen, deltalen;
 222        SHA_CTX c;
 223        z_stream s;
 224
 225        hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
 226        SHA1_Init(&c);
 227        SHA1_Update(&c, hdr, hdrlen);
 228        SHA1_Update(&c, dat, datlen);
 229        SHA1_Final(sha1, &c);
 230        if (sha1out)
 231                memcpy(sha1out, sha1, sizeof(sha1));
 232
 233        e = insert_object(sha1);
 234        if (e->offset) {
 235                duplicate_count++;
 236                duplicate_count_by_type[type]++;
 237                return 0;
 238        }
 239        e->offset = pack_offset;
 240        object_count++;
 241        object_count_by_type[type]++;
 242
 243        if (last->data && last->depth < max_depth)
 244                delta = diff_delta(last->data, last->len,
 245                        dat, datlen,
 246                        &deltalen, 0);
 247        else
 248                delta = 0;
 249
 250        memset(&s, 0, sizeof(s));
 251        deflateInit(&s, zlib_compression_level);
 252
 253        if (delta) {
 254                last->depth++;
 255                s.next_in = delta;
 256                s.avail_in = deltalen;
 257                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
 258                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 259                        die("Can't write object header: %s", strerror(errno));
 260                if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
 261                        die("Can't write object base: %s", strerror(errno));
 262                pack_offset += hdrlen + sizeof(sha1);
 263        } else {
 264                last->depth = 0;
 265                s.next_in = dat;
 266                s.avail_in = datlen;
 267                hdrlen = encode_header(type, datlen, hdr);
 268                if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
 269                        die("Can't write object header: %s", strerror(errno));
 270                pack_offset += hdrlen;
 271        }
 272
 273        s.avail_out = deflateBound(&s, s.avail_in);
 274        s.next_out = out = xmalloc(s.avail_out);
 275        while (deflate(&s, Z_FINISH) == Z_OK)
 276                /* nothing */;
 277        deflateEnd(&s);
 278
 279        if (ywrite(pack_fd, out, s.total_out) != s.total_out)
 280                die("Failed writing compressed data %s", strerror(errno));
 281        pack_offset += s.total_out;
 282
 283        free(out);
 284        if (delta)
 285                free(delta);
 286        if (last->data)
 287                free(last->data);
 288        last->data = dat;
 289        last->len = datlen;
 290        memcpy(last->sha1, sha1, sizeof(sha1));
 291        return 1;
 292}
 293
 294static void init_pack_header()
 295{
 296        const char* magic = "PACK";
 297        unsigned long version = 3;
 298        unsigned long zero = 0;
 299
 300        version = htonl(version);
 301
 302        if (ywrite(pack_fd, (char*)magic, 4) != 4)
 303                die("Can't write pack magic: %s", strerror(errno));
 304        if (ywrite(pack_fd, &version, 4) != 4)
 305                die("Can't write pack version: %s", strerror(errno));
 306        if (ywrite(pack_fd, &zero, 4) != 4)
 307                die("Can't write 0 object count: %s", strerror(errno));
 308        pack_offset = 4 * 3;
 309}
 310
 311static void fixup_header_footer()
 312{
 313        SHA_CTX c;
 314        char hdr[8];
 315        unsigned long cnt;
 316        char *buf;
 317        size_t n;
 318
 319        if (lseek(pack_fd, 0, SEEK_SET) != 0)
 320                die("Failed seeking to start: %s", strerror(errno));
 321
 322        SHA1_Init(&c);
 323        if (yread(pack_fd, hdr, 8) != 8)
 324                die("Failed reading header: %s", strerror(errno));
 325        SHA1_Update(&c, hdr, 8);
 326
 327        cnt = htonl(object_count);
 328        SHA1_Update(&c, &cnt, 4);
 329        if (ywrite(pack_fd, &cnt, 4) != 4)
 330                die("Failed writing object count: %s", strerror(errno));
 331
 332        buf = xmalloc(128 * 1024);
 333        for (;;) {
 334                n = xread(pack_fd, buf, 128 * 1024);
 335                if (n <= 0)
 336                        break;
 337                SHA1_Update(&c, buf, n);
 338        }
 339        free(buf);
 340
 341        SHA1_Final(pack_sha1, &c);
 342        if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
 343                die("Failed writing pack checksum: %s", strerror(errno));
 344}
 345
 346static int oecmp (const void *_a, const void *_b)
 347{
 348        struct object_entry *a = *((struct object_entry**)_a);
 349        struct object_entry *b = *((struct object_entry**)_b);
 350        return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
 351}
 352
 353static void write_index(const char *idx_name)
 354{
 355        struct sha1file *f;
 356        struct object_entry **idx, **c, **last;
 357        struct object_entry *e;
 358        struct object_entry_block *o;
 359        unsigned int array[256];
 360        int i;
 361
 362        /* Build the sorted table of object IDs. */
 363        idx = xmalloc(object_count * sizeof(struct object_entry*));
 364        c = idx;
 365        for (o = blocks; o; o = o->next_block)
 366                for (e = o->entries; e != o->next_free; e++)
 367                        *c++ = e;
 368        last = idx + object_count;
 369        qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
 370
 371        /* Generate the fan-out array. */
 372        c = idx;
 373        for (i = 0; i < 256; i++) {
 374                struct object_entry **next = c;;
 375                while (next < last) {
 376                        if ((*next)->sha1[0] != i)
 377                                break;
 378                        next++;
 379                }
 380                array[i] = htonl(next - idx);
 381                c = next;
 382        }
 383
 384        f = sha1create("%s", idx_name);
 385        sha1write(f, array, 256 * sizeof(int));
 386        for (c = idx; c != last; c++) {
 387                unsigned int offset = htonl((*c)->offset);
 388                sha1write(f, &offset, 4);
 389                sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
 390        }
 391        sha1write(f, pack_sha1, sizeof(pack_sha1));
 392        sha1close(f, NULL, 1);
 393        free(idx);
 394}
 395
 396static void new_blob()
 397{
 398        unsigned long datlen;
 399        void *dat;
 400
 401        if (yread(0, &datlen, 4) != 4)
 402                die("Can't obtain blob length");
 403
 404        dat = xmalloc(datlen);
 405        if (yread(0, dat, datlen) != datlen)
 406                die("Con't obtain %lu bytes of blob data", datlen);
 407
 408        if (!store_object(OBJ_BLOB, dat, datlen, &last_blob, 0))
 409                free(dat);
 410}
 411
 412static struct branch* lookup_branch(const char *name)
 413{
 414        struct branch *b;
 415        for (b = branches; b; b = b->next_branch) {
 416                if (!strcmp(name, b->name))
 417                        return b;
 418        }
 419        die("No branch named '%s' has been declared", name);
 420}
 421
 422static struct tree* deep_copy_tree (struct tree *t)
 423{
 424        struct tree *r = xmalloc(sizeof(struct tree));
 425        unsigned long i;
 426
 427        if (t->last_tree.data) {
 428                r->last_tree.data = xmalloc(t->last_tree.len);
 429                r->last_tree.len = t->last_tree.len;
 430                r->last_tree.depth = t->last_tree.depth;
 431                memcpy(r->last_tree.data, t->last_tree.data, t->last_tree.len);
 432                memcpy(r->last_tree.sha1, t->last_tree.sha1, sizeof(t->last_tree.sha1));
 433        }
 434
 435        r->entry_count = t->entry_count;
 436        r->entries = xmalloc(t->entry_count * sizeof(struct tree_entry*));
 437        for (i = 0; i < t->entry_count; i++) {
 438                struct tree_entry *a = t->entries[i];
 439                struct tree_entry *b;
 440
 441                b = xmalloc(sizeof(struct tree_entry) + strlen(a->name) + 1);
 442                b->tree = a->tree ? deep_copy_tree(a->tree) : 0;
 443                b->mode = a->mode;
 444                memcpy(b->sha1, a->sha1, sizeof(a->sha1));
 445                strcpy(b->name, a->name);
 446                r->entries[i] = b;
 447        }
 448
 449        return r;
 450}
 451
 452static void store_tree (struct tree_entry *e)
 453{
 454        struct tree *t = e->tree;
 455        unsigned long maxlen, i;
 456        char *buf, *c;
 457
 458        if (memcmp(null_sha1, e->sha1, sizeof(e->sha1)))
 459                return;
 460
 461        maxlen = t->entry_count * 32;
 462        for (i = 0; i < t->entry_count; i++)
 463                maxlen += strlen(t->entries[i]->name);
 464
 465        buf = c = xmalloc(maxlen);
 466        for (i = 0; i < t->entry_count; i++) {
 467                struct tree_entry *e = t->entries[i];
 468                c += sprintf(c, "%o %s", e->mode, e->name) + 1;
 469                if (e->tree)
 470                        store_tree(e);
 471                memcpy(c, e->sha1, sizeof(e->sha1));
 472                c += sizeof(e->sha1);
 473        }
 474
 475        if (!store_object(OBJ_TREE, buf, c - buf, &t->last_tree, e->sha1))
 476                free(buf);
 477}
 478
 479static void new_branch()
 480{
 481        struct branch *nb = xcalloc(1, sizeof(struct branch));
 482        const char *source_name;
 483
 484        nb->name = strdup(read_required_string());
 485        source_name = read_string();
 486        if (source_name) {
 487                struct branch *sb = lookup_branch(source_name);
 488                nb->tree.tree = deep_copy_tree(sb->tree.tree);
 489                memcpy(nb->tree.sha1, sb->tree.sha1, sizeof(sb->tree.sha1));
 490                memcpy(nb->sha1, sb->sha1, sizeof(sb->sha1));
 491        } else {
 492                nb->tree.tree = xcalloc(1, sizeof(struct tree));
 493                nb->tree.tree->entries = xmalloc(8*sizeof(struct tree_entry*));
 494        }
 495        nb->next_branch = branches;
 496        branches = nb;
 497        branch_count++;
 498}
 499
 500static void set_branch()
 501{
 502        current_branch = lookup_branch(read_required_string());
 503}
 504
 505static void commit()
 506{
 507        store_tree(&current_branch->tree);
 508}
 509
 510int main(int argc, const char **argv)
 511{
 512        const char *base_name = argv[1];
 513        int est_obj_cnt = atoi(argv[2]);
 514        char *pack_name;
 515        char *idx_name;
 516        struct stat sb;
 517
 518        pack_name = xmalloc(strlen(base_name) + 6);
 519        sprintf(pack_name, "%s.pack", base_name);
 520        idx_name = xmalloc(strlen(base_name) + 5);
 521        sprintf(idx_name, "%s.idx", base_name);
 522
 523        pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
 524        if (pack_fd < 0)
 525                die("Can't create %s: %s", pack_name, strerror(errno));
 526
 527        alloc_objects(est_obj_cnt);
 528        init_pack_header();
 529        for (;;) {
 530                unsigned long cmd;
 531                if (yread(0, &cmd, 4) != 4)
 532                        break;
 533
 534                switch (cmd) {
 535                case 'blob': new_blob();   break;
 536                case 'newb': new_branch(); break;
 537                case 'setb': set_branch(); break;
 538                case 'comt': commit();     break;
 539                default:
 540                        die("Invalid command %lu", cmd);
 541                }
 542        }
 543        fixup_header_footer();
 544        close(pack_fd);
 545        write_index(idx_name);
 546
 547        fprintf(stderr, "%s statistics:\n", argv[0]);
 548        fprintf(stderr, "---------------------------------------------------\n");
 549        fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
 550        fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
 551        fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
 552        fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
 553        fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
 554        fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
 555        fprintf(stderr, "Total branches:  %10lu\n", branch_count);
 556        fprintf(stderr, "---------------------------------------------------\n");
 557
 558        stat(pack_name, &sb);
 559        fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
 560        stat(idx_name, &sb);
 561        fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
 562
 563        fprintf(stderr, "\n");
 564
 565        return 0;
 566}