6c24f641dd596cbe486b5fb43ee454957e2cdd30
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4
   5static const char pack_usage[] = "git-pack-objects [--window=N] base-name < object-list";
   6
   7enum object_type {
   8        OBJ_NONE,
   9        OBJ_COMMIT,
  10        OBJ_TREE,
  11        OBJ_BLOB,
  12        OBJ_DELTA       // NOTE! This is _not_ the same as a "delta" object in the filesystem
  13};
  14
  15struct object_entry {
  16        unsigned char sha1[20];
  17        unsigned long size;
  18        unsigned long offset;
  19        unsigned int depth;
  20        unsigned int flags;
  21        enum object_type type;
  22        unsigned long delta_size;
  23        struct object_entry *delta;
  24};
  25
  26static struct object_entry **sorted_by_sha, **sorted_by_type;
  27static struct object_entry *objects = NULL;
  28static int nr_objects = 0, nr_alloc = 0;
  29static const char *base_name;
  30
  31struct myfile {
  32        int fd;
  33        unsigned long chars;
  34        unsigned char buffer[8192];
  35};
  36
  37static FILE *create_file(const char *suffix)
  38{
  39        static char filename[PATH_MAX];
  40        unsigned len;
  41
  42        len = snprintf(filename, PATH_MAX, "%s.%s", base_name, suffix);
  43        if (len >= PATH_MAX)
  44                die("you wascally wabbit, you");
  45        return fopen(filename, "w");
  46}
  47
  48static unsigned long fwrite_compressed(void *in, unsigned long size, FILE *f)
  49{
  50        z_stream stream;
  51        unsigned long maxsize;
  52        void *out;
  53
  54        memset(&stream, 0, sizeof(stream));
  55        deflateInit(&stream, Z_DEFAULT_COMPRESSION);
  56        maxsize = deflateBound(&stream, size);
  57        out = xmalloc(maxsize);
  58
  59        /* Compress it */
  60        stream.next_in = in;
  61        stream.avail_in = size;
  62
  63        stream.next_out = out;
  64        stream.avail_out = maxsize;
  65
  66        while (deflate(&stream, Z_FINISH) == Z_OK)
  67                /* nothing */;
  68        deflateEnd(&stream);
  69
  70        size = stream.total_out;
  71        fwrite(out, size, 1, f);
  72        free(out);
  73        return size;
  74}
  75
  76static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
  77{
  78        unsigned long othersize, delta_size;
  79        char type[10];
  80        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
  81        void *delta_buf;
  82
  83        if (!otherbuf)
  84                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
  85        delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size, ~0UL);
  86        if (!delta_buf || delta_size != entry->delta_size)
  87                die("delta size changed");
  88        free(buf);
  89        free(otherbuf);
  90        return delta_buf;
  91}
  92
  93static unsigned long write_object(FILE *f, struct object_entry *entry)
  94{
  95        unsigned long size;
  96        char type[10];
  97        void *buf = read_sha1_file(entry->sha1, type, &size);
  98        char header[21];
  99        unsigned hdrlen, datalen;
 100
 101        if (!buf)
 102                die("unable to read %s", sha1_to_hex(entry->sha1));
 103        if (size != entry->size)
 104                die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 105
 106        /*
 107         * The object header is a byte of 'type' followed by four bytes of
 108         * length, except for deltas that has the 20 bytes of delta sha
 109         * instead.
 110         */
 111        header[0] = ".CTB"[entry->type];
 112        datalen = htonl(size);
 113        memcpy(header+1, &datalen, 4);
 114        hdrlen = 5;
 115        if (entry->delta) {
 116                header[0] = 'D';
 117                memcpy(header+1, entry->delta, 20);
 118                buf = delta_against(buf, size, entry);
 119                size = entry->delta_size;
 120                hdrlen = 21;
 121        }
 122        fwrite(header, hdrlen, 1, f);
 123        datalen = fwrite_compressed(buf, size, f);
 124        free(buf);
 125        return hdrlen + datalen;
 126}
 127
 128static void write_pack_file(void)
 129{
 130        int i;
 131        FILE *f = create_file("pack");
 132        unsigned long offset = 0;
 133        unsigned long mb;
 134
 135        for (i = 0; i < nr_objects; i++) {
 136                struct object_entry *entry = objects + i;
 137                entry->offset = offset;
 138                offset += write_object(f, entry);
 139        }
 140        fclose(f);
 141        mb = offset >> 20;
 142        offset &= 0xfffff;
 143}
 144
 145static void write_index_file(void)
 146{
 147        int i;
 148        FILE *f = create_file("idx");
 149        struct object_entry **list = sorted_by_sha;
 150        struct object_entry **last = list + nr_objects;
 151        unsigned int array[256];
 152
 153        /*
 154         * Write the first-level table (the list is sorted,
 155         * but we use a 256-entry lookup to be able to avoid
 156         * having to do eight extra binary search iterations)
 157         */
 158        for (i = 0; i < 256; i++) {
 159                struct object_entry **next = list;
 160                while (next < last) {
 161                        struct object_entry *entry = *next;
 162                        if (entry->sha1[0] != i)
 163                                break;
 164                        next++;
 165                }
 166                array[i] = htonl(next - sorted_by_sha);
 167                list = next;
 168        }
 169        fwrite(array, 256, sizeof(int), f);
 170
 171        /*
 172         * Write the actual SHA1 entries..
 173         */
 174        list = sorted_by_sha;
 175        for (i = 0; i < nr_objects; i++) {
 176                struct object_entry *entry = *list++;
 177                unsigned int offset = htonl(entry->offset);
 178                fwrite(&offset, 4, 1, f);
 179                fwrite(entry->sha1, 20, 1, f);
 180        }
 181        fclose(f);
 182}
 183
 184static void add_object_entry(unsigned char *sha1)
 185{
 186        unsigned int idx = nr_objects;
 187        struct object_entry *entry;
 188
 189        if (idx >= nr_alloc) {
 190                unsigned int needed = (idx + 1024) * 3 / 2;
 191                objects = xrealloc(objects, needed * sizeof(*entry));
 192                nr_alloc = needed;
 193        }
 194        entry = objects + idx;
 195        memset(entry, 0, sizeof(*entry));
 196        memcpy(entry->sha1, sha1, 20);
 197        nr_objects = idx+1;
 198}
 199
 200static void check_object(struct object_entry *entry)
 201{
 202        char buffer[128];
 203        char type[10];
 204        unsigned long mapsize;
 205        z_stream stream;
 206        void *map;
 207
 208        map = map_sha1_file(entry->sha1, &mapsize);
 209        if (!map)
 210                die("unable to map %s", sha1_to_hex(entry->sha1));
 211        if (unpack_sha1_header(&stream, map, mapsize, buffer, sizeof(buffer)) < 0)
 212                die("unable to unpack %s header", sha1_to_hex(entry->sha1));
 213        munmap(map, mapsize);
 214        if (parse_sha1_header(buffer, type, &entry->size) < 0)
 215                die("unable to parse %s header", sha1_to_hex(entry->sha1));
 216        if (!strcmp(type, "commit")) {
 217                entry->type = OBJ_COMMIT;
 218        } else if (!strcmp(type, "tree")) {
 219                entry->type = OBJ_TREE;
 220        } else if (!strcmp(type, "blob")) {
 221                entry->type = OBJ_BLOB;
 222        } else
 223                die("unable to pack object %s of type %s", sha1_to_hex(entry->sha1), type);
 224}
 225
 226static void get_object_details(void)
 227{
 228        int i;
 229        struct object_entry *entry = objects;
 230
 231        for (i = 0; i < nr_objects; i++)
 232                check_object(entry++);
 233}
 234
 235typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 236
 237static entry_sort_t current_sort;
 238
 239static int sort_comparator(const void *_a, const void *_b)
 240{
 241        struct object_entry *a = *(struct object_entry **)_a;
 242        struct object_entry *b = *(struct object_entry **)_b;
 243        return current_sort(a,b);
 244}
 245
 246static struct object_entry **create_sorted_list(entry_sort_t sort)
 247{
 248        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 249        int i;
 250
 251        for (i = 0; i < nr_objects; i++)
 252                list[i] = objects + i;
 253        current_sort = sort;
 254        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 255        return list;
 256}
 257
 258static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 259{
 260        return memcmp(a->sha1, b->sha1, 20);
 261}
 262
 263static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 264{
 265        if (a->type < b->type)
 266                return -1;
 267        if (a->type > b->type)
 268                return 1;
 269        if (a->size < b->size)
 270                return -1;
 271        if (a->size > b->size)
 272                return 1;
 273        return a < b ? -1 : (a > b);
 274}
 275
 276struct unpacked {
 277        struct object_entry *entry;
 278        void *data;
 279};
 280
 281/*
 282 * We search for deltas in a list sorted by type and by size, and
 283 * walk the "old" chain backwards in the list, so if the type
 284 * has changed or the size difference is too big, there's no point
 285 * in even continuing the walk, since the other old objects are
 286 * going to be even smaller or of a different type. So return -1
 287 * once we determine that there's no point even trying.
 288 */
 289static int try_delta(struct unpacked *cur, struct unpacked *old)
 290{
 291        struct object_entry *cur_entry = cur->entry;
 292        struct object_entry *old_entry = old->entry;
 293        unsigned long size, oldsize, delta_size;
 294        long max_size;
 295        void *delta_buf;
 296
 297        /* Don't bother doing diffs between different types */
 298        if (cur_entry->type != old_entry->type)
 299                return -1;
 300
 301        /* Size is guaranteed to be larger than or equal to oldsize */
 302        size = cur_entry->size;
 303        if (size < 50)
 304                return -1;
 305        oldsize = old_entry->size;
 306        if (size - oldsize > oldsize / 4)
 307                return -1;
 308
 309        /*
 310         * NOTE!
 311         *
 312         * We always delta from the bigger to the smaller, since that's
 313         * more space-efficient (deletes don't have to say _what_ they
 314         * delete).
 315         */
 316        max_size = size / 2 - 20;
 317        if (cur_entry->delta)
 318                max_size = cur_entry->delta_size-1;
 319        delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size, max_size);
 320        if (!delta_buf)
 321                return 0;
 322        cur_entry->delta = old_entry;
 323        cur_entry->delta_size = delta_size;
 324        free(delta_buf);
 325        return 0;
 326}
 327
 328static void find_deltas(struct object_entry **list, int window)
 329{
 330        unsigned int i;
 331        unsigned int array_size = window * sizeof(struct unpacked);
 332        struct unpacked *array = xmalloc(array_size);
 333
 334        memset(array, 0, array_size);
 335        for (i = 0; i < nr_objects; i++) {
 336                unsigned int idx = i % window;
 337                struct object_entry *entry = list[i];
 338                struct unpacked *n = array + idx;
 339                unsigned long size;
 340                char type[10];
 341                int j;
 342
 343                free(n->data);
 344                n->entry = entry;
 345                n->data = read_sha1_file(entry->sha1, type, &size);
 346                if (size != entry->size)
 347                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 348                j = window;
 349                while (--j > 0) {
 350                        unsigned int other_idx = idx + j;
 351                        struct unpacked *m;
 352                        if (other_idx >= window)
 353                                other_idx -= window;
 354                        m = array + other_idx;
 355                        if (!m->entry)
 356                                break;
 357                        if (try_delta(n, m) < 0)
 358                                break;
 359                }
 360        }
 361}
 362
 363int main(int argc, char **argv)
 364{
 365        char line[128];
 366        int window = 10;
 367        int i;
 368
 369        for (i = 1; i < argc; i++) {
 370                const char *arg = argv[i];
 371
 372                if (*arg == '-') {
 373                        if (!strncmp("--window=", arg, 9)) {
 374                                char *end;
 375                                window = strtoul(arg+9, &end, 0);
 376                                if (!arg[9] || *end)
 377                                        usage(pack_usage);
 378                                continue;
 379                        }
 380                        usage(pack_usage);
 381                }
 382                if (base_name)
 383                        usage(pack_usage);
 384                base_name = arg;
 385        }
 386
 387        if (!base_name)
 388                usage(pack_usage);
 389
 390        while (fgets(line, sizeof(line), stdin) != NULL) {
 391                unsigned char sha1[20];
 392                if (get_sha1_hex(line, sha1))
 393                        die("expected sha1, got garbage");
 394                add_object_entry(sha1);
 395        }
 396        get_object_details();
 397
 398        printf("Packing %d objects\n", nr_objects);
 399
 400        sorted_by_sha = create_sorted_list(sha1_sort);
 401        sorted_by_type = create_sorted_list(type_size_sort);
 402        if (window)
 403                find_deltas(sorted_by_type, window+1);
 404
 405        write_pack_file();
 406        write_index_file();
 407        return 0;
 408}