68c7e592b51b2174390bf9ab17e7dec7a9841d52
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4
   5static const char pack_usage[] = "git-pack-objects [--window=N] base-name < object-list";
   6
   7enum object_type {
   8        OBJ_NONE,
   9        OBJ_COMMIT,
  10        OBJ_TREE,
  11        OBJ_BLOB,
  12        OBJ_DELTA       // NOTE! This is _not_ the same as a "delta" object in the filesystem
  13};
  14
  15struct object_entry {
  16        unsigned char sha1[20];
  17        unsigned long size;
  18        unsigned long offset;
  19        unsigned int depth;
  20        unsigned int flags;
  21        enum object_type type;
  22        unsigned long delta_size;
  23        struct object_entry *delta;
  24};
  25
  26static struct object_entry **sorted_by_sha, **sorted_by_type;
  27static struct object_entry *objects = NULL;
  28static int nr_objects = 0, nr_alloc = 0;
  29static int delta_window = 10;
  30static const char *base_name;
  31
  32struct myfile {
  33        int fd;
  34        unsigned long chars;
  35        unsigned char buffer[8192];
  36};
  37
  38static FILE *create_file(const char *suffix)
  39{
  40        static char filename[PATH_MAX];
  41        unsigned len;
  42
  43        len = snprintf(filename, PATH_MAX, "%s.%s", base_name, suffix);
  44        if (len >= PATH_MAX)
  45                die("you wascally wabbit, you");
  46        return fopen(filename, "w");
  47}
  48
  49static unsigned long fwrite_compressed(void *in, unsigned long size, FILE *f)
  50{
  51        z_stream stream;
  52        unsigned long maxsize;
  53        void *out;
  54
  55        memset(&stream, 0, sizeof(stream));
  56        deflateInit(&stream, Z_DEFAULT_COMPRESSION);
  57        maxsize = deflateBound(&stream, size);
  58        out = xmalloc(maxsize);
  59
  60        /* Compress it */
  61        stream.next_in = in;
  62        stream.avail_in = size;
  63
  64        stream.next_out = out;
  65        stream.avail_out = maxsize;
  66
  67        while (deflate(&stream, Z_FINISH) == Z_OK)
  68                /* nothing */;
  69        deflateEnd(&stream);
  70
  71        size = stream.total_out;
  72        fwrite(out, size, 1, f);
  73        free(out);
  74        return size;
  75}
  76
  77static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
  78{
  79        unsigned long othersize, delta_size;
  80        char type[10];
  81        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
  82        void *delta_buf;
  83
  84        if (!otherbuf)
  85                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
  86        delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size);
  87        if (delta_size != entry->delta_size)
  88                die("delta size changed");
  89        free(buf);
  90        free(otherbuf);
  91        return delta_buf;
  92}
  93
  94static unsigned long write_object(FILE *f, struct object_entry *entry)
  95{
  96        unsigned long size;
  97        char type[10];
  98        void *buf = read_sha1_file(entry->sha1, type, &size);
  99        char header[21];
 100        unsigned hdrlen, datalen;
 101
 102        if (!buf)
 103                die("unable to read %s", sha1_to_hex(entry->sha1));
 104        if (size != entry->size)
 105                die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 106
 107        /*
 108         * The object header is a byte of 'type' followed by four bytes of
 109         * length, except for deltas that has the 20 bytes of delta sha
 110         * instead.
 111         */
 112        header[0] = ".CTB"[entry->type];
 113        datalen = htonl(size);
 114        memcpy(header+1, &datalen, 4);
 115        hdrlen = 5;
 116        if (entry->delta) {
 117                header[0] = 'D';
 118                memcpy(header+1, entry->delta, 20);
 119                buf = delta_against(buf, size, entry);
 120                size = entry->delta_size;
 121                hdrlen = 21;
 122        }
 123        fwrite(header, hdrlen, 1, f);
 124        datalen = fwrite_compressed(buf, size, f);
 125        free(buf);
 126        return hdrlen + datalen;
 127}
 128
 129static void write_pack_file(void)
 130{
 131        int i;
 132        FILE *f = create_file("pack");
 133        unsigned long offset = 0;
 134        unsigned long mb;
 135
 136        for (i = 0; i < nr_objects; i++) {
 137                struct object_entry *entry = objects + i;
 138                entry->offset = offset;
 139                offset += write_object(f, entry);
 140        }
 141        fclose(f);
 142        mb = offset >> 20;
 143        offset &= 0xfffff;
 144}
 145
 146static void write_index_file(void)
 147{
 148        int i;
 149        FILE *f = create_file("idx");
 150        struct object_entry **list = sorted_by_sha;
 151        struct object_entry **last = list + nr_objects;
 152        unsigned int array[256];
 153
 154        /*
 155         * Write the first-level table (the list is sorted,
 156         * but we use a 256-entry lookup to be able to avoid
 157         * having to do eight extra binary search iterations)
 158         */
 159        for (i = 0; i < 256; i++) {
 160                struct object_entry **next = list;
 161                while (next < last) {
 162                        struct object_entry *entry = *next;
 163                        if (entry->sha1[0] != i)
 164                                break;
 165                        next++;
 166                }
 167                array[i] = htonl(next - sorted_by_sha);
 168                list = next;
 169        }
 170        fwrite(array, 256, sizeof(int), f);
 171
 172        /*
 173         * Write the actual SHA1 entries..
 174         */
 175        list = sorted_by_sha;
 176        for (i = 0; i < nr_objects; i++) {
 177                struct object_entry *entry = *list++;
 178                unsigned int offset = htonl(entry->offset);
 179                fwrite(&offset, 4, 1, f);
 180                fwrite(entry->sha1, 20, 1, f);
 181        }
 182        fclose(f);
 183}
 184
 185static void add_object_entry(unsigned char *sha1)
 186{
 187        unsigned int idx = nr_objects;
 188        struct object_entry *entry;
 189
 190        if (idx >= nr_alloc) {
 191                unsigned int needed = (idx + 1024) * 3 / 2;
 192                objects = xrealloc(objects, needed * sizeof(*entry));
 193                nr_alloc = needed;
 194        }
 195        entry = objects + idx;
 196        memset(entry, 0, sizeof(*entry));
 197        memcpy(entry->sha1, sha1, 20);
 198        nr_objects = idx+1;
 199}
 200
 201static void check_object(struct object_entry *entry)
 202{
 203        char buffer[128];
 204        char type[10];
 205        unsigned long mapsize;
 206        z_stream stream;
 207        void *map;
 208
 209        map = map_sha1_file(entry->sha1, &mapsize);
 210        if (!map)
 211                die("unable to map %s", sha1_to_hex(entry->sha1));
 212        if (unpack_sha1_header(&stream, map, mapsize, buffer, sizeof(buffer)) < 0)
 213                die("unable to unpack %s header", sha1_to_hex(entry->sha1));
 214        munmap(map, mapsize);
 215        if (parse_sha1_header(buffer, type, &entry->size) < 0)
 216                die("unable to parse %s header", sha1_to_hex(entry->sha1));
 217        if (!strcmp(type, "commit")) {
 218                entry->type = OBJ_COMMIT;
 219        } else if (!strcmp(type, "tree")) {
 220                entry->type = OBJ_TREE;
 221        } else if (!strcmp(type, "blob")) {
 222                entry->type = OBJ_BLOB;
 223        } else
 224                die("unable to pack object %s of type %s", sha1_to_hex(entry->sha1), type);
 225}
 226
 227static void get_object_details(void)
 228{
 229        int i;
 230        struct object_entry *entry = objects;
 231
 232        for (i = 0; i < nr_objects; i++)
 233                check_object(entry++);
 234}
 235
 236typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 237
 238static entry_sort_t current_sort;
 239
 240static int sort_comparator(const void *_a, const void *_b)
 241{
 242        struct object_entry *a = *(struct object_entry **)_a;
 243        struct object_entry *b = *(struct object_entry **)_b;
 244        return current_sort(a,b);
 245}
 246
 247static struct object_entry **create_sorted_list(entry_sort_t sort)
 248{
 249        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 250        int i;
 251
 252        for (i = 0; i < nr_objects; i++)
 253                list[i] = objects + i;
 254        current_sort = sort;
 255        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 256        return list;
 257}
 258
 259static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 260{
 261        return memcmp(a->sha1, b->sha1, 20);
 262}
 263
 264static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 265{
 266        if (a->type < b->type)
 267                return -1;
 268        if (a->type > b->type)
 269                return 1;
 270        if (a->size < b->size)
 271                return -1;
 272        if (a->size > b->size)
 273                return 1;
 274        return a < b ? -1 : (a > b);
 275}
 276
 277struct unpacked {
 278        struct object_entry *entry;
 279        void *data;
 280};
 281
 282/*
 283 * We search for deltas in a list sorted by type and by size, and
 284 * walk the "old" chain backwards in the list, so if the type
 285 * has changed or the size difference is too big, there's no point
 286 * in even continuing the walk, since the other old objects are
 287 * going to be even smaller or of a different type. So return -1
 288 * once we determine that there's no point even trying.
 289 */
 290static int try_delta(struct unpacked *cur, struct unpacked *old)
 291{
 292        struct object_entry *cur_entry = cur->entry;
 293        struct object_entry *old_entry = old->entry;
 294        unsigned long size, oldsize, delta_size;
 295        void *delta_buf;
 296
 297        /* Don't bother doing diffs between different types */
 298        if (cur_entry->type != old_entry->type)
 299                return -1;
 300
 301        /* Size is guaranteed to be larger than or equal to oldsize */
 302        size = cur_entry->size;
 303        oldsize = old_entry->size;
 304        if (size - oldsize > oldsize / 4)
 305                return -1;
 306
 307        /*
 308         * NOTE!
 309         *
 310         * We always delta from the bigger to the smaller, since that's
 311         * more space-efficient (deletes don't have to say _what_ they
 312         * delete).
 313         */
 314        delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size);
 315        if (!delta_buf)
 316                die("unable to create delta");
 317        if (delta_size + 20 < size / 2) {
 318                if (!cur_entry->delta || cur_entry->delta_size > delta_size) {
 319                        cur_entry->delta = old_entry;
 320                        cur_entry->delta_size = delta_size;
 321                }
 322        }
 323        free(delta_buf);
 324        return 0;
 325}
 326
 327static void find_deltas(struct object_entry **list, int window)
 328{
 329        unsigned int i;
 330        unsigned int array_size = window * sizeof(struct unpacked);
 331        struct unpacked *array = xmalloc(array_size);
 332
 333        memset(array, 0, array_size);
 334        for (i = 0; i < nr_objects; i++) {
 335                unsigned int idx = i % window;
 336                struct object_entry *entry = list[i];
 337                struct unpacked *n = array + idx;
 338                unsigned long size;
 339                char type[10];
 340                int j;
 341
 342                free(n->data);
 343                n->entry = entry;
 344                n->data = read_sha1_file(entry->sha1, type, &size);
 345                if (size != entry->size)
 346                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 347                for (j = 0; j < window; j++) {
 348                        unsigned int other_idx = idx - 1 - j;
 349                        struct unpacked *m;
 350                        if (other_idx < 0)
 351                                other_idx += window;
 352                        m = array + other_idx;
 353                        if (!m->entry)
 354                                break;
 355                        if (try_delta(n, m) < 0)
 356                                break;
 357                }
 358        }
 359}
 360
 361int main(int argc, char **argv)
 362{
 363        char line[128];
 364        int i;
 365
 366        for (i = 1; i < argc; i++) {
 367                const char *arg = argv[i];
 368
 369                if (*arg == '-') {
 370                        if (!strncmp("--window=", arg, 9)) {
 371                                char *end;
 372                                delta_window = strtoul(arg+9, &end, 0);
 373                                if (!arg[9] || *end)
 374                                        usage(pack_usage);
 375                                continue;
 376                        }
 377                        usage(pack_usage);
 378                }
 379                if (base_name)
 380                        usage(pack_usage);
 381                base_name = arg;
 382        }
 383
 384        if (!base_name)
 385                usage(pack_usage);
 386
 387        while (fgets(line, sizeof(line), stdin) != NULL) {
 388                unsigned char sha1[20];
 389                if (get_sha1_hex(line, sha1))
 390                        die("expected sha1, got garbage");
 391                add_object_entry(sha1);
 392        }
 393        get_object_details();
 394
 395        printf("Packing %d objects\n", nr_objects);
 396
 397        sorted_by_sha = create_sorted_list(sha1_sort);
 398        sorted_by_type = create_sorted_list(type_size_sort);
 399        if (delta_window)
 400                find_deltas(sorted_by_type, delta_window);
 401
 402        write_pack_file();
 403        write_index_file();
 404        return 0;
 405}