322c4bc3e9da508900801a316c6ac4eeac4ab468
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4
   5static const char pack_usage[] = "git-pack-objects [--window=N] base-name < object-list";
   6
   7enum object_type {
   8        OBJ_NONE,
   9        OBJ_COMMIT,
  10        OBJ_TREE,
  11        OBJ_BLOB,
  12        OBJ_DELTA       // NOTE! This is _not_ the same as a "delta" object in the filesystem
  13};
  14
  15struct object_entry {
  16        unsigned char sha1[20];
  17        unsigned long size;
  18        unsigned long offset;
  19        unsigned int depth;
  20        unsigned int flags;
  21        enum object_type type;
  22        unsigned long delta_size;
  23        struct object_entry *delta;
  24};
  25
  26static struct object_entry **sorted_by_sha, **sorted_by_type;
  27static struct object_entry *objects = NULL;
  28static int nr_objects = 0, nr_alloc = 0;
  29static int delta_window = 10;
  30static const char *base_name;
  31
  32struct myfile {
  33        int fd;
  34        unsigned long chars;
  35        unsigned char buffer[8192];
  36};
  37
  38static FILE *create_file(const char *suffix)
  39{
  40        static char filename[PATH_MAX];
  41        unsigned len;
  42
  43        len = snprintf(filename, PATH_MAX, "%s.%s", base_name, suffix);
  44        if (len >= PATH_MAX)
  45                die("you wascally wabbit, you");
  46        return fopen(filename, "w");
  47}
  48
  49static unsigned long fwrite_compressed(void *in, unsigned long size, FILE *f)
  50{
  51        z_stream stream;
  52        unsigned long maxsize;
  53        void *out;
  54
  55        memset(&stream, 0, sizeof(stream));
  56        deflateInit(&stream, Z_DEFAULT_COMPRESSION);
  57        maxsize = deflateBound(&stream, size);
  58        out = xmalloc(maxsize);
  59
  60        /* Compress it */
  61        stream.next_in = in;
  62        stream.avail_in = size;
  63
  64        stream.next_out = out;
  65        stream.avail_out = maxsize;
  66
  67        while (deflate(&stream, Z_FINISH) == Z_OK)
  68                /* nothing */;
  69        deflateEnd(&stream);
  70
  71        size = stream.total_out;
  72        fwrite(out, size, 1, f);
  73        free(out);
  74        return size;
  75}
  76
  77static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
  78{
  79        unsigned long othersize, delta_size;
  80        char type[10];
  81        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
  82        void *delta_buf;
  83
  84        if (!otherbuf)
  85                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
  86        delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size);
  87        if (delta_size != entry->delta_size)
  88                die("delta size changed");
  89        free(buf);
  90        free(otherbuf);
  91        return delta_buf;
  92}
  93
  94static unsigned long write_object(FILE *f, struct object_entry *entry)
  95{
  96        unsigned long size;
  97        char type[10];
  98        void *buf = read_sha1_file(entry->sha1, type, &size);
  99        char header[21];
 100        unsigned hdrlen, datalen;
 101
 102        if (!buf)
 103                die("unable to read %s", sha1_to_hex(entry->sha1));
 104        if (size != entry->size)
 105                die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 106
 107        /*
 108         * The object header is a byte of 'type' followed by four bytes of
 109         * length, except for deltas that has the 20 bytes of delta sha
 110         * instead.
 111         */
 112        header[0] = ".CTB"[entry->type];
 113        datalen = htonl(size);
 114        memcpy(header+1, &datalen, 4);
 115        hdrlen = 5;
 116        if (entry->delta) {
 117                memcpy(header+1, entry->delta, 20);
 118                buf = delta_against(buf, size, entry);
 119                size = entry->delta_size;
 120                hdrlen = 21;
 121        }
 122        fwrite(header, hdrlen, 1, f);
 123        datalen = fwrite_compressed(buf, size, f);
 124        free(buf);
 125        return hdrlen + datalen;
 126}
 127
 128static void write_pack_file(void)
 129{
 130        int i;
 131        FILE *f = create_file("pack");
 132        unsigned long offset = 0;
 133        unsigned long mb;
 134
 135        for (i = 0; i < nr_objects; i++) {
 136                struct object_entry *entry = objects + i;
 137                entry->offset = offset;
 138                offset += write_object(f, entry);
 139        }
 140        fclose(f);
 141        mb = offset >> 20;
 142        offset &= 0xfffff;
 143}
 144
 145static void write_index_file(void)
 146{
 147        int i;
 148        FILE *f = create_file("idx");
 149        struct object_entry **list = sorted_by_sha;
 150        struct object_entry **last = list + nr_objects;
 151        unsigned int array[256];
 152
 153        /*
 154         * Write the first-level table (the list is sorted,
 155         * but we use a 256-entry lookup to be able to avoid
 156         * having to do eight extra binary search iterations)
 157         */
 158        for (i = 0; i < 256; i++) {
 159                struct object_entry **next = list;
 160                while (next < last) {
 161                        struct object_entry *entry = *next;
 162                        if (entry->sha1[0] != i)
 163                                break;
 164                        next++;
 165                }
 166                array[i] = htonl(next - sorted_by_sha);
 167                list = next;
 168        }
 169        fwrite(array, 256, sizeof(int), f);
 170
 171        /*
 172         * Write the actual SHA1 entries..
 173         */
 174        list = sorted_by_sha;
 175        for (i < 0; i < nr_objects; i++) {
 176                struct object_entry *entry = *list++;
 177                unsigned int offset = htonl(entry->offset);
 178                fwrite(&offset, 4, 1, f);
 179                fwrite(entry->sha1, 20, 1, f);
 180        }
 181        fclose(f);
 182}
 183
 184static void add_object_entry(unsigned char *sha1)
 185{
 186        unsigned int idx = nr_objects;
 187        struct object_entry *entry;
 188
 189        if (idx >= nr_alloc) {
 190                unsigned int needed = (idx + 1024) * 3 / 2;
 191                objects = xrealloc(objects, needed * sizeof(*entry));
 192                nr_alloc = needed;
 193        }
 194        entry = objects + idx;
 195        memset(entry, 0, sizeof(*entry));
 196        memcpy(entry->sha1, sha1, 20);
 197        nr_objects = idx+1;
 198}
 199
 200static void check_object(struct object_entry *entry)
 201{
 202        char buffer[128];
 203        char type[10];
 204        unsigned long mapsize;
 205        z_stream stream;
 206        void *map;
 207
 208        map = map_sha1_file(entry->sha1, &mapsize);
 209        if (!map)
 210                die("unable to map %s", sha1_to_hex(entry->sha1));
 211        if (unpack_sha1_header(&stream, map, mapsize, buffer, sizeof(buffer)) < 0)
 212                die("unable to unpack %s header", sha1_to_hex(entry->sha1));
 213        munmap(map, mapsize);
 214        if (parse_sha1_header(buffer, type, &entry->size) < 0)
 215                die("unable to parse %s header", sha1_to_hex(entry->sha1));
 216        if (!strcmp(type, "commit")) {
 217                entry->type = OBJ_COMMIT;
 218        } else if (!strcmp(type, "tree")) {
 219                entry->type = OBJ_TREE;
 220        } else if (!strcmp(type, "blob")) {
 221                entry->type = OBJ_BLOB;
 222        } else
 223                die("unable to pack object %s of type %s", sha1_to_hex(entry->sha1), type);
 224}
 225
 226static void get_object_details(void)
 227{
 228        int i;
 229        struct object_entry *entry = objects;
 230
 231        for (i = 0; i < nr_objects; i++)
 232                check_object(entry++);
 233}
 234
 235typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 236
 237static entry_sort_t current_sort;
 238
 239static int sort_comparator(const void *_a, const void *_b)
 240{
 241        struct object_entry *a = *(struct object_entry **)_a;
 242        struct object_entry *b = *(struct object_entry **)_b;
 243        return current_sort(a,b);
 244}
 245
 246static struct object_entry **create_sorted_list(entry_sort_t sort)
 247{
 248        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 249        int i;
 250
 251        for (i = 0; i < nr_objects; i++)
 252                list[i] = objects + i;
 253        current_sort = sort;
 254        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 255        return list;
 256}
 257
 258static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 259{
 260        return memcmp(a->sha1, b->sha1, 20);
 261}
 262
 263static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 264{
 265        if (a->type < b->type)
 266                return -1;
 267        if (a->type > b->type)
 268                return 1;
 269        if (a->size < b->size)
 270                return -1;
 271        if (a->size > b->size)
 272                return 1;
 273        return a < b ? -1 : (a > b);
 274}
 275
 276struct unpacked {
 277        struct object_entry *entry;
 278        void *data;
 279};
 280
 281/*
 282 * We search for deltas in a list sorted by type and by size, and
 283 * walk the "old" chain backwards in the list, so if the type
 284 * has changed or the size difference is too big, there's no point
 285 * in even continuing the walk, since the other old objects are
 286 * going to be even smaller or of a different type. So return -1
 287 * once we determine that there's no point even trying.
 288 */
 289static int try_delta(struct unpacked *cur, struct unpacked *old)
 290{
 291        struct object_entry *cur_entry = cur->entry;
 292        struct object_entry *old_entry = old->entry;
 293        unsigned long size, oldsize, delta_size;
 294        void *delta_buf;
 295
 296        /* Don't bother doing diffs between different types */
 297        if (cur_entry->type != old_entry->type)
 298                return -1;
 299
 300        /* Size is guaranteed to be larger than or equal to oldsize */
 301        size = cur_entry->size;
 302        oldsize = old_entry->size;
 303        if (size - oldsize > oldsize / 4)
 304                return -1;
 305
 306        /*
 307         * NOTE!
 308         *
 309         * We always delta from the bigger to the smaller, since that's
 310         * more space-efficient (deletes don't have to say _what_ they
 311         * delete).
 312         */
 313        delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size);
 314        if (!delta_buf)
 315                die("unable to create delta");
 316        if (delta_size + 20 < size / 2) {
 317                if (!cur_entry->delta || cur_entry->delta_size > delta_size) {
 318                        cur_entry->delta = old_entry;
 319                        cur_entry->delta_size = delta_size;
 320                }
 321        }
 322        free(delta_buf);
 323}
 324
 325static void find_deltas(struct object_entry **list, int window)
 326{
 327        unsigned int i;
 328        unsigned int array_size = window * sizeof(struct unpacked);
 329        struct unpacked *array = xmalloc(array_size);
 330
 331        memset(array, 0, array_size);
 332        for (i = 0; i < nr_objects; i++) {
 333                unsigned int idx = i % window;
 334                struct object_entry *entry = list[i];
 335                struct unpacked *n = array + idx;
 336                unsigned long size;
 337                char type[10];
 338                int j;
 339
 340                free(n->data);
 341                n->entry = entry;
 342                n->data = read_sha1_file(entry->sha1, type, &size);
 343                if (size != entry->size)
 344                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 345                for (j = 0; j < window; j++) {
 346                        unsigned int other_idx = idx - 1 - j;
 347                        struct unpacked *m;
 348                        if (other_idx < 0)
 349                                other_idx += window;
 350                        m = array + other_idx;
 351                        if (!m->entry)
 352                                break;
 353                        if (try_delta(n, m) < 0)
 354                                break;
 355                }
 356        }
 357}
 358
 359int main(int argc, char **argv)
 360{
 361        char line[128];
 362        int i;
 363
 364        for (i = 1; i < argc; i++) {
 365                const char *arg = argv[i];
 366
 367                if (*arg == '-') {
 368                        if (!strncmp("--window=", arg, 9)) {
 369                                char *end;
 370                                delta_window = strtoul(arg+9, &end, 0);
 371                                if (!arg[9] || *end)
 372                                        usage(pack_usage);
 373                                continue;
 374                        }
 375                        usage(pack_usage);
 376                }
 377                if (base_name)
 378                        usage(pack_usage);
 379                base_name = arg;
 380        }
 381
 382        if (!base_name)
 383                usage(pack_usage);
 384
 385        while (fgets(line, sizeof(line), stdin) != NULL) {
 386                unsigned char sha1[20];
 387                if (get_sha1_hex(line, sha1))
 388                        die("expected sha1, got garbage");
 389                add_object_entry(sha1);
 390        }
 391        get_object_details();
 392
 393        printf("Packing %d objects\n", nr_objects);
 394
 395        sorted_by_sha = create_sorted_list(sha1_sort);
 396        sorted_by_type = create_sorted_list(type_size_sort);
 397        if (delta_window)
 398                find_deltas(sorted_by_type, delta_window);
 399
 400        write_pack_file();
 401        write_index_file();
 402        return 0;
 403}