pack-objects.con commit Merge branch 'master' (cfac3f3)
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4#include "pack.h"
   5#include "csum-file.h"
   6#include <sys/time.h>
   7
   8static const char pack_usage[] = "git-pack-objects [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
   9
  10struct object_entry {
  11        unsigned char sha1[20];
  12        unsigned long size;
  13        unsigned long offset;
  14        unsigned int depth;
  15        unsigned int hash;
  16        enum object_type type;
  17        unsigned long delta_size;
  18        struct object_entry *delta;
  19};
  20
  21static unsigned char object_list_sha1[20];
  22static int non_empty = 0;
  23static int local = 0;
  24static int incremental = 0;
  25static struct object_entry **sorted_by_sha, **sorted_by_type;
  26static struct object_entry *objects = NULL;
  27static int nr_objects = 0, nr_alloc = 0;
  28static const char *base_name;
  29static unsigned char pack_file_sha1[20];
  30static int progress = 0;
  31
  32static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
  33{
  34        unsigned long othersize, delta_size;
  35        char type[10];
  36        void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
  37        void *delta_buf;
  38
  39        if (!otherbuf)
  40                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
  41        delta_buf = diff_delta(otherbuf, othersize,
  42                               buf, size, &delta_size, 0);
  43        if (!delta_buf || delta_size != entry->delta_size)
  44                die("delta size changed");
  45        free(buf);
  46        free(otherbuf);
  47        return delta_buf;
  48}
  49
  50/*
  51 * The per-object header is a pretty dense thing, which is
  52 *  - first byte: low four bits are "size", then three bits of "type",
  53 *    and the high bit is "size continues".
  54 *  - each byte afterwards: low seven bits are size continuation,
  55 *    with the high bit being "size continues"
  56 */
  57static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
  58{
  59        int n = 1;
  60        unsigned char c;
  61
  62        if (type < OBJ_COMMIT || type > OBJ_DELTA)
  63                die("bad type %d", type);
  64
  65        c = (type << 4) | (size & 15);
  66        size >>= 4;
  67        while (size) {
  68                *hdr++ = c | 0x80;
  69                c = size & 0x7f;
  70                size >>= 7;
  71                n++;
  72        }
  73        *hdr = c;
  74        return n;
  75}
  76
  77static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
  78{
  79        unsigned long size;
  80        char type[10];
  81        void *buf = read_sha1_file(entry->sha1, type, &size);
  82        unsigned char header[10];
  83        unsigned hdrlen, datalen;
  84        enum object_type obj_type;
  85
  86        if (!buf)
  87                die("unable to read %s", sha1_to_hex(entry->sha1));
  88        if (size != entry->size)
  89                die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
  90
  91        /*
  92         * The object header is a byte of 'type' followed by zero or
  93         * more bytes of length.  For deltas, the 20 bytes of delta sha1
  94         * follows that.
  95         */
  96        obj_type = entry->type;
  97        if (entry->delta) {
  98                buf = delta_against(buf, size, entry);
  99                size = entry->delta_size;
 100                obj_type = OBJ_DELTA;
 101        }
 102        hdrlen = encode_header(obj_type, size, header);
 103        sha1write(f, header, hdrlen);
 104        if (entry->delta) {
 105                sha1write(f, entry->delta, 20);
 106                hdrlen += 20;
 107        }
 108        datalen = sha1write_compressed(f, buf, size);
 109        free(buf);
 110        return hdrlen + datalen;
 111}
 112
 113static unsigned long write_one(struct sha1file *f,
 114                               struct object_entry *e,
 115                               unsigned long offset)
 116{
 117        if (e->offset)
 118                /* offset starts from header size and cannot be zero
 119                 * if it is written already.
 120                 */
 121                return offset;
 122        e->offset = offset;
 123        offset += write_object(f, e);
 124        /* if we are deltified, write out its base object. */
 125        if (e->delta)
 126                offset = write_one(f, e->delta, offset);
 127        return offset;
 128}
 129
 130static void write_pack_file(void)
 131{
 132        int i;
 133        struct sha1file *f;
 134        unsigned long offset;
 135        unsigned long mb;
 136        struct pack_header hdr;
 137
 138        if (!base_name)
 139                f = sha1fd(1, "<stdout>");
 140        else
 141                f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
 142        hdr.hdr_signature = htonl(PACK_SIGNATURE);
 143        hdr.hdr_version = htonl(PACK_VERSION);
 144        hdr.hdr_entries = htonl(nr_objects);
 145        sha1write(f, &hdr, sizeof(hdr));
 146        offset = sizeof(hdr);
 147        for (i = 0; i < nr_objects; i++)
 148                offset = write_one(f, objects + i, offset);
 149
 150        sha1close(f, pack_file_sha1, 1);
 151        mb = offset >> 20;
 152        offset &= 0xfffff;
 153}
 154
 155static void write_index_file(void)
 156{
 157        int i;
 158        struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
 159        struct object_entry **list = sorted_by_sha;
 160        struct object_entry **last = list + nr_objects;
 161        unsigned int array[256];
 162
 163        /*
 164         * Write the first-level table (the list is sorted,
 165         * but we use a 256-entry lookup to be able to avoid
 166         * having to do eight extra binary search iterations).
 167         */
 168        for (i = 0; i < 256; i++) {
 169                struct object_entry **next = list;
 170                while (next < last) {
 171                        struct object_entry *entry = *next;
 172                        if (entry->sha1[0] != i)
 173                                break;
 174                        next++;
 175                }
 176                array[i] = htonl(next - sorted_by_sha);
 177                list = next;
 178        }
 179        sha1write(f, array, 256 * sizeof(int));
 180
 181        /*
 182         * Write the actual SHA1 entries..
 183         */
 184        list = sorted_by_sha;
 185        for (i = 0; i < nr_objects; i++) {
 186                struct object_entry *entry = *list++;
 187                unsigned int offset = htonl(entry->offset);
 188                sha1write(f, &offset, 4);
 189                sha1write(f, entry->sha1, 20);
 190        }
 191        sha1write(f, pack_file_sha1, 20);
 192        sha1close(f, NULL, 1);
 193}
 194
 195static int add_object_entry(unsigned char *sha1, unsigned int hash)
 196{
 197        unsigned int idx = nr_objects;
 198        struct object_entry *entry;
 199
 200        if (incremental || local) {
 201                struct packed_git *p;
 202
 203                for (p = packed_git; p; p = p->next) {
 204                        struct pack_entry e;
 205
 206                        if (find_pack_entry_one(sha1, &e, p)) {
 207                                if (incremental)
 208                                        return 0;
 209                                if (local && !p->pack_local)
 210                                        return 0;
 211                        }
 212                }
 213        }
 214
 215        if (idx >= nr_alloc) {
 216                unsigned int needed = (idx + 1024) * 3 / 2;
 217                objects = xrealloc(objects, needed * sizeof(*entry));
 218                nr_alloc = needed;
 219        }
 220        entry = objects + idx;
 221        memset(entry, 0, sizeof(*entry));
 222        memcpy(entry->sha1, sha1, 20);
 223        entry->hash = hash;
 224        nr_objects = idx+1;
 225        return 1;
 226}
 227
 228static void check_object(struct object_entry *entry)
 229{
 230        char type[20];
 231
 232        if (!sha1_object_info(entry->sha1, type, &entry->size)) {
 233                if (!strcmp(type, "commit")) {
 234                        entry->type = OBJ_COMMIT;
 235                } else if (!strcmp(type, "tree")) {
 236                        entry->type = OBJ_TREE;
 237                } else if (!strcmp(type, "blob")) {
 238                        entry->type = OBJ_BLOB;
 239                } else if (!strcmp(type, "tag")) {
 240                        entry->type = OBJ_TAG;
 241                } else
 242                        die("unable to pack object %s of type %s",
 243                            sha1_to_hex(entry->sha1), type);
 244        }
 245        else
 246                die("unable to get type of object %s",
 247                    sha1_to_hex(entry->sha1));
 248}
 249
 250static void get_object_details(void)
 251{
 252        int i;
 253        struct object_entry *entry = objects;
 254
 255        for (i = 0; i < nr_objects; i++)
 256                check_object(entry++);
 257}
 258
 259typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
 260
 261static entry_sort_t current_sort;
 262
 263static int sort_comparator(const void *_a, const void *_b)
 264{
 265        struct object_entry *a = *(struct object_entry **)_a;
 266        struct object_entry *b = *(struct object_entry **)_b;
 267        return current_sort(a,b);
 268}
 269
 270static struct object_entry **create_sorted_list(entry_sort_t sort)
 271{
 272        struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
 273        int i;
 274
 275        for (i = 0; i < nr_objects; i++)
 276                list[i] = objects + i;
 277        current_sort = sort;
 278        qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
 279        return list;
 280}
 281
 282static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 283{
 284        return memcmp(a->sha1, b->sha1, 20);
 285}
 286
 287static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 288{
 289        if (a->type < b->type)
 290                return -1;
 291        if (a->type > b->type)
 292                return 1;
 293        if (a->hash < b->hash)
 294                return -1;
 295        if (a->hash > b->hash)
 296                return 1;
 297        if (a->size < b->size)
 298                return -1;
 299        if (a->size > b->size)
 300                return 1;
 301        return a < b ? -1 : (a > b);
 302}
 303
 304struct unpacked {
 305        struct object_entry *entry;
 306        void *data;
 307};
 308
 309/*
 310 * We search for deltas _backwards_ in a list sorted by type and
 311 * by size, so that we see progressively smaller and smaller files.
 312 * That's because we prefer deltas to be from the bigger file
 313 * to the smaller - deletes are potentially cheaper, but perhaps
 314 * more importantly, the bigger file is likely the more recent
 315 * one.
 316 */
 317static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
 318{
 319        struct object_entry *cur_entry = cur->entry;
 320        struct object_entry *old_entry = old->entry;
 321        unsigned long size, oldsize, delta_size, sizediff;
 322        long max_size;
 323        void *delta_buf;
 324
 325        /* Don't bother doing diffs between different types */
 326        if (cur_entry->type != old_entry->type)
 327                return -1;
 328
 329        size = cur_entry->size;
 330        if (size < 50)
 331                return -1;
 332        oldsize = old_entry->size;
 333        sizediff = oldsize > size ? oldsize - size : size - oldsize;
 334        if (sizediff > size / 8)
 335                return -1;
 336        if (old_entry->depth >= max_depth)
 337                return 0;
 338
 339        /*
 340         * NOTE!
 341         *
 342         * We always delta from the bigger to the smaller, since that's
 343         * more space-efficient (deletes don't have to say _what_ they
 344         * delete).
 345         */
 346        max_size = size / 2 - 20;
 347        if (cur_entry->delta)
 348                max_size = cur_entry->delta_size-1;
 349        if (sizediff >= max_size)
 350                return -1;
 351        delta_buf = diff_delta(old->data, oldsize,
 352                               cur->data, size, &delta_size, max_size);
 353        if (!delta_buf)
 354                return 0;
 355        cur_entry->delta = old_entry;
 356        cur_entry->delta_size = delta_size;
 357        cur_entry->depth = old_entry->depth + 1;
 358        free(delta_buf);
 359        return 0;
 360}
 361
 362static void find_deltas(struct object_entry **list, int window, int depth)
 363{
 364        int i, idx;
 365        unsigned int array_size = window * sizeof(struct unpacked);
 366        struct unpacked *array = xmalloc(array_size);
 367        int eye_candy;
 368
 369        memset(array, 0, array_size);
 370        i = nr_objects;
 371        idx = 0;
 372        eye_candy = i - (nr_objects / 20);
 373
 374        while (--i >= 0) {
 375                struct object_entry *entry = list[i];
 376                struct unpacked *n = array + idx;
 377                unsigned long size;
 378                char type[10];
 379                int j;
 380
 381                if (progress && i <= eye_candy) {
 382                        eye_candy -= nr_objects / 20;
 383                        fputc('.', stderr);
 384                }
 385                free(n->data);
 386                n->entry = entry;
 387                n->data = read_sha1_file(entry->sha1, type, &size);
 388                if (size != entry->size)
 389                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 390                j = window;
 391                while (--j > 0) {
 392                        unsigned int other_idx = idx + j;
 393                        struct unpacked *m;
 394                        if (other_idx >= window)
 395                                other_idx -= window;
 396                        m = array + other_idx;
 397                        if (!m->entry)
 398                                break;
 399                        if (try_delta(n, m, depth) < 0)
 400                                break;
 401                }
 402                idx++;
 403                if (idx >= window)
 404                        idx = 0;
 405        }
 406
 407        for (i = 0; i < window; ++i)
 408                free(array[i].data);
 409        free(array);
 410}
 411
 412static void prepare_pack(int window, int depth)
 413{
 414        get_object_details();
 415
 416        if (progress)
 417                fprintf(stderr, "Packing %d objects", nr_objects);
 418        sorted_by_type = create_sorted_list(type_size_sort);
 419        if (window && depth)
 420                find_deltas(sorted_by_type, window+1, depth);
 421        if (progress)
 422                fputc('\n', stderr);
 423        write_pack_file();
 424}
 425
 426static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
 427{
 428        static const char cache[] = "pack-cache/pack-%s.%s";
 429        char *cached_pack, *cached_idx;
 430        int ifd, ofd, ifd_ix = -1;
 431
 432        cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
 433        ifd = open(cached_pack, O_RDONLY);
 434        if (ifd < 0)
 435                return 0;
 436
 437        if (!pack_to_stdout) {
 438                cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
 439                ifd_ix = open(cached_idx, O_RDONLY);
 440                if (ifd_ix < 0) {
 441                        close(ifd);
 442                        return 0;
 443                }
 444        }
 445
 446        fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
 447                sha1_to_hex(sha1));
 448
 449        if (pack_to_stdout) {
 450                if (copy_fd(ifd, 1))
 451                        exit(1);
 452                close(ifd);
 453        }
 454        else {
 455                char name[PATH_MAX];
 456                snprintf(name, sizeof(name),
 457                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
 458                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 459                if (ofd < 0)
 460                        die("unable to open %s (%s)", name, strerror(errno));
 461                if (copy_fd(ifd, ofd))
 462                        exit(1);
 463                close(ifd);
 464
 465                snprintf(name, sizeof(name),
 466                         "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
 467                ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
 468                if (ofd < 0)
 469                        die("unable to open %s (%s)", name, strerror(errno));
 470                if (copy_fd(ifd_ix, ofd))
 471                        exit(1);
 472                close(ifd_ix);
 473                puts(sha1_to_hex(sha1));
 474        }
 475
 476        return 1;
 477}
 478
 479int main(int argc, char **argv)
 480{
 481        SHA_CTX ctx;
 482        char line[PATH_MAX + 20];
 483        int window = 10, depth = 10, pack_to_stdout = 0;
 484        struct object_entry **list;
 485        int i;
 486        struct timeval prev_tv;
 487        int eye_candy = 0;
 488        int eye_candy_incr = 500;
 489
 490
 491        setup_git_directory();
 492
 493        for (i = 1; i < argc; i++) {
 494                const char *arg = argv[i];
 495
 496                if (*arg == '-') {
 497                        if (!strcmp("--non-empty", arg)) {
 498                                non_empty = 1;
 499                                continue;
 500                        }
 501                        if (!strcmp("--local", arg)) {
 502                                local = 1;
 503                                continue;
 504                        }
 505                        if (!strcmp("--incremental", arg)) {
 506                                incremental = 1;
 507                                continue;
 508                        }
 509                        if (!strncmp("--window=", arg, 9)) {
 510                                char *end;
 511                                window = strtoul(arg+9, &end, 0);
 512                                if (!arg[9] || *end)
 513                                        usage(pack_usage);
 514                                continue;
 515                        }
 516                        if (!strncmp("--depth=", arg, 8)) {
 517                                char *end;
 518                                depth = strtoul(arg+8, &end, 0);
 519                                if (!arg[8] || *end)
 520                                        usage(pack_usage);
 521                                continue;
 522                        }
 523                        if (!strcmp("--stdout", arg)) {
 524                                pack_to_stdout = 1;
 525                                continue;
 526                        }
 527                        usage(pack_usage);
 528                }
 529                if (base_name)
 530                        usage(pack_usage);
 531                base_name = arg;
 532        }
 533
 534        if (pack_to_stdout != !base_name)
 535                usage(pack_usage);
 536
 537        progress = isatty(2);
 538
 539        prepare_packed_git();
 540        if (progress) {
 541                fprintf(stderr, "Generating pack...\n");
 542                gettimeofday(&prev_tv, NULL);
 543        }
 544        while (fgets(line, sizeof(line), stdin) != NULL) {
 545                unsigned int hash;
 546                char *p;
 547                unsigned char sha1[20];
 548
 549                if (progress && (eye_candy <= nr_objects)) {
 550                        fprintf(stderr, "Counting objects...%d\r", nr_objects);
 551                        if (eye_candy && (50 <= eye_candy_incr)) {
 552                                struct timeval tv;
 553                                int time_diff;
 554                                gettimeofday(&tv, NULL);
 555                                time_diff = (tv.tv_sec - prev_tv.tv_sec);
 556                                time_diff <<= 10;
 557                                time_diff += (tv.tv_usec - prev_tv.tv_usec);
 558                                if ((1 << 9) < time_diff)
 559                                        eye_candy_incr += 50;
 560                                else if (50 < eye_candy_incr)
 561                                        eye_candy_incr -= 50;
 562                        }
 563                        eye_candy += eye_candy_incr;
 564                }
 565                if (get_sha1_hex(line, sha1))
 566                        die("expected sha1, got garbage:\n %s", line);
 567                hash = 0;
 568                p = line+40;
 569                while (*p) {
 570                        unsigned char c = *p++;
 571                        if (isspace(c))
 572                                continue;
 573                        hash = hash * 11 + c;
 574                }
 575                add_object_entry(sha1, hash);
 576        }
 577        if (progress)
 578                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
 579        if (non_empty && !nr_objects)
 580                return 0;
 581
 582        sorted_by_sha = create_sorted_list(sha1_sort);
 583        SHA1_Init(&ctx);
 584        list = sorted_by_sha;
 585        for (i = 0; i < nr_objects; i++) {
 586                struct object_entry *entry = *list++;
 587                SHA1_Update(&ctx, entry->sha1, 20);
 588        }
 589        SHA1_Final(object_list_sha1, &ctx);
 590
 591        if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
 592                ;
 593        else {
 594                prepare_pack(window, depth);
 595                if (!pack_to_stdout) {
 596                        write_index_file();
 597                        puts(sha1_to_hex(object_list_sha1));
 598                }
 599        }
 600        return 0;
 601}