index-pack.con commit Merge branch 'maint' (a622f6b)
   1#include "cache.h"
   2#include "delta.h"
   3#include "pack.h"
   4#include "csum-file.h"
   5#include "blob.h"
   6#include "commit.h"
   7#include "tag.h"
   8#include "tree.h"
   9
  10static const char index_pack_usage[] =
  11"git-index-pack [-o index-file] pack-file";
  12
  13struct object_entry
  14{
  15        unsigned long offset;
  16        unsigned long size;
  17        unsigned int hdr_size;
  18        enum object_type type;
  19        enum object_type real_type;
  20        unsigned char sha1[20];
  21};
  22
  23union delta_base {
  24        unsigned char sha1[20];
  25        unsigned long offset;
  26};
  27
  28/*
  29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
  30 * to memcmp() only the first 20 bytes.
  31 */
  32#define UNION_BASE_SZ   20
  33
  34struct delta_entry
  35{
  36        struct object_entry *obj;
  37        union delta_base base;
  38};
  39
  40static const char *pack_name;
  41static struct object_entry *objects;
  42static struct delta_entry *deltas;
  43static int nr_objects;
  44static int nr_deltas;
  45
  46/* We always read in 4kB chunks. */
  47static unsigned char input_buffer[4096];
  48static unsigned long input_offset, input_len, consumed_bytes;
  49static SHA_CTX input_ctx;
  50static int input_fd;
  51
  52/*
  53 * Make sure at least "min" bytes are available in the buffer, and
  54 * return the pointer to the buffer.
  55 */
  56static void *fill(int min)
  57{
  58        if (min <= input_len)
  59                return input_buffer + input_offset;
  60        if (min > sizeof(input_buffer))
  61                die("cannot fill %d bytes", min);
  62        if (input_offset) {
  63                SHA1_Update(&input_ctx, input_buffer, input_offset);
  64                memmove(input_buffer, input_buffer + input_offset, input_len);
  65                input_offset = 0;
  66        }
  67        do {
  68                int ret = xread(input_fd, input_buffer + input_len,
  69                                sizeof(input_buffer) - input_len);
  70                if (ret <= 0) {
  71                        if (!ret)
  72                                die("early EOF");
  73                        die("read error on input: %s", strerror(errno));
  74                }
  75                input_len += ret;
  76        } while (input_len < min);
  77        return input_buffer;
  78}
  79
  80static void use(int bytes)
  81{
  82        if (bytes > input_len)
  83                die("used more bytes than were available");
  84        input_len -= bytes;
  85        input_offset += bytes;
  86        consumed_bytes += bytes;
  87}
  88
  89static void open_pack_file(void)
  90{
  91        input_fd = open(pack_name, O_RDONLY);
  92        if (input_fd < 0)
  93                die("cannot open packfile '%s': %s", pack_name,
  94                    strerror(errno));
  95        SHA1_Init(&input_ctx);
  96}
  97
  98static void parse_pack_header(void)
  99{
 100        struct pack_header *hdr = fill(sizeof(struct pack_header));
 101
 102        /* Header consistency check */
 103        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
 104                die("packfile '%s' signature mismatch", pack_name);
 105        if (!pack_version_ok(hdr->hdr_version))
 106                die("packfile '%s' version %d unsupported",
 107                    pack_name, ntohl(hdr->hdr_version));
 108
 109        nr_objects = ntohl(hdr->hdr_entries);
 110        use(sizeof(struct pack_header));
 111        /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
 112}
 113
 114static void bad_object(unsigned long offset, const char *format,
 115                       ...) NORETURN __attribute__((format (printf, 2, 3)));
 116
 117static void bad_object(unsigned long offset, const char *format, ...)
 118{
 119        va_list params;
 120        char buf[1024];
 121
 122        va_start(params, format);
 123        vsnprintf(buf, sizeof(buf), format, params);
 124        va_end(params);
 125        die("packfile '%s': bad object at offset %lu: %s",
 126            pack_name, offset, buf);
 127}
 128
 129static void *unpack_entry_data(unsigned long offset, unsigned long size)
 130{
 131        z_stream stream;
 132        void *buf = xmalloc(size);
 133
 134        memset(&stream, 0, sizeof(stream));
 135        stream.next_out = buf;
 136        stream.avail_out = size;
 137        stream.next_in = fill(1);
 138        stream.avail_in = input_len;
 139        inflateInit(&stream);
 140
 141        for (;;) {
 142                int ret = inflate(&stream, 0);
 143                use(input_len - stream.avail_in);
 144                if (stream.total_out == size && ret == Z_STREAM_END)
 145                        break;
 146                if (ret != Z_OK)
 147                        bad_object(offset, "inflate returned %d", ret);
 148                stream.next_in = fill(1);
 149                stream.avail_in = input_len;
 150        }
 151        inflateEnd(&stream);
 152        return buf;
 153}
 154
 155static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 156{
 157        unsigned char *p, c;
 158        unsigned long size, base_offset;
 159        unsigned shift;
 160
 161        obj->offset = consumed_bytes;
 162
 163        p = fill(1);
 164        c = *p;
 165        use(1);
 166        obj->type = (c >> 4) & 7;
 167        size = (c & 15);
 168        shift = 4;
 169        while (c & 0x80) {
 170                p = fill(1);
 171                c = *p;
 172                use(1);
 173                size += (c & 0x7fUL) << shift;
 174                shift += 7;
 175        }
 176        obj->size = size;
 177
 178        switch (obj->type) {
 179        case OBJ_REF_DELTA:
 180                hashcpy(delta_base->sha1, fill(20));
 181                use(20);
 182                break;
 183        case OBJ_OFS_DELTA:
 184                memset(delta_base, 0, sizeof(*delta_base));
 185                p = fill(1);
 186                c = *p;
 187                use(1);
 188                base_offset = c & 127;
 189                while (c & 128) {
 190                        base_offset += 1;
 191                        if (!base_offset || base_offset & ~(~0UL >> 7))
 192                                bad_object(obj->offset, "offset value overflow for delta base object");
 193                        p = fill(1);
 194                        c = *p;
 195                        use(1);
 196                        base_offset = (base_offset << 7) + (c & 127);
 197                }
 198                delta_base->offset = obj->offset - base_offset;
 199                if (delta_base->offset >= obj->offset)
 200                        bad_object(obj->offset, "delta base offset is out of bound");
 201                break;
 202        case OBJ_COMMIT:
 203        case OBJ_TREE:
 204        case OBJ_BLOB:
 205        case OBJ_TAG:
 206                break;
 207        default:
 208                bad_object(obj->offset, "bad object type %d", obj->type);
 209        }
 210        obj->hdr_size = consumed_bytes - obj->offset;
 211
 212        return unpack_entry_data(obj->offset, obj->size);
 213}
 214
 215static void * get_data_from_pack(struct object_entry *obj)
 216{
 217        unsigned long from = obj[0].offset + obj[0].hdr_size;
 218        unsigned long len = obj[1].offset - from;
 219        unsigned pg_offset = from % getpagesize();
 220        unsigned char *map, *data;
 221        z_stream stream;
 222        int st;
 223
 224        map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
 225                   input_fd, from - pg_offset);
 226        if (map == MAP_FAILED)
 227                die("cannot mmap packfile '%s': %s", pack_name, strerror(errno));
 228        data = xmalloc(obj->size);
 229        memset(&stream, 0, sizeof(stream));
 230        stream.next_out = data;
 231        stream.avail_out = obj->size;
 232        stream.next_in = map + pg_offset;
 233        stream.avail_in = len;
 234        inflateInit(&stream);
 235        while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
 236        inflateEnd(&stream);
 237        if (st != Z_STREAM_END || stream.total_out != obj->size)
 238                die("serious inflate inconsistency");
 239        munmap(map, len + pg_offset);
 240        return data;
 241}
 242
 243static int find_delta(const union delta_base *base)
 244{
 245        int first = 0, last = nr_deltas;
 246
 247        while (first < last) {
 248                int next = (first + last) / 2;
 249                struct delta_entry *delta = &deltas[next];
 250                int cmp;
 251
 252                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
 253                if (!cmp)
 254                        return next;
 255                if (cmp < 0) {
 256                        last = next;
 257                        continue;
 258                }
 259                first = next+1;
 260        }
 261        return -first-1;
 262}
 263
 264static int find_delta_childs(const union delta_base *base,
 265                             int *first_index, int *last_index)
 266{
 267        int first = find_delta(base);
 268        int last = first;
 269        int end = nr_deltas - 1;
 270
 271        if (first < 0)
 272                return -1;
 273        while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 274                --first;
 275        while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 276                ++last;
 277        *first_index = first;
 278        *last_index = last;
 279        return 0;
 280}
 281
 282static void sha1_object(const void *data, unsigned long size,
 283                        enum object_type type, unsigned char *sha1)
 284{
 285        SHA_CTX ctx;
 286        char header[50];
 287        int header_size;
 288        const char *type_str;
 289
 290        switch (type) {
 291        case OBJ_COMMIT: type_str = commit_type; break;
 292        case OBJ_TREE:   type_str = tree_type; break;
 293        case OBJ_BLOB:   type_str = blob_type; break;
 294        case OBJ_TAG:    type_str = tag_type; break;
 295        default:
 296                die("bad type %d", type);
 297        }
 298
 299        header_size = sprintf(header, "%s %lu", type_str, size) + 1;
 300
 301        SHA1_Init(&ctx);
 302        SHA1_Update(&ctx, header, header_size);
 303        SHA1_Update(&ctx, data, size);
 304        SHA1_Final(sha1, &ctx);
 305}
 306
 307static void resolve_delta(struct delta_entry *delta, void *base_data,
 308                          unsigned long base_size, enum object_type type)
 309{
 310        struct object_entry *obj = delta->obj;
 311        void *delta_data;
 312        unsigned long delta_size;
 313        void *result;
 314        unsigned long result_size;
 315        union delta_base delta_base;
 316        int j, first, last;
 317
 318        obj->real_type = type;
 319        delta_data = get_data_from_pack(obj);
 320        delta_size = obj->size;
 321        result = patch_delta(base_data, base_size, delta_data, delta_size,
 322                             &result_size);
 323        free(delta_data);
 324        if (!result)
 325                bad_object(obj->offset, "failed to apply delta");
 326        sha1_object(result, result_size, type, obj->sha1);
 327
 328        hashcpy(delta_base.sha1, obj->sha1);
 329        if (!find_delta_childs(&delta_base, &first, &last)) {
 330                for (j = first; j <= last; j++)
 331                        if (deltas[j].obj->type == OBJ_REF_DELTA)
 332                                resolve_delta(&deltas[j], result, result_size, type);
 333        }
 334
 335        memset(&delta_base, 0, sizeof(delta_base));
 336        delta_base.offset = obj->offset;
 337        if (!find_delta_childs(&delta_base, &first, &last)) {
 338                for (j = first; j <= last; j++)
 339                        if (deltas[j].obj->type == OBJ_OFS_DELTA)
 340                                resolve_delta(&deltas[j], result, result_size, type);
 341        }
 342
 343        free(result);
 344}
 345
 346static int compare_delta_entry(const void *a, const void *b)
 347{
 348        const struct delta_entry *delta_a = a;
 349        const struct delta_entry *delta_b = b;
 350        return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 351}
 352
 353/* Parse all objects and return the pack content SHA1 hash */
 354static void parse_pack_objects(unsigned char *sha1)
 355{
 356        int i;
 357        struct delta_entry *delta = deltas;
 358        void *data;
 359        struct stat st;
 360
 361        /*
 362         * First pass:
 363         * - find locations of all objects;
 364         * - calculate SHA1 of all non-delta objects;
 365         * - remember base SHA1 for all deltas.
 366         */
 367        for (i = 0; i < nr_objects; i++) {
 368                struct object_entry *obj = &objects[i];
 369                data = unpack_raw_entry(obj, &delta->base);
 370                obj->real_type = obj->type;
 371                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 372                        nr_deltas++;
 373                        delta->obj = obj;
 374                        delta++;
 375                } else
 376                        sha1_object(data, obj->size, obj->type, obj->sha1);
 377                free(data);
 378        }
 379        objects[i].offset = consumed_bytes;
 380
 381        /* Check pack integrity */
 382        SHA1_Update(&input_ctx, input_buffer, input_offset);
 383        SHA1_Final(sha1, &input_ctx);
 384        if (hashcmp(fill(20), sha1))
 385                die("packfile '%s' SHA1 mismatch", pack_name);
 386        use(20);
 387
 388        /* If input_fd is a file, we should have reached its end now. */
 389        if (fstat(input_fd, &st))
 390                die("cannot fstat packfile '%s': %s", pack_name, strerror(errno));
 391        if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
 392                die("packfile '%s' has junk at the end", pack_name);
 393
 394        /* Sort deltas by base SHA1/offset for fast searching */
 395        qsort(deltas, nr_deltas, sizeof(struct delta_entry),
 396              compare_delta_entry);
 397
 398        /*
 399         * Second pass:
 400         * - for all non-delta objects, look if it is used as a base for
 401         *   deltas;
 402         * - if used as a base, uncompress the object and apply all deltas,
 403         *   recursively checking if the resulting object is used as a base
 404         *   for some more deltas.
 405         */
 406        for (i = 0; i < nr_objects; i++) {
 407                struct object_entry *obj = &objects[i];
 408                union delta_base base;
 409                int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
 410
 411                if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
 412                        continue;
 413                hashcpy(base.sha1, obj->sha1);
 414                ref = !find_delta_childs(&base, &ref_first, &ref_last);
 415                memset(&base, 0, sizeof(base));
 416                base.offset = obj->offset;
 417                ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
 418                if (!ref && !ofs)
 419                        continue;
 420                data = get_data_from_pack(obj);
 421                if (ref)
 422                        for (j = ref_first; j <= ref_last; j++)
 423                                if (deltas[j].obj->type == OBJ_REF_DELTA)
 424                                        resolve_delta(&deltas[j], data,
 425                                                      obj->size, obj->type);
 426                if (ofs)
 427                        for (j = ofs_first; j <= ofs_last; j++)
 428                                if (deltas[j].obj->type == OBJ_OFS_DELTA)
 429                                        resolve_delta(&deltas[j], data,
 430                                                      obj->size, obj->type);
 431                free(data);
 432        }
 433
 434        /* Check for unresolved deltas */
 435        for (i = 0; i < nr_deltas; i++) {
 436                if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
 437                    deltas[i].obj->real_type == OBJ_OFS_DELTA)
 438                        die("packfile '%s' has unresolved deltas",  pack_name);
 439        }
 440}
 441
 442static int sha1_compare(const void *_a, const void *_b)
 443{
 444        struct object_entry *a = *(struct object_entry **)_a;
 445        struct object_entry *b = *(struct object_entry **)_b;
 446        return hashcmp(a->sha1, b->sha1);
 447}
 448
 449/*
 450 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 451 * the SHA1 hash of sorted object names.
 452 */
 453static void write_index_file(const char *index_name, unsigned char *sha1)
 454{
 455        struct sha1file *f;
 456        struct object_entry **sorted_by_sha, **list, **last;
 457        unsigned int array[256];
 458        int i;
 459        SHA_CTX ctx;
 460
 461        if (nr_objects) {
 462                sorted_by_sha =
 463                        xcalloc(nr_objects, sizeof(struct object_entry *));
 464                list = sorted_by_sha;
 465                last = sorted_by_sha + nr_objects;
 466                for (i = 0; i < nr_objects; ++i)
 467                        sorted_by_sha[i] = &objects[i];
 468                qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
 469                      sha1_compare);
 470
 471        }
 472        else
 473                sorted_by_sha = list = last = NULL;
 474
 475        unlink(index_name);
 476        f = sha1create("%s", index_name);
 477
 478        /*
 479         * Write the first-level table (the list is sorted,
 480         * but we use a 256-entry lookup to be able to avoid
 481         * having to do eight extra binary search iterations).
 482         */
 483        for (i = 0; i < 256; i++) {
 484                struct object_entry **next = list;
 485                while (next < last) {
 486                        struct object_entry *obj = *next;
 487                        if (obj->sha1[0] != i)
 488                                break;
 489                        next++;
 490                }
 491                array[i] = htonl(next - sorted_by_sha);
 492                list = next;
 493        }
 494        sha1write(f, array, 256 * sizeof(int));
 495
 496        /* recompute the SHA1 hash of sorted object names.
 497         * currently pack-objects does not do this, but that
 498         * can be fixed.
 499         */
 500        SHA1_Init(&ctx);
 501        /*
 502         * Write the actual SHA1 entries..
 503         */
 504        list = sorted_by_sha;
 505        for (i = 0; i < nr_objects; i++) {
 506                struct object_entry *obj = *list++;
 507                unsigned int offset = htonl(obj->offset);
 508                sha1write(f, &offset, 4);
 509                sha1write(f, obj->sha1, 20);
 510                SHA1_Update(&ctx, obj->sha1, 20);
 511        }
 512        sha1write(f, sha1, 20);
 513        sha1close(f, NULL, 1);
 514        free(sorted_by_sha);
 515        SHA1_Final(sha1, &ctx);
 516}
 517
 518int main(int argc, char **argv)
 519{
 520        int i;
 521        char *index_name = NULL;
 522        char *index_name_buf = NULL;
 523        unsigned char sha1[20];
 524
 525        for (i = 1; i < argc; i++) {
 526                const char *arg = argv[i];
 527
 528                if (*arg == '-') {
 529                        if (!strcmp(arg, "-o")) {
 530                                if (index_name || (i+1) >= argc)
 531                                        usage(index_pack_usage);
 532                                index_name = argv[++i];
 533                        } else
 534                                usage(index_pack_usage);
 535                        continue;
 536                }
 537
 538                if (pack_name)
 539                        usage(index_pack_usage);
 540                pack_name = arg;
 541        }
 542
 543        if (!pack_name)
 544                usage(index_pack_usage);
 545        if (!index_name) {
 546                int len = strlen(pack_name);
 547                if (!has_extension(pack_name, ".pack"))
 548                        die("packfile name '%s' does not end with '.pack'",
 549                            pack_name);
 550                index_name_buf = xmalloc(len);
 551                memcpy(index_name_buf, pack_name, len - 5);
 552                strcpy(index_name_buf + len - 5, ".idx");
 553                index_name = index_name_buf;
 554        }
 555
 556        open_pack_file();
 557        parse_pack_header();
 558        objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
 559        deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
 560        parse_pack_objects(sha1);
 561        free(deltas);
 562        write_index_file(index_name, sha1);
 563        free(objects);
 564        free(index_name_buf);
 565
 566        printf("%s\n", sha1_to_hex(sha1));
 567
 568        return 0;
 569}