builtin-unpack-objects.c on commit git-submodule summary: show commit summary (1cb639e)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "delta.h"
   5#include "pack.h"
   6#include "blob.h"
   7#include "commit.h"
   8#include "tag.h"
   9#include "tree.h"
  10#include "tree-walk.h"
  11#include "progress.h"
  12#include "decorate.h"
  13#include "fsck.h"
  14
  15static int dry_run, quiet, recover, has_errors, strict;
  16static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
  17
  18/* We always read in 4kB chunks. */
  19static unsigned char buffer[4096];
  20static unsigned int offset, len;
  21static off_t consumed_bytes;
  22static SHA_CTX ctx;
  23
  24struct obj_buffer {
  25        char *buffer;
  26        unsigned long size;
  27};
  28
  29static struct decoration obj_decorate;
  30
  31static struct obj_buffer *lookup_object_buffer(struct object *base)
  32{
  33        return lookup_decoration(&obj_decorate, base);
  34}
  35
  36static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
  37{
  38        struct obj_buffer *obj;
  39        obj = xcalloc(1, sizeof(struct obj_buffer));
  40        obj->buffer = buffer;
  41        obj->size = size;
  42        if (add_decoration(&obj_decorate, object, obj))
  43                die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1));
  44}
  45
  46/*
  47 * Make sure at least "min" bytes are available in the buffer, and
  48 * return the pointer to the buffer.
  49 */
  50static void *fill(int min)
  51{
  52        if (min <= len)
  53                return buffer + offset;
  54        if (min > sizeof(buffer))
  55                die("cannot fill %d bytes", min);
  56        if (offset) {
  57                SHA1_Update(&ctx, buffer, offset);
  58                memmove(buffer, buffer + offset, len);
  59                offset = 0;
  60        }
  61        do {
  62                ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
  63                if (ret <= 0) {
  64                        if (!ret)
  65                                die("early EOF");
  66                        die("read error on input: %s", strerror(errno));
  67                }
  68                len += ret;
  69        } while (len < min);
  70        return buffer;
  71}
  72
  73static void use(int bytes)
  74{
  75        if (bytes > len)
  76                die("used more bytes than were available");
  77        len -= bytes;
  78        offset += bytes;
  79
  80        /* make sure off_t is sufficiently large not to wrap */
  81        if (consumed_bytes > consumed_bytes + bytes)
  82                die("pack too large for current definition of off_t");
  83        consumed_bytes += bytes;
  84}
  85
  86static void *get_data(unsigned long size)
  87{
  88        z_stream stream;
  89        void *buf = xmalloc(size);
  90
  91        memset(&stream, 0, sizeof(stream));
  92
  93        stream.next_out = buf;
  94        stream.avail_out = size;
  95        stream.next_in = fill(1);
  96        stream.avail_in = len;
  97        inflateInit(&stream);
  98
  99        for (;;) {
 100                int ret = inflate(&stream, 0);
 101                use(len - stream.avail_in);
 102                if (stream.total_out == size && ret == Z_STREAM_END)
 103                        break;
 104                if (ret != Z_OK) {
 105                        error("inflate returned %d\n", ret);
 106                        free(buf);
 107                        buf = NULL;
 108                        if (!recover)
 109                                exit(1);
 110                        has_errors = 1;
 111                        break;
 112                }
 113                stream.next_in = fill(1);
 114                stream.avail_in = len;
 115        }
 116        inflateEnd(&stream);
 117        return buf;
 118}
 119
 120struct delta_info {
 121        unsigned char base_sha1[20];
 122        unsigned nr;
 123        off_t base_offset;
 124        unsigned long size;
 125        void *delta;
 126        struct delta_info *next;
 127};
 128
 129static struct delta_info *delta_list;
 130
 131static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 132                              off_t base_offset,
 133                              void *delta, unsigned long size)
 134{
 135        struct delta_info *info = xmalloc(sizeof(*info));
 136
 137        hashcpy(info->base_sha1, base_sha1);
 138        info->base_offset = base_offset;
 139        info->size = size;
 140        info->delta = delta;
 141        info->nr = nr;
 142        info->next = delta_list;
 143        delta_list = info;
 144}
 145
 146struct obj_info {
 147        off_t offset;
 148        unsigned char sha1[20];
 149        struct object *obj;
 150};
 151
 152#define FLAG_OPEN (1u<<20)
 153#define FLAG_WRITTEN (1u<<21)
 154
 155static struct obj_info *obj_list;
 156unsigned nr_objects;
 157
 158static void write_cached_object(struct object *obj)
 159{
 160        unsigned char sha1[20];
 161        struct obj_buffer *obj_buf = lookup_object_buffer(obj);
 162        if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0)
 163                die("failed to write object %s", sha1_to_hex(obj->sha1));
 164        obj->flags |= FLAG_WRITTEN;
 165}
 166
 167static int check_object(struct object *obj, int type, void *data)
 168{
 169        if (!obj)
 170                return 0;
 171
 172        if (obj->flags & FLAG_WRITTEN)
 173                return 1;
 174
 175        if (type != OBJ_ANY && obj->type != type)
 176                die("object type mismatch");
 177
 178        if (!(obj->flags & FLAG_OPEN)) {
 179                unsigned long size;
 180                int type = sha1_object_info(obj->sha1, &size);
 181                if (type != obj->type || type <= 0)
 182                        die("object of unexpected type");
 183                obj->flags |= FLAG_WRITTEN;
 184                return 1;
 185        }
 186
 187        if (fsck_object(obj, 1, fsck_error_function))
 188                die("Error in object");
 189        if (!fsck_walk(obj, check_object, 0))
 190                die("Error on reachable objects of %s", sha1_to_hex(obj->sha1));
 191        write_cached_object(obj);
 192        return 1;
 193}
 194
 195static void write_rest(void)
 196{
 197        unsigned i;
 198        for (i = 0; i < nr_objects; i++)
 199                check_object(obj_list[i].obj, OBJ_ANY, 0);
 200}
 201
 202static void added_object(unsigned nr, enum object_type type,
 203                         void *data, unsigned long size);
 204
 205static void write_object(unsigned nr, enum object_type type,
 206                         void *buf, unsigned long size)
 207{
 208        added_object(nr, type, buf, size);
 209        if (!strict) {
 210                if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 211                        die("failed to write object");
 212                free(buf);
 213                obj_list[nr].obj = 0;
 214        } else if (type == OBJ_BLOB) {
 215                struct blob *blob;
 216                if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 217                        die("failed to write object");
 218                free(buf);
 219
 220                blob = lookup_blob(obj_list[nr].sha1);
 221                if (blob)
 222                        blob->object.flags |= FLAG_WRITTEN;
 223                else
 224                        die("invalid blob object");
 225                obj_list[nr].obj = 0;
 226        } else {
 227                struct object *obj;
 228                int eaten;
 229                hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
 230                obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten);
 231                if (!obj)
 232                        die("invalid %s", typename(type));
 233                /* buf is stored via add_object_buffer and in obj, if its a tree or commit */
 234                add_object_buffer(obj, buf, size);
 235                obj->flags |= FLAG_OPEN;
 236                obj_list[nr].obj = obj;
 237        }
 238}
 239
 240static void resolve_delta(unsigned nr, enum object_type type,
 241                          void *base, unsigned long base_size,
 242                          void *delta, unsigned long delta_size)
 243{
 244        void *result;
 245        unsigned long result_size;
 246
 247        result = patch_delta(base, base_size,
 248                             delta, delta_size,
 249                             &result_size);
 250        if (!result)
 251                die("failed to apply delta");
 252        free(delta);
 253        write_object(nr, type, result, result_size);
 254}
 255
 256static void added_object(unsigned nr, enum object_type type,
 257                         void *data, unsigned long size)
 258{
 259        struct delta_info **p = &delta_list;
 260        struct delta_info *info;
 261
 262        while ((info = *p) != NULL) {
 263                if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
 264                    info->base_offset == obj_list[nr].offset) {
 265                        *p = info->next;
 266                        p = &delta_list;
 267                        resolve_delta(info->nr, type, data, size,
 268                                      info->delta, info->size);
 269                        free(info);
 270                        continue;
 271                }
 272                p = &info->next;
 273        }
 274}
 275
 276static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 277                                   unsigned nr)
 278{
 279        void *buf = get_data(size);
 280
 281        if (!dry_run && buf)
 282                write_object(nr, type, buf, size);
 283        else
 284                free(buf);
 285}
 286
 287static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 288                               unsigned nr)
 289{
 290        void *delta_data, *base;
 291        unsigned long base_size;
 292        unsigned char base_sha1[20];
 293        struct object *obj;
 294
 295        if (type == OBJ_REF_DELTA) {
 296                hashcpy(base_sha1, fill(20));
 297                use(20);
 298                delta_data = get_data(delta_size);
 299                if (dry_run || !delta_data) {
 300                        free(delta_data);
 301                        return;
 302                }
 303                if (!has_sha1_file(base_sha1)) {
 304                        hashcpy(obj_list[nr].sha1, null_sha1);
 305                        add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 306                        return;
 307                }
 308        } else {
 309                unsigned base_found = 0;
 310                unsigned char *pack, c;
 311                off_t base_offset;
 312                unsigned lo, mid, hi;
 313
 314                pack = fill(1);
 315                c = *pack;
 316                use(1);
 317                base_offset = c & 127;
 318                while (c & 128) {
 319                        base_offset += 1;
 320                        if (!base_offset || MSB(base_offset, 7))
 321                                die("offset value overflow for delta base object");
 322                        pack = fill(1);
 323                        c = *pack;
 324                        use(1);
 325                        base_offset = (base_offset << 7) + (c & 127);
 326                }
 327                base_offset = obj_list[nr].offset - base_offset;
 328
 329                delta_data = get_data(delta_size);
 330                if (dry_run || !delta_data) {
 331                        free(delta_data);
 332                        return;
 333                }
 334                lo = 0;
 335                hi = nr;
 336                while (lo < hi) {
 337                        mid = (lo + hi)/2;
 338                        if (base_offset < obj_list[mid].offset) {
 339                                hi = mid;
 340                        } else if (base_offset > obj_list[mid].offset) {
 341                                lo = mid + 1;
 342                        } else {
 343                                hashcpy(base_sha1, obj_list[mid].sha1);
 344                                base_found = !is_null_sha1(base_sha1);
 345                                break;
 346                        }
 347                }
 348                if (!base_found) {
 349                        /* The delta base object is itself a delta that
 350                           has not been resolved yet. */
 351                        hashcpy(obj_list[nr].sha1, null_sha1);
 352                        add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 353                        return;
 354                }
 355        }
 356
 357        obj = lookup_object(base_sha1);
 358        if (obj) {
 359                struct obj_buffer *obj_buf = lookup_object_buffer(obj);
 360                if (obj_buf) {
 361                        resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size);
 362                        return;
 363                }
 364        }
 365
 366        base = read_sha1_file(base_sha1, &type, &base_size);
 367        if (!base) {
 368                error("failed to read delta-pack base object %s",
 369                      sha1_to_hex(base_sha1));
 370                if (!recover)
 371                        exit(1);
 372                has_errors = 1;
 373                return;
 374        }
 375        resolve_delta(nr, type, base, base_size, delta_data, delta_size);
 376        free(base);
 377}
 378
 379static void unpack_one(unsigned nr)
 380{
 381        unsigned shift;
 382        unsigned char *pack, c;
 383        unsigned long size;
 384        enum object_type type;
 385
 386        obj_list[nr].offset = consumed_bytes;
 387
 388        pack = fill(1);
 389        c = *pack;
 390        use(1);
 391        type = (c >> 4) & 7;
 392        size = (c & 15);
 393        shift = 4;
 394        while (c & 0x80) {
 395                pack = fill(1);
 396                c = *pack;
 397                use(1);
 398                size += (c & 0x7f) << shift;
 399                shift += 7;
 400        }
 401
 402        switch (type) {
 403        case OBJ_COMMIT:
 404        case OBJ_TREE:
 405        case OBJ_BLOB:
 406        case OBJ_TAG:
 407                unpack_non_delta_entry(type, size, nr);
 408                return;
 409        case OBJ_REF_DELTA:
 410        case OBJ_OFS_DELTA:
 411                unpack_delta_entry(type, size, nr);
 412                return;
 413        default:
 414                error("bad object type %d", type);
 415                has_errors = 1;
 416                if (recover)
 417                        return;
 418                exit(1);
 419        }
 420}
 421
 422static void unpack_all(void)
 423{
 424        int i;
 425        struct progress *progress = NULL;
 426        struct pack_header *hdr = fill(sizeof(struct pack_header));
 427
 428        nr_objects = ntohl(hdr->hdr_entries);
 429
 430        if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 431                die("bad pack file");
 432        if (!pack_version_ok(hdr->hdr_version))
 433                die("unknown pack file version %d", ntohl(hdr->hdr_version));
 434        use(sizeof(struct pack_header));
 435
 436        if (!quiet)
 437                progress = start_progress("Unpacking objects", nr_objects);
 438        obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 439        memset(obj_list, 0, nr_objects * sizeof(*obj_list));
 440        for (i = 0; i < nr_objects; i++) {
 441                unpack_one(i);
 442                display_progress(progress, i + 1);
 443        }
 444        stop_progress(&progress);
 445
 446        if (delta_list)
 447                die("unresolved deltas left after unpacking");
 448}
 449
 450int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 451{
 452        int i;
 453        unsigned char sha1[20];
 454
 455        git_config(git_default_config);
 456
 457        quiet = !isatty(2);
 458
 459        for (i = 1 ; i < argc; i++) {
 460                const char *arg = argv[i];
 461
 462                if (*arg == '-') {
 463                        if (!strcmp(arg, "-n")) {
 464                                dry_run = 1;
 465                                continue;
 466                        }
 467                        if (!strcmp(arg, "-q")) {
 468                                quiet = 1;
 469                                continue;
 470                        }
 471                        if (!strcmp(arg, "-r")) {
 472                                recover = 1;
 473                                continue;
 474                        }
 475                        if (!strcmp(arg, "--strict")) {
 476                                strict = 1;
 477                                continue;
 478                        }
 479                        if (!prefixcmp(arg, "--pack_header=")) {
 480                                struct pack_header *hdr;
 481                                char *c;
 482
 483                                hdr = (struct pack_header *)buffer;
 484                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 485                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 486                                if (*c != ',')
 487                                        die("bad %s", arg);
 488                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 489                                if (*c)
 490                                        die("bad %s", arg);
 491                                len = sizeof(*hdr);
 492                                continue;
 493                        }
 494                        usage(unpack_usage);
 495                }
 496
 497                /* We don't take any non-flag arguments now.. Maybe some day */
 498                usage(unpack_usage);
 499        }
 500        SHA1_Init(&ctx);
 501        unpack_all();
 502        SHA1_Update(&ctx, buffer, offset);
 503        SHA1_Final(sha1, &ctx);
 504        if (strict)
 505                write_rest();
 506        if (hashcmp(fill(20), sha1))
 507                die("final sha1 did not match");
 508        use(20);
 509
 510        /* Write the last part of the buffer to stdout */
 511        while (len) {
 512                int ret = xwrite(1, buffer + offset, len);
 513                if (ret <= 0)
 514                        break;
 515                len -= ret;
 516                offset += ret;
 517        }
 518
 519        /* All done */
 520        return has_errors;
 521}