builtin-unpack-objects.con commit Teach git list-objects logic not to follow gitlinks (ea376fa)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "delta.h"
   5#include "pack.h"
   6#include "blob.h"
   7#include "commit.h"
   8#include "tag.h"
   9#include "tree.h"
  10
  11static int dry_run, quiet, recover, has_errors;
  12static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
  13
  14/* We always read in 4kB chunks. */
  15static unsigned char buffer[4096];
  16static unsigned long offset, len, consumed_bytes;
  17static SHA_CTX ctx;
  18
  19/*
  20 * Make sure at least "min" bytes are available in the buffer, and
  21 * return the pointer to the buffer.
  22 */
  23static void *fill(int min)
  24{
  25        if (min <= len)
  26                return buffer + offset;
  27        if (min > sizeof(buffer))
  28                die("cannot fill %d bytes", min);
  29        if (offset) {
  30                SHA1_Update(&ctx, buffer, offset);
  31                memmove(buffer, buffer + offset, len);
  32                offset = 0;
  33        }
  34        do {
  35                int ret = xread(0, buffer + len, sizeof(buffer) - len);
  36                if (ret <= 0) {
  37                        if (!ret)
  38                                die("early EOF");
  39                        die("read error on input: %s", strerror(errno));
  40                }
  41                len += ret;
  42        } while (len < min);
  43        return buffer;
  44}
  45
  46static void use(int bytes)
  47{
  48        if (bytes > len)
  49                die("used more bytes than were available");
  50        len -= bytes;
  51        offset += bytes;
  52        consumed_bytes += bytes;
  53}
  54
  55static void *get_data(unsigned long size)
  56{
  57        z_stream stream;
  58        void *buf = xmalloc(size);
  59
  60        memset(&stream, 0, sizeof(stream));
  61
  62        stream.next_out = buf;
  63        stream.avail_out = size;
  64        stream.next_in = fill(1);
  65        stream.avail_in = len;
  66        inflateInit(&stream);
  67
  68        for (;;) {
  69                int ret = inflate(&stream, 0);
  70                use(len - stream.avail_in);
  71                if (stream.total_out == size && ret == Z_STREAM_END)
  72                        break;
  73                if (ret != Z_OK) {
  74                        error("inflate returned %d\n", ret);
  75                        free(buf);
  76                        buf = NULL;
  77                        if (!recover)
  78                                exit(1);
  79                        has_errors = 1;
  80                        break;
  81                }
  82                stream.next_in = fill(1);
  83                stream.avail_in = len;
  84        }
  85        inflateEnd(&stream);
  86        return buf;
  87}
  88
  89struct delta_info {
  90        unsigned char base_sha1[20];
  91        unsigned long base_offset;
  92        unsigned long size;
  93        void *delta;
  94        unsigned nr;
  95        struct delta_info *next;
  96};
  97
  98static struct delta_info *delta_list;
  99
 100static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 101                              unsigned long base_offset,
 102                              void *delta, unsigned long size)
 103{
 104        struct delta_info *info = xmalloc(sizeof(*info));
 105
 106        hashcpy(info->base_sha1, base_sha1);
 107        info->base_offset = base_offset;
 108        info->size = size;
 109        info->delta = delta;
 110        info->nr = nr;
 111        info->next = delta_list;
 112        delta_list = info;
 113}
 114
 115struct obj_info {
 116        unsigned long offset;
 117        unsigned char sha1[20];
 118};
 119
 120static struct obj_info *obj_list;
 121
 122static void added_object(unsigned nr, enum object_type type,
 123                         void *data, unsigned long size);
 124
 125static void write_object(unsigned nr, enum object_type type,
 126                         void *buf, unsigned long size)
 127{
 128        if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 129                die("failed to write object");
 130        added_object(nr, type, buf, size);
 131}
 132
 133static void resolve_delta(unsigned nr, enum object_type type,
 134                          void *base, unsigned long base_size,
 135                          void *delta, unsigned long delta_size)
 136{
 137        void *result;
 138        unsigned long result_size;
 139
 140        result = patch_delta(base, base_size,
 141                             delta, delta_size,
 142                             &result_size);
 143        if (!result)
 144                die("failed to apply delta");
 145        free(delta);
 146        write_object(nr, type, result, result_size);
 147        free(result);
 148}
 149
 150static void added_object(unsigned nr, enum object_type type,
 151                         void *data, unsigned long size)
 152{
 153        struct delta_info **p = &delta_list;
 154        struct delta_info *info;
 155
 156        while ((info = *p) != NULL) {
 157                if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
 158                    info->base_offset == obj_list[nr].offset) {
 159                        *p = info->next;
 160                        p = &delta_list;
 161                        resolve_delta(info->nr, type, data, size,
 162                                      info->delta, info->size);
 163                        free(info);
 164                        continue;
 165                }
 166                p = &info->next;
 167        }
 168}
 169
 170static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 171                                   unsigned nr)
 172{
 173        void *buf = get_data(size);
 174
 175        if (!dry_run && buf)
 176                write_object(nr, type, buf, size);
 177        free(buf);
 178}
 179
 180static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 181                               unsigned nr)
 182{
 183        void *delta_data, *base;
 184        unsigned long base_size;
 185        unsigned char base_sha1[20];
 186
 187        if (type == OBJ_REF_DELTA) {
 188                hashcpy(base_sha1, fill(20));
 189                use(20);
 190                delta_data = get_data(delta_size);
 191                if (dry_run || !delta_data) {
 192                        free(delta_data);
 193                        return;
 194                }
 195                if (!has_sha1_file(base_sha1)) {
 196                        hashcpy(obj_list[nr].sha1, null_sha1);
 197                        add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 198                        return;
 199                }
 200        } else {
 201                unsigned base_found = 0;
 202                unsigned char *pack, c;
 203                unsigned long base_offset;
 204                unsigned lo, mid, hi;
 205
 206                pack = fill(1);
 207                c = *pack;
 208                use(1);
 209                base_offset = c & 127;
 210                while (c & 128) {
 211                        base_offset += 1;
 212                        if (!base_offset || base_offset & ~(~0UL >> 7))
 213                                die("offset value overflow for delta base object");
 214                        pack = fill(1);
 215                        c = *pack;
 216                        use(1);
 217                        base_offset = (base_offset << 7) + (c & 127);
 218                }
 219                base_offset = obj_list[nr].offset - base_offset;
 220
 221                delta_data = get_data(delta_size);
 222                if (dry_run || !delta_data) {
 223                        free(delta_data);
 224                        return;
 225                }
 226                lo = 0;
 227                hi = nr;
 228                while (lo < hi) {
 229                        mid = (lo + hi)/2;
 230                        if (base_offset < obj_list[mid].offset) {
 231                                hi = mid;
 232                        } else if (base_offset > obj_list[mid].offset) {
 233                                lo = mid + 1;
 234                        } else {
 235                                hashcpy(base_sha1, obj_list[mid].sha1);
 236                                base_found = !is_null_sha1(base_sha1);
 237                                break;
 238                        }
 239                }
 240                if (!base_found) {
 241                        /* The delta base object is itself a delta that
 242                           has not been resolved yet. */
 243                        hashcpy(obj_list[nr].sha1, null_sha1);
 244                        add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 245                        return;
 246                }
 247        }
 248
 249        base = read_sha1_file(base_sha1, &type, &base_size);
 250        if (!base) {
 251                error("failed to read delta-pack base object %s",
 252                      sha1_to_hex(base_sha1));
 253                if (!recover)
 254                        exit(1);
 255                has_errors = 1;
 256                return;
 257        }
 258        resolve_delta(nr, type, base, base_size, delta_data, delta_size);
 259        free(base);
 260}
 261
 262static void unpack_one(unsigned nr, unsigned total)
 263{
 264        unsigned shift;
 265        unsigned char *pack, c;
 266        unsigned long size;
 267        enum object_type type;
 268
 269        obj_list[nr].offset = consumed_bytes;
 270
 271        pack = fill(1);
 272        c = *pack;
 273        use(1);
 274        type = (c >> 4) & 7;
 275        size = (c & 15);
 276        shift = 4;
 277        while (c & 0x80) {
 278                pack = fill(1);
 279                c = *pack;
 280                use(1);
 281                size += (c & 0x7f) << shift;
 282                shift += 7;
 283        }
 284        if (!quiet) {
 285                static unsigned long last_sec;
 286                static unsigned last_percent;
 287                struct timeval now;
 288                unsigned percentage = ((nr+1) * 100) / total;
 289
 290                gettimeofday(&now, NULL);
 291                if (percentage != last_percent || now.tv_sec != last_sec) {
 292                        last_sec = now.tv_sec;
 293                        last_percent = percentage;
 294                        fprintf(stderr, "%4u%% (%u/%u) done\r",
 295                                        percentage, (nr+1), total);
 296                }
 297        }
 298        switch (type) {
 299        case OBJ_COMMIT:
 300        case OBJ_TREE:
 301        case OBJ_BLOB:
 302        case OBJ_TAG:
 303                unpack_non_delta_entry(type, size, nr);
 304                return;
 305        case OBJ_REF_DELTA:
 306        case OBJ_OFS_DELTA:
 307                unpack_delta_entry(type, size, nr);
 308                return;
 309        default:
 310                error("bad object type %d", type);
 311                has_errors = 1;
 312                if (recover)
 313                        return;
 314                exit(1);
 315        }
 316}
 317
 318static void unpack_all(void)
 319{
 320        int i;
 321        struct pack_header *hdr = fill(sizeof(struct pack_header));
 322        unsigned nr_objects = ntohl(hdr->hdr_entries);
 323
 324        if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 325                die("bad pack file");
 326        if (!pack_version_ok(hdr->hdr_version))
 327                die("unknown pack file version %d", ntohl(hdr->hdr_version));
 328        fprintf(stderr, "Unpacking %d objects\n", nr_objects);
 329
 330        obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 331        use(sizeof(struct pack_header));
 332        for (i = 0; i < nr_objects; i++)
 333                unpack_one(i, nr_objects);
 334        if (delta_list)
 335                die("unresolved deltas left after unpacking");
 336}
 337
 338int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 339{
 340        int i;
 341        unsigned char sha1[20];
 342
 343        git_config(git_default_config);
 344
 345        quiet = !isatty(2);
 346
 347        for (i = 1 ; i < argc; i++) {
 348                const char *arg = argv[i];
 349
 350                if (*arg == '-') {
 351                        if (!strcmp(arg, "-n")) {
 352                                dry_run = 1;
 353                                continue;
 354                        }
 355                        if (!strcmp(arg, "-q")) {
 356                                quiet = 1;
 357                                continue;
 358                        }
 359                        if (!strcmp(arg, "-r")) {
 360                                recover = 1;
 361                                continue;
 362                        }
 363                        if (!prefixcmp(arg, "--pack_header=")) {
 364                                struct pack_header *hdr;
 365                                char *c;
 366
 367                                hdr = (struct pack_header *)buffer;
 368                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 369                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 370                                if (*c != ',')
 371                                        die("bad %s", arg);
 372                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 373                                if (*c)
 374                                        die("bad %s", arg);
 375                                len = sizeof(*hdr);
 376                                continue;
 377                        }
 378                        usage(unpack_usage);
 379                }
 380
 381                /* We don't take any non-flag arguments now.. Maybe some day */
 382                usage(unpack_usage);
 383        }
 384        SHA1_Init(&ctx);
 385        unpack_all();
 386        SHA1_Update(&ctx, buffer, offset);
 387        SHA1_Final(sha1, &ctx);
 388        if (hashcmp(fill(20), sha1))
 389                die("final sha1 did not match");
 390        use(20);
 391
 392        /* Write the last part of the buffer to stdout */
 393        while (len) {
 394                int ret = xwrite(1, buffer + offset, len);
 395                if (ret <= 0)
 396                        break;
 397                len -= ret;
 398                offset += ret;
 399        }
 400
 401        /* All done */
 402        if (!quiet)
 403                fprintf(stderr, "\n");
 404        return has_errors;
 405}