/* builtin-unpack-objects.c, as of commit "git-rm: update to saner semantics" (9f95069) */
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "delta.h"
   5#include "pack.h"
   6#include "blob.h"
   7#include "commit.h"
   8#include "tag.h"
   9#include "tree.h"
  10
  11static int dry_run, quiet, recover, has_errors;
  12static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
  13
  14/* We always read in 4kB chunks. */
  15static unsigned char buffer[4096];
  16static unsigned long offset, len, consumed_bytes;
  17static SHA_CTX ctx;
  18
  19/*
  20 * Make sure at least "min" bytes are available in the buffer, and
  21 * return the pointer to the buffer.
  22 */
  23static void *fill(int min)
  24{
  25        if (min <= len)
  26                return buffer + offset;
  27        if (min > sizeof(buffer))
  28                die("cannot fill %d bytes", min);
  29        if (offset) {
  30                SHA1_Update(&ctx, buffer, offset);
  31                memmove(buffer, buffer + offset, len);
  32                offset = 0;
  33        }
  34        do {
  35                int ret = xread(0, buffer + len, sizeof(buffer) - len);
  36                if (ret <= 0) {
  37                        if (!ret)
  38                                die("early EOF");
  39                        die("read error on input: %s", strerror(errno));
  40                }
  41                len += ret;
  42        } while (len < min);
  43        return buffer;
  44}
  45
  46static void use(int bytes)
  47{
  48        if (bytes > len)
  49                die("used more bytes than were available");
  50        len -= bytes;
  51        offset += bytes;
  52        consumed_bytes += bytes;
  53}
  54
  55static void *get_data(unsigned long size)
  56{
  57        z_stream stream;
  58        void *buf = xmalloc(size);
  59
  60        memset(&stream, 0, sizeof(stream));
  61
  62        stream.next_out = buf;
  63        stream.avail_out = size;
  64        stream.next_in = fill(1);
  65        stream.avail_in = len;
  66        inflateInit(&stream);
  67
  68        for (;;) {
  69                int ret = inflate(&stream, 0);
  70                use(len - stream.avail_in);
  71                if (stream.total_out == size && ret == Z_STREAM_END)
  72                        break;
  73                if (ret != Z_OK) {
  74                        error("inflate returned %d\n", ret);
  75                        free(buf);
  76                        buf = NULL;
  77                        if (!recover)
  78                                exit(1);
  79                        has_errors = 1;
  80                        break;
  81                }
  82                stream.next_in = fill(1);
  83                stream.avail_in = len;
  84        }
  85        inflateEnd(&stream);
  86        return buf;
  87}
  88
  89struct delta_info {
  90        unsigned char base_sha1[20];
  91        unsigned long base_offset;
  92        unsigned long size;
  93        void *delta;
  94        unsigned nr;
  95        struct delta_info *next;
  96};
  97
  98static struct delta_info *delta_list;
  99
 100static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 101                              unsigned long base_offset,
 102                              void *delta, unsigned long size)
 103{
 104        struct delta_info *info = xmalloc(sizeof(*info));
 105
 106        hashcpy(info->base_sha1, base_sha1);
 107        info->base_offset = base_offset;
 108        info->size = size;
 109        info->delta = delta;
 110        info->nr = nr;
 111        info->next = delta_list;
 112        delta_list = info;
 113}
 114
 115struct obj_info {
 116        unsigned long offset;
 117        unsigned char sha1[20];
 118};
 119
 120static struct obj_info *obj_list;
 121
 122static void added_object(unsigned nr, const char *type, void *data,
 123                         unsigned long size);
 124
 125static void write_object(unsigned nr, void *buf, unsigned long size,
 126                         const char *type)
 127{
 128        if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
 129                die("failed to write object");
 130        added_object(nr, type, buf, size);
 131}
 132
 133static void resolve_delta(unsigned nr, const char *type,
 134                          void *base, unsigned long base_size,
 135                          void *delta, unsigned long delta_size)
 136{
 137        void *result;
 138        unsigned long result_size;
 139
 140        result = patch_delta(base, base_size,
 141                             delta, delta_size,
 142                             &result_size);
 143        if (!result)
 144                die("failed to apply delta");
 145        free(delta);
 146        write_object(nr, result, result_size, type);
 147        free(result);
 148}
 149
 150static void added_object(unsigned nr, const char *type, void *data,
 151                         unsigned long size)
 152{
 153        struct delta_info **p = &delta_list;
 154        struct delta_info *info;
 155
 156        while ((info = *p) != NULL) {
 157                if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
 158                    info->base_offset == obj_list[nr].offset) {
 159                        *p = info->next;
 160                        p = &delta_list;
 161                        resolve_delta(info->nr, type, data, size,
 162                                      info->delta, info->size);
 163                        free(info);
 164                        continue;
 165                }
 166                p = &info->next;
 167        }
 168}
 169
 170static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
 171                                   unsigned nr)
 172{
 173        void *buf = get_data(size);
 174        const char *type;
 175
 176        switch (kind) {
 177        case OBJ_COMMIT: type = commit_type; break;
 178        case OBJ_TREE:   type = tree_type; break;
 179        case OBJ_BLOB:   type = blob_type; break;
 180        case OBJ_TAG:    type = tag_type; break;
 181        default: die("bad type %d", kind);
 182        }
 183        if (!dry_run && buf)
 184                write_object(nr, buf, size, type);
 185        free(buf);
 186}
 187
 188static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
 189                               unsigned nr)
 190{
 191        void *delta_data, *base;
 192        unsigned long base_size;
 193        char type[20];
 194        unsigned char base_sha1[20];
 195
 196        if (kind == OBJ_REF_DELTA) {
 197                hashcpy(base_sha1, fill(20));
 198                use(20);
 199                delta_data = get_data(delta_size);
 200                if (dry_run || !delta_data) {
 201                        free(delta_data);
 202                        return;
 203                }
 204                if (!has_sha1_file(base_sha1)) {
 205                        hashcpy(obj_list[nr].sha1, null_sha1);
 206                        add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 207                        return;
 208                }
 209        } else {
 210                unsigned base_found = 0;
 211                unsigned char *pack, c;
 212                unsigned long base_offset;
 213                unsigned lo, mid, hi;
 214
 215                pack = fill(1);
 216                c = *pack;
 217                use(1);
 218                base_offset = c & 127;
 219                while (c & 128) {
 220                        base_offset += 1;
 221                        if (!base_offset || base_offset & ~(~0UL >> 7))
 222                                die("offset value overflow for delta base object");
 223                        pack = fill(1);
 224                        c = *pack;
 225                        use(1);
 226                        base_offset = (base_offset << 7) + (c & 127);
 227                }
 228                base_offset = obj_list[nr].offset - base_offset;
 229
 230                delta_data = get_data(delta_size);
 231                if (dry_run || !delta_data) {
 232                        free(delta_data);
 233                        return;
 234                }
 235                lo = 0;
 236                hi = nr;
 237                while (lo < hi) {
 238                        mid = (lo + hi)/2;
 239                        if (base_offset < obj_list[mid].offset) {
 240                                hi = mid;
 241                        } else if (base_offset > obj_list[mid].offset) {
 242                                lo = mid + 1;
 243                        } else {
 244                                hashcpy(base_sha1, obj_list[mid].sha1);
 245                                base_found = !is_null_sha1(base_sha1);
 246                                break;
 247                        }
 248                }
 249                if (!base_found) {
 250                        /* The delta base object is itself a delta that
 251                           has not been resolved yet. */
 252                        hashcpy(obj_list[nr].sha1, null_sha1);
 253                        add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 254                        return;
 255                }
 256        }
 257
 258        base = read_sha1_file(base_sha1, type, &base_size);
 259        if (!base) {
 260                error("failed to read delta-pack base object %s",
 261                      sha1_to_hex(base_sha1));
 262                if (!recover)
 263                        exit(1);
 264                has_errors = 1;
 265                return;
 266        }
 267        resolve_delta(nr, type, base, base_size, delta_data, delta_size);
 268        free(base);
 269}
 270
 271static void unpack_one(unsigned nr, unsigned total)
 272{
 273        unsigned shift;
 274        unsigned char *pack, c;
 275        unsigned long size;
 276        enum object_type type;
 277
 278        obj_list[nr].offset = consumed_bytes;
 279
 280        pack = fill(1);
 281        c = *pack;
 282        use(1);
 283        type = (c >> 4) & 7;
 284        size = (c & 15);
 285        shift = 4;
 286        while (c & 0x80) {
 287                pack = fill(1);
 288                c = *pack;
 289                use(1);
 290                size += (c & 0x7f) << shift;
 291                shift += 7;
 292        }
 293        if (!quiet) {
 294                static unsigned long last_sec;
 295                static unsigned last_percent;
 296                struct timeval now;
 297                unsigned percentage = ((nr+1) * 100) / total;
 298
 299                gettimeofday(&now, NULL);
 300                if (percentage != last_percent || now.tv_sec != last_sec) {
 301                        last_sec = now.tv_sec;
 302                        last_percent = percentage;
 303                        fprintf(stderr, "%4u%% (%u/%u) done\r",
 304                                        percentage, (nr+1), total);
 305                }
 306        }
 307        switch (type) {
 308        case OBJ_COMMIT:
 309        case OBJ_TREE:
 310        case OBJ_BLOB:
 311        case OBJ_TAG:
 312                unpack_non_delta_entry(type, size, nr);
 313                return;
 314        case OBJ_REF_DELTA:
 315        case OBJ_OFS_DELTA:
 316                unpack_delta_entry(type, size, nr);
 317                return;
 318        default:
 319                error("bad object type %d", type);
 320                has_errors = 1;
 321                if (recover)
 322                        return;
 323                exit(1);
 324        }
 325}
 326
 327static void unpack_all(void)
 328{
 329        int i;
 330        struct pack_header *hdr = fill(sizeof(struct pack_header));
 331        unsigned nr_objects = ntohl(hdr->hdr_entries);
 332
 333        if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 334                die("bad pack file");
 335        if (!pack_version_ok(hdr->hdr_version))
 336                die("unknown pack file version %d", ntohl(hdr->hdr_version));
 337        fprintf(stderr, "Unpacking %d objects\n", nr_objects);
 338
 339        obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 340        use(sizeof(struct pack_header));
 341        for (i = 0; i < nr_objects; i++)
 342                unpack_one(i, nr_objects);
 343        if (delta_list)
 344                die("unresolved deltas left after unpacking");
 345}
 346
 347int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 348{
 349        int i;
 350        unsigned char sha1[20];
 351
 352        git_config(git_default_config);
 353
 354        quiet = !isatty(2);
 355
 356        for (i = 1 ; i < argc; i++) {
 357                const char *arg = argv[i];
 358
 359                if (*arg == '-') {
 360                        if (!strcmp(arg, "-n")) {
 361                                dry_run = 1;
 362                                continue;
 363                        }
 364                        if (!strcmp(arg, "-q")) {
 365                                quiet = 1;
 366                                continue;
 367                        }
 368                        if (!strcmp(arg, "-r")) {
 369                                recover = 1;
 370                                continue;
 371                        }
 372                        if (!strncmp(arg, "--pack_header=", 14)) {
 373                                struct pack_header *hdr;
 374                                char *c;
 375
 376                                hdr = (struct pack_header *)buffer;
 377                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 378                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 379                                if (*c != ',')
 380                                        die("bad %s", arg);
 381                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 382                                if (*c)
 383                                        die("bad %s", arg);
 384                                len = sizeof(*hdr);
 385                                continue;
 386                        }
 387                        usage(unpack_usage);
 388                }
 389
 390                /* We don't take any non-flag arguments now.. Maybe some day */
 391                usage(unpack_usage);
 392        }
 393        SHA1_Init(&ctx);
 394        unpack_all();
 395        SHA1_Update(&ctx, buffer, offset);
 396        SHA1_Final(sha1, &ctx);
 397        if (hashcmp(fill(20), sha1))
 398                die("final sha1 did not match");
 399        use(20);
 400
 401        /* Write the last part of the buffer to stdout */
 402        while (len) {
 403                int ret = xwrite(1, buffer + offset, len);
 404                if (ret <= 0)
 405                        break;
 406                len -= ret;
 407                offset += ret;
 408        }
 409
 410        /* All done */
 411        if (!quiet)
 412                fprintf(stderr, "\n");
 413        return has_errors;
 414}