builtin-unpack-objects.c on commit Merge branch 'dh/pack' (45bde46)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "delta.h"
   5#include "pack.h"
   6#include "blob.h"
   7#include "commit.h"
   8#include "tag.h"
   9#include "tree.h"
  10#include "progress.h"
  11
  12static int dry_run, quiet, recover, has_errors;
  13static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
  14
  15/* We always read in 4kB chunks. */
  16static unsigned char buffer[4096];
  17static unsigned int offset, len;
  18static off_t consumed_bytes;
  19static SHA_CTX ctx;
  20
  21/*
  22 * Make sure at least "min" bytes are available in the buffer, and
  23 * return the pointer to the buffer.
  24 */
  25static void *fill(int min)
  26{
  27        if (min <= len)
  28                return buffer + offset;
  29        if (min > sizeof(buffer))
  30                die("cannot fill %d bytes", min);
  31        if (offset) {
  32                SHA1_Update(&ctx, buffer, offset);
  33                memmove(buffer, buffer + offset, len);
  34                offset = 0;
  35        }
  36        do {
  37                ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
  38                if (ret <= 0) {
  39                        if (!ret)
  40                                die("early EOF");
  41                        die("read error on input: %s", strerror(errno));
  42                }
  43                len += ret;
  44        } while (len < min);
  45        return buffer;
  46}
  47
  48static void use(int bytes)
  49{
  50        if (bytes > len)
  51                die("used more bytes than were available");
  52        len -= bytes;
  53        offset += bytes;
  54
  55        /* make sure off_t is sufficiently large not to wrap */
  56        if (consumed_bytes > consumed_bytes + bytes)
  57                die("pack too large for current definition of off_t");
  58        consumed_bytes += bytes;
  59}
  60
  61static void *get_data(unsigned long size)
  62{
  63        z_stream stream;
  64        void *buf = xmalloc(size);
  65
  66        memset(&stream, 0, sizeof(stream));
  67
  68        stream.next_out = buf;
  69        stream.avail_out = size;
  70        stream.next_in = fill(1);
  71        stream.avail_in = len;
  72        inflateInit(&stream);
  73
  74        for (;;) {
  75                int ret = inflate(&stream, 0);
  76                use(len - stream.avail_in);
  77                if (stream.total_out == size && ret == Z_STREAM_END)
  78                        break;
  79                if (ret != Z_OK) {
  80                        error("inflate returned %d\n", ret);
  81                        free(buf);
  82                        buf = NULL;
  83                        if (!recover)
  84                                exit(1);
  85                        has_errors = 1;
  86                        break;
  87                }
  88                stream.next_in = fill(1);
  89                stream.avail_in = len;
  90        }
  91        inflateEnd(&stream);
  92        return buf;
  93}
  94
  /*
   * A delta whose base object was not available when the delta was read.
   * Such deltas are kept on a singly linked list until their base shows
   * up.
   */
  struct delta_info {
          unsigned char base_sha1[20];    /* base name, compared in added_object() */
          unsigned nr;                    /* index of the delta's own entry in obj_list */
          off_t base_offset;              /* base position, compared in added_object() */
          unsigned long size;             /* number of bytes in "delta" */
          void *delta;                    /* inflated delta data, freed once applied */
          struct delta_info *next;        /* next pending delta, or NULL */
  };

  /* Head of the list of deltas still waiting for their base. */
  static struct delta_info *delta_list;
 105
 106static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 107                              off_t base_offset,
 108                              void *delta, unsigned long size)
 109{
 110        struct delta_info *info = xmalloc(sizeof(*info));
 111
 112        hashcpy(info->base_sha1, base_sha1);
 113        info->base_offset = base_offset;
 114        info->size = size;
 115        info->delta = delta;
 116        info->nr = nr;
 117        info->next = delta_list;
 118        delta_list = info;
 119}
 120
 /* Per-entry bookkeeping for the pack being unpacked. */
 struct obj_info {
         off_t offset;                   /* consumed-byte count when the entry started */
         unsigned char sha1[20];         /* name of the written object */
 };

 /* One obj_info per pack entry, indexed by entry number. */
 static struct obj_info *obj_list;
 127
 128static void added_object(unsigned nr, enum object_type type,
 129                         void *data, unsigned long size);
 130
 131static void write_object(unsigned nr, enum object_type type,
 132                         void *buf, unsigned long size)
 133{
 134        if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 135                die("failed to write object");
 136        added_object(nr, type, buf, size);
 137}
 138
 139static void resolve_delta(unsigned nr, enum object_type type,
 140                          void *base, unsigned long base_size,
 141                          void *delta, unsigned long delta_size)
 142{
 143        void *result;
 144        unsigned long result_size;
 145
 146        result = patch_delta(base, base_size,
 147                             delta, delta_size,
 148                             &result_size);
 149        if (!result)
 150                die("failed to apply delta");
 151        free(delta);
 152        write_object(nr, type, result, result_size);
 153        free(result);
 154}
 155
 156static void added_object(unsigned nr, enum object_type type,
 157                         void *data, unsigned long size)
 158{
 159        struct delta_info **p = &delta_list;
 160        struct delta_info *info;
 161
 162        while ((info = *p) != NULL) {
 163                if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
 164                    info->base_offset == obj_list[nr].offset) {
 165                        *p = info->next;
 166                        p = &delta_list;
 167                        resolve_delta(info->nr, type, data, size,
 168                                      info->delta, info->size);
 169                        free(info);
 170                        continue;
 171                }
 172                p = &info->next;
 173        }
 174}
 175
 176static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 177                                   unsigned nr)
 178{
 179        void *buf = get_data(size);
 180
 181        if (!dry_run && buf)
 182                write_object(nr, type, buf, size);
 183        free(buf);
 184}
 185
 186static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 187                               unsigned nr)
 188{
 189        void *delta_data, *base;
 190        unsigned long base_size;
 191        unsigned char base_sha1[20];
 192
 193        if (type == OBJ_REF_DELTA) {
 194                hashcpy(base_sha1, fill(20));
 195                use(20);
 196                delta_data = get_data(delta_size);
 197                if (dry_run || !delta_data) {
 198                        free(delta_data);
 199                        return;
 200                }
 201                if (!has_sha1_file(base_sha1)) {
 202                        hashcpy(obj_list[nr].sha1, null_sha1);
 203                        add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 204                        return;
 205                }
 206        } else {
 207                unsigned base_found = 0;
 208                unsigned char *pack, c;
 209                off_t base_offset;
 210                unsigned lo, mid, hi;
 211
 212                pack = fill(1);
 213                c = *pack;
 214                use(1);
 215                base_offset = c & 127;
 216                while (c & 128) {
 217                        base_offset += 1;
 218                        if (!base_offset || MSB(base_offset, 7))
 219                                die("offset value overflow for delta base object");
 220                        pack = fill(1);
 221                        c = *pack;
 222                        use(1);
 223                        base_offset = (base_offset << 7) + (c & 127);
 224                }
 225                base_offset = obj_list[nr].offset - base_offset;
 226
 227                delta_data = get_data(delta_size);
 228                if (dry_run || !delta_data) {
 229                        free(delta_data);
 230                        return;
 231                }
 232                lo = 0;
 233                hi = nr;
 234                while (lo < hi) {
 235                        mid = (lo + hi)/2;
 236                        if (base_offset < obj_list[mid].offset) {
 237                                hi = mid;
 238                        } else if (base_offset > obj_list[mid].offset) {
 239                                lo = mid + 1;
 240                        } else {
 241                                hashcpy(base_sha1, obj_list[mid].sha1);
 242                                base_found = !is_null_sha1(base_sha1);
 243                                break;
 244                        }
 245                }
 246                if (!base_found) {
 247                        /* The delta base object is itself a delta that
 248                           has not been resolved yet. */
 249                        hashcpy(obj_list[nr].sha1, null_sha1);
 250                        add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 251                        return;
 252                }
 253        }
 254
 255        base = read_sha1_file(base_sha1, &type, &base_size);
 256        if (!base) {
 257                error("failed to read delta-pack base object %s",
 258                      sha1_to_hex(base_sha1));
 259                if (!recover)
 260                        exit(1);
 261                has_errors = 1;
 262                return;
 263        }
 264        resolve_delta(nr, type, base, base_size, delta_data, delta_size);
 265        free(base);
 266}
 267
 268static void unpack_one(unsigned nr)
 269{
 270        unsigned shift;
 271        unsigned char *pack, c;
 272        unsigned long size;
 273        enum object_type type;
 274
 275        obj_list[nr].offset = consumed_bytes;
 276
 277        pack = fill(1);
 278        c = *pack;
 279        use(1);
 280        type = (c >> 4) & 7;
 281        size = (c & 15);
 282        shift = 4;
 283        while (c & 0x80) {
 284                pack = fill(1);
 285                c = *pack;
 286                use(1);
 287                size += (c & 0x7f) << shift;
 288                shift += 7;
 289        }
 290
 291        switch (type) {
 292        case OBJ_COMMIT:
 293        case OBJ_TREE:
 294        case OBJ_BLOB:
 295        case OBJ_TAG:
 296                unpack_non_delta_entry(type, size, nr);
 297                return;
 298        case OBJ_REF_DELTA:
 299        case OBJ_OFS_DELTA:
 300                unpack_delta_entry(type, size, nr);
 301                return;
 302        default:
 303                error("bad object type %d", type);
 304                has_errors = 1;
 305                if (recover)
 306                        return;
 307                exit(1);
 308        }
 309}
 310
 311static void unpack_all(void)
 312{
 313        int i;
 314        struct progress progress;
 315        struct pack_header *hdr = fill(sizeof(struct pack_header));
 316        unsigned nr_objects = ntohl(hdr->hdr_entries);
 317
 318        if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 319                die("bad pack file");
 320        if (!pack_version_ok(hdr->hdr_version))
 321                die("unknown pack file version %d", ntohl(hdr->hdr_version));
 322        use(sizeof(struct pack_header));
 323
 324        if (!quiet)
 325                start_progress(&progress, "Unpacking %u objects...", "", nr_objects);
 326        obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 327        for (i = 0; i < nr_objects; i++) {
 328                unpack_one(i);
 329                if (!quiet)
 330                        display_progress(&progress, i + 1);
 331        }
 332        if (!quiet)
 333                stop_progress(&progress);
 334
 335        if (delta_list)
 336                die("unresolved deltas left after unpacking");
 337}
 338
 339int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 340{
 341        int i;
 342        unsigned char sha1[20];
 343
 344        git_config(git_default_config);
 345
 346        quiet = !isatty(2);
 347
 348        for (i = 1 ; i < argc; i++) {
 349                const char *arg = argv[i];
 350
 351                if (*arg == '-') {
 352                        if (!strcmp(arg, "-n")) {
 353                                dry_run = 1;
 354                                continue;
 355                        }
 356                        if (!strcmp(arg, "-q")) {
 357                                quiet = 1;
 358                                continue;
 359                        }
 360                        if (!strcmp(arg, "-r")) {
 361                                recover = 1;
 362                                continue;
 363                        }
 364                        if (!prefixcmp(arg, "--pack_header=")) {
 365                                struct pack_header *hdr;
 366                                char *c;
 367
 368                                hdr = (struct pack_header *)buffer;
 369                                hdr->hdr_signature = htonl(PACK_SIGNATURE);
 370                                hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
 371                                if (*c != ',')
 372                                        die("bad %s", arg);
 373                                hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
 374                                if (*c)
 375                                        die("bad %s", arg);
 376                                len = sizeof(*hdr);
 377                                continue;
 378                        }
 379                        usage(unpack_usage);
 380                }
 381
 382                /* We don't take any non-flag arguments now.. Maybe some day */
 383                usage(unpack_usage);
 384        }
 385        SHA1_Init(&ctx);
 386        unpack_all();
 387        SHA1_Update(&ctx, buffer, offset);
 388        SHA1_Final(sha1, &ctx);
 389        if (hashcmp(fill(20), sha1))
 390                die("final sha1 did not match");
 391        use(20);
 392
 393        /* Write the last part of the buffer to stdout */
 394        while (len) {
 395                int ret = xwrite(1, buffer + offset, len);
 396                if (ret <= 0)
 397                        break;
 398                len -= ret;
 399                offset += ret;
 400        }
 401
 402        /* All done */
 403        return has_errors;
 404}