builtin-unpack-objects.c on commit teach git-unpack-objects about deltas with offset to base (209c554)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "object.h"
   4#include "delta.h"
   5#include "pack.h"
   6#include "blob.h"
   7#include "commit.h"
   8#include "tag.h"
   9#include "tree.h"
  10
  11#include <sys/time.h>
  12
  13static int dry_run, quiet, recover, has_errors;
  14static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
  15
  16/* We always read in 4kB chunks. */
  17static unsigned char buffer[4096];
  18static unsigned long offset, len, consumed_bytes;
  19static SHA_CTX ctx;
  20
  21/*
  22 * Make sure at least "min" bytes are available in the buffer, and
  23 * return the pointer to the buffer.
  24 */
  25static void * fill(int min)
  26{
  27        if (min <= len)
  28                return buffer + offset;
  29        if (min > sizeof(buffer))
  30                die("cannot fill %d bytes", min);
  31        if (offset) {
  32                SHA1_Update(&ctx, buffer, offset);
  33                memcpy(buffer, buffer + offset, len);
  34                offset = 0;
  35        }
  36        do {
  37                int ret = xread(0, buffer + len, sizeof(buffer) - len);
  38                if (ret <= 0) {
  39                        if (!ret)
  40                                die("early EOF");
  41                        die("read error on input: %s", strerror(errno));
  42                }
  43                len += ret;
  44        } while (len < min);
  45        return buffer;
  46}
  47
  48static void use(int bytes)
  49{
  50        if (bytes > len)
  51                die("used more bytes than were available");
  52        len -= bytes;
  53        offset += bytes;
  54        consumed_bytes += bytes;
  55}
  56
  57static void *get_data(unsigned long size)
  58{
  59        z_stream stream;
  60        void *buf = xmalloc(size);
  61
  62        memset(&stream, 0, sizeof(stream));
  63
  64        stream.next_out = buf;
  65        stream.avail_out = size;
  66        stream.next_in = fill(1);
  67        stream.avail_in = len;
  68        inflateInit(&stream);
  69
  70        for (;;) {
  71                int ret = inflate(&stream, 0);
  72                use(len - stream.avail_in);
  73                if (stream.total_out == size && ret == Z_STREAM_END)
  74                        break;
  75                if (ret != Z_OK) {
  76                        error("inflate returned %d\n", ret);
  77                        free(buf);
  78                        buf = NULL;
  79                        if (!recover)
  80                                exit(1);
  81                        has_errors = 1;
  82                        break;
  83                }
  84                stream.next_in = fill(1);
  85                stream.avail_in = len;
  86        }
  87        inflateEnd(&stream);
  88        return buf;
  89}
  90
  91struct delta_info {
  92        unsigned char base_sha1[20];
  93        unsigned long base_offset;
  94        unsigned long size;
  95        void *delta;
  96        unsigned nr;
  97        struct delta_info *next;
  98};
  99
 100static struct delta_info *delta_list;
 101
 102static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 103                              unsigned long base_offset,
 104                              void *delta, unsigned long size)
 105{
 106        struct delta_info *info = xmalloc(sizeof(*info));
 107
 108        hashcpy(info->base_sha1, base_sha1);
 109        info->base_offset = base_offset;
 110        info->size = size;
 111        info->delta = delta;
 112        info->nr = nr;
 113        info->next = delta_list;
 114        delta_list = info;
 115}
 116
 117struct obj_info {
 118        unsigned long offset;
 119        unsigned char sha1[20];
 120};
 121
 122static struct obj_info *obj_list;
 123
 124static void added_object(unsigned nr, const char *type, void *data,
 125                         unsigned long size);
 126
 127static void write_object(unsigned nr, void *buf, unsigned long size,
 128                         const char *type)
 129{
 130        if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
 131                die("failed to write object");
 132        added_object(nr, type, buf, size);
 133}
 134
 135static void resolve_delta(unsigned nr, const char *type,
 136                          void *base, unsigned long base_size,
 137                          void *delta, unsigned long delta_size)
 138{
 139        void *result;
 140        unsigned long result_size;
 141
 142        result = patch_delta(base, base_size,
 143                             delta, delta_size,
 144                             &result_size);
 145        if (!result)
 146                die("failed to apply delta");
 147        free(delta);
 148        write_object(nr, result, result_size, type);
 149        free(result);
 150}
 151
 152static void added_object(unsigned nr, const char *type, void *data,
 153                         unsigned long size)
 154{
 155        struct delta_info **p = &delta_list;
 156        struct delta_info *info;
 157
 158        while ((info = *p) != NULL) {
 159                if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
 160                    info->base_offset == obj_list[nr].offset) {
 161                        *p = info->next;
 162                        p = &delta_list;
 163                        resolve_delta(info->nr, type, data, size,
 164                                      info->delta, info->size);
 165                        free(info);
 166                        continue;
 167                }
 168                p = &info->next;
 169        }
 170}
 171
 172static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
 173                                   unsigned nr)
 174{
 175        void *buf = get_data(size);
 176        const char *type;
 177
 178        switch (kind) {
 179        case OBJ_COMMIT: type = commit_type; break;
 180        case OBJ_TREE:   type = tree_type; break;
 181        case OBJ_BLOB:   type = blob_type; break;
 182        case OBJ_TAG:    type = tag_type; break;
 183        default: die("bad type %d", kind);
 184        }
 185        if (!dry_run && buf)
 186                write_object(nr, buf, size, type);
 187        free(buf);
 188}
 189
 190static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
 191                               unsigned nr)
 192{
 193        void *delta_data, *base;
 194        unsigned long base_size;
 195        char type[20];
 196        unsigned char base_sha1[20];
 197
 198        if (kind == OBJ_REF_DELTA) {
 199                hashcpy(base_sha1, fill(20));
 200                use(20);
 201                delta_data = get_data(delta_size);
 202                if (dry_run || !delta_data) {
 203                        free(delta_data);
 204                        return;
 205                }
 206                if (!has_sha1_file(base_sha1)) {
 207                        hashcpy(obj_list[nr].sha1, null_sha1);
 208                        add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 209                        return;
 210                }
 211        } else {
 212                unsigned base_found = 0;
 213                unsigned char *pack, c;
 214                unsigned long base_offset;
 215                unsigned lo, mid, hi;
 216
 217                pack = fill(1);
 218                c = *pack;
 219                use(1);
 220                base_offset = c & 127;
 221                while (c & 128) {
 222                        base_offset += 1;
 223                        if (!base_offset || base_offset & ~(~0UL >> 7))
 224                                die("offset value overflow for delta base object");
 225                        pack = fill(1);
 226                        c = *pack;
 227                        use(1);
 228                        base_offset = (base_offset << 7) + (c & 127);
 229                }
 230                base_offset = obj_list[nr].offset - base_offset;
 231
 232                delta_data = get_data(delta_size);
 233                if (dry_run || !delta_data) {
 234                        free(delta_data);
 235                        return;
 236                }
 237                lo = 0;
 238                hi = nr;
 239                while (lo < hi) {
 240                        mid = (lo + hi)/2;
 241                        if (base_offset < obj_list[mid].offset) {
 242                                hi = mid;
 243                        } else if (base_offset > obj_list[mid].offset) {
 244                                lo = mid + 1;
 245                        } else {
 246                                hashcpy(base_sha1, obj_list[mid].sha1);
 247                                base_found = !is_null_sha1(base_sha1);
 248                                break;
 249                        }
 250                }
 251                if (!base_found) {
 252                        /* The delta base object is itself a delta that
 253                           has not been resolved yet. */
 254                        hashcpy(obj_list[nr].sha1, null_sha1);
 255                        add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 256                        return;
 257                }
 258        }
 259
 260        base = read_sha1_file(base_sha1, type, &base_size);
 261        if (!base) {
 262                error("failed to read delta-pack base object %s",
 263                      sha1_to_hex(base_sha1));
 264                if (!recover)
 265                        exit(1);
 266                has_errors = 1;
 267                return;
 268        }
 269        resolve_delta(nr, type, base, base_size, delta_data, delta_size);
 270        free(base);
 271}
 272
 273static void unpack_one(unsigned nr, unsigned total)
 274{
 275        unsigned shift;
 276        unsigned char *pack, c;
 277        unsigned long size;
 278        enum object_type type;
 279
 280        obj_list[nr].offset = consumed_bytes;
 281
 282        pack = fill(1);
 283        c = *pack;
 284        use(1);
 285        type = (c >> 4) & 7;
 286        size = (c & 15);
 287        shift = 4;
 288        while (c & 0x80) {
 289                pack = fill(1);
 290                c = *pack;
 291                use(1);
 292                size += (c & 0x7f) << shift;
 293                shift += 7;
 294        }
 295        if (!quiet) {
 296                static unsigned long last_sec;
 297                static unsigned last_percent;
 298                struct timeval now;
 299                unsigned percentage = ((nr+1) * 100) / total;
 300
 301                gettimeofday(&now, NULL);
 302                if (percentage != last_percent || now.tv_sec != last_sec) {
 303                        last_sec = now.tv_sec;
 304                        last_percent = percentage;
 305                        fprintf(stderr, "%4u%% (%u/%u) done\r",
 306                                        percentage, (nr+1), total);
 307                }
 308        }
 309        switch (type) {
 310        case OBJ_COMMIT:
 311        case OBJ_TREE:
 312        case OBJ_BLOB:
 313        case OBJ_TAG:
 314                unpack_non_delta_entry(type, size, nr);
 315                return;
 316        case OBJ_REF_DELTA:
 317        case OBJ_OFS_DELTA:
 318                unpack_delta_entry(type, size, nr);
 319                return;
 320        default:
 321                error("bad object type %d", type);
 322                has_errors = 1;
 323                if (recover)
 324                        return;
 325                exit(1);
 326        }
 327}
 328
 329static void unpack_all(void)
 330{
 331        int i;
 332        struct pack_header *hdr = fill(sizeof(struct pack_header));
 333        unsigned nr_objects = ntohl(hdr->hdr_entries);
 334
 335        if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 336                die("bad pack file");
 337        if (!pack_version_ok(hdr->hdr_version))
 338                die("unknown pack file version %d", ntohl(hdr->hdr_version));
 339        fprintf(stderr, "Unpacking %d objects\n", nr_objects);
 340
 341        obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 342        use(sizeof(struct pack_header));
 343        for (i = 0; i < nr_objects; i++)
 344                unpack_one(i, nr_objects);
 345        if (delta_list)
 346                die("unresolved deltas left after unpacking");
 347}
 348
 349int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 350{
 351        int i;
 352        unsigned char sha1[20];
 353
 354        git_config(git_default_config);
 355
 356        quiet = !isatty(2);
 357
 358        for (i = 1 ; i < argc; i++) {
 359                const char *arg = argv[i];
 360
 361                if (*arg == '-') {
 362                        if (!strcmp(arg, "-n")) {
 363                                dry_run = 1;
 364                                continue;
 365                        }
 366                        if (!strcmp(arg, "-q")) {
 367                                quiet = 1;
 368                                continue;
 369                        }
 370                        if (!strcmp(arg, "-r")) {
 371                                recover = 1;
 372                                continue;
 373                        }
 374                        usage(unpack_usage);
 375                }
 376
 377                /* We don't take any non-flag arguments now.. Maybe some day */
 378                usage(unpack_usage);
 379        }
 380        SHA1_Init(&ctx);
 381        unpack_all();
 382        SHA1_Update(&ctx, buffer, offset);
 383        SHA1_Final(sha1, &ctx);
 384        if (hashcmp(fill(20), sha1))
 385                die("final sha1 did not match");
 386        use(20);
 387
 388        /* Write the last part of the buffer to stdout */
 389        while (len) {
 390                int ret = xwrite(1, buffer + offset, len);
 391                if (ret <= 0)
 392                        break;
 393                len -= ret;
 394                offset += ret;
 395        }
 396
 397        /* All done */
 398        if (!quiet)
 399                fprintf(stderr, "\n");
 400        return has_errors;
 401}