unpack-objects.c on commit [PATCH] Finish initial cut of git-pack-object/git-unpack-object pair. (8ee378a)
   1#include "cache.h"
   2#include "object.h"
   3#include "delta.h"
   4
/* Set to 1 by main() when "-n" is given; no other reader is visible in this file. */
static int dry_run;
/* Number of records in the index; filled in by check_index(). */
static int nr_entries;
/* Pack path prefix from the command line; map_file() appends ".<suffix>" to it. */
static const char *base_name;
/* Message handed to usage() on a bad command line. */
static const char unpack_usage[] = "git-unpack-objects basename";
   9
/*
 * One record of the index file.  check_index() and find_pack_entry()
 * step through the records in 24-byte strides starting 4*256 bytes into
 * the mapped index, so this layout presumably has to stay exactly
 * 4 + 20 bytes (assumes a 4-byte unsigned int -- TODO confirm).
 */
struct pack_entry {
        unsigned int offset; /* network byte order */
        unsigned char sha1[20]; /* object name this record describes */
};
  14
/* Read-only mapping of "<base_name>.pack" and its length; set up in main(). */
static void *pack_base;
static unsigned long pack_size;
/* Read-only mapping of "<base_name>.idx" and its length; set up in main(). */
static void *index_base;
static unsigned long index_size;

/*
 * Array of nr_entries pointers into the mapped index, allocated and
 * sorted by pack offset in check_index().
 */
static struct pack_entry **pack_list;
  21
  22static void *map_file(const char *suffix, unsigned long *sizep)
  23{
  24        static char pathname[PATH_MAX];
  25        unsigned long len;
  26        int fd;
  27        struct stat st;
  28        void *map;
  29
  30        len = snprintf(pathname, PATH_MAX, "%s.%s", base_name, suffix);
  31        if (len >= PATH_MAX)
  32                die("bad pack base-name");
  33        fd = open(pathname, O_RDONLY);
  34        if (fd < 0 || fstat(fd, &st))
  35                die("unable to open '%s'", pathname);
  36        len = st.st_size;
  37        if (!len)
  38                die("bad pack file '%s'", pathname);
  39        map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
  40        if (-1 == (int)(long)map)
  41                die("unable to mmap '%s'", pathname);
  42        close(fd);
  43        *sizep = len;
  44        return map;
  45}
  46
  47static int sort_by_offset(const void *_a, const void *_b)
  48{
  49        struct pack_entry *a = *(struct pack_entry **)_a;
  50        struct pack_entry *b = *(struct pack_entry **)_b;
  51        unsigned int o1, o2;
  52
  53        o1 = ntohl(a->offset);
  54        o2 = ntohl(b->offset);
  55        return o1 < o2 ? -1 : 1;
  56}
  57
  58static int check_index(void)
  59{
  60        unsigned int *array = index_base;
  61        unsigned int nr;
  62        int i;
  63
  64        if (index_size < 4*256)
  65                return error("index file too small");
  66        nr = 0;
  67        for (i = 0; i < 256; i++) {
  68                unsigned int n = ntohl(array[i]);
  69                if (n < nr)
  70                        return error("non-monotonic index");
  71                nr = n;
  72        }
  73        if (index_size != 4*256 + nr * 24) {
  74                printf("index_size=%lu, expected %u (%u)\n",
  75                       index_size, 4*256 + nr * 24, nr);
  76                return error("wrong index file size");
  77        }
  78
  79        nr_entries = nr;
  80        pack_list = xmalloc(nr * sizeof(struct pack_entry *));
  81        for (i = 0; i < nr; i++)
  82                pack_list[i] = index_base + 4*256 + i*24;
  83
  84        qsort(pack_list, nr, sizeof(*pack_list), sort_by_offset);
  85
  86        printf("%d entries\n", nr);
  87        return 0;
  88}
  89
  90static int unpack_non_delta_entry(struct pack_entry *entry,
  91                                  unsigned char *pack)
  92{
  93        int st, kind;
  94        unsigned long size;
  95        z_stream stream;
  96        char *buffer;
  97        unsigned char sha1[20];
  98        char *type_s;
  99        unsigned long offset = ntohl(entry->offset);
 100
 101        kind = pack[0];
 102        size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
 103        printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size);
 104        pack += 5;
 105
 106        buffer = xmalloc(size + 1);
 107        buffer[size] = 0;
 108        memset(&stream, 0, sizeof(stream));
 109        stream.next_in = pack;
 110        stream.avail_in = pack_size - offset; /* sheesh. */
 111        stream.next_out = buffer;
 112        stream.avail_out = size;
 113
 114        inflateInit(&stream);
 115        st = inflate(&stream, Z_FINISH);
 116        inflateEnd(&stream);
 117        if ((st != Z_STREAM_END) || stream.total_out != size)
 118                goto err_finish;
 119        switch (kind) {
 120        case 'C': type_s = "commit"; break;
 121        case 'T': type_s = "tree"; break;
 122        case 'B': type_s = "blob"; break;
 123        default: goto err_finish;
 124        }
 125        if (write_sha1_file(buffer, size, type_s, sha1) < 0)
 126                die("failed to write %s (%s)",
 127                    sha1_to_hex(entry->sha1), type_s);
 128        printf("%s %s\n", sha1_to_hex(sha1), type_s);
 129        if (memcmp(sha1, entry->sha1, 20))
 130                die("resulting %s have wrong SHA1", type_s);
 131
 132 finish:
 133        st = 0;
 134        free(buffer);
 135        return st;
 136 err_finish:
 137        st = -1;
 138        goto finish;
 139}
 140
 141static int find_pack_entry(unsigned char *sha1, struct pack_entry **ent)
 142{
 143        int *level1_ofs = index_base;
 144        int hi = ntohl(level1_ofs[*sha1]);
 145        int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
 146        void *index = index_base + 4*256;
 147
 148        do {
 149                int mi = (lo + hi) / 2;
 150                int cmp = memcmp(index + 24 * mi + 4, sha1, 20);
 151                if (!cmp) {
 152                        *ent = index + 24 * mi;
 153                        return 1;
 154                }
 155                if (cmp < 0)
 156                        hi = mi;
 157                else
 158                        lo = mi;
 159        } while (lo < hi);
 160        return 0;
 161}
 162
 163/* forward declaration for a mutually recursive function */
 164static void unpack_entry(struct pack_entry *);
 165
 166static int unpack_delta_entry(struct pack_entry *entry, unsigned char *pack)
 167{
 168        void *delta_data, *result, *base;
 169        unsigned long delta_alloc, delta_size, result_size, base_size;
 170        z_stream stream;
 171        int st;
 172        char type[20];
 173        unsigned char sha1[20];
 174
 175        printf("%s D", sha1_to_hex(entry->sha1));
 176        printf(" %s\n", sha1_to_hex(pack+1));
 177
 178        /* pack+1 is the base sha1, unless we have it, we need to
 179         * unpack it first.
 180         */
 181        if (!has_sha1_file(pack+1)) {
 182                struct pack_entry *base;
 183                if (!find_pack_entry(pack+1, &base))
 184                        die("cannot find delta-pack base object");
 185                unpack_entry(base);
 186        }
 187
 188        /* pack+1 thru pack+20 is the base sha1 and
 189         * pack+21 thru unknown number is the delta data.
 190         * we do not even have size of the delta data uncompressed.
 191         * sheesh!
 192         */
 193        delta_alloc = 1024;
 194        delta_data = xmalloc(delta_alloc);
 195
 196        memset(&stream, 0, sizeof(stream));
 197
 198        stream.next_in = pack + 21;
 199        stream.avail_in = pack_size - ntohl(entry->offset); /* sheesh. */
 200        stream.next_out = delta_data;
 201        stream.avail_out = delta_alloc;
 202        delta_size = 0;
 203
 204        inflateInit(&stream);
 205        while (1) {
 206                st = inflate(&stream, Z_FINISH);
 207                if (st == Z_STREAM_END) {
 208                        delta_size = stream.total_out;
 209                        break;
 210                }
 211                if (st < 0)
 212                        break;
 213
 214                if (delta_alloc <= stream.total_out) {
 215                        delta_alloc = (delta_alloc +1024) * 3 / 2;
 216                        delta_data = xrealloc(delta_data, delta_alloc);
 217                        stream.next_out = delta_data + stream.total_out;
 218                        stream.avail_out = delta_alloc - stream.total_out;
 219                }
 220        }
 221        inflateEnd(&stream);
 222        if (st != Z_STREAM_END) {
 223                free(delta_data);
 224                return -1;
 225        }
 226
 227        base = read_sha1_file(pack+1, type, &base_size);
 228        if (!base)
 229                die("failed to read delta-pack base object");
 230        result = patch_delta(base, base_size,
 231                             delta_data, delta_size,
 232                             &result_size);
 233        if (!result)
 234                die("failed to apply delta");
 235        free(delta_data);
 236
 237        if (write_sha1_file(result, result_size, type, sha1) < 0)
 238                die("failed to write %s (%s)",
 239                    sha1_to_hex(entry->sha1), type);
 240        free(result);
 241        printf("%s %s\n", sha1_to_hex(sha1), type);
 242        if (memcmp(sha1, entry->sha1, 20))
 243                die("resulting %s have wrong SHA1", type);
 244        return 0;
 245}
 246
 247static void unpack_entry(struct pack_entry *entry)
 248{
 249        unsigned long offset;
 250        unsigned char *pack;
 251
 252        /* Have we done this one already due to deltas based on it? */
 253        if (lookup_object(entry->sha1))
 254                return;
 255
 256        offset = ntohl(entry->offset);
 257        if (offset > pack_size - 5)
 258                die("object offset outside of pack file");
 259        pack = pack_base + offset;
 260        offset = pack_size - offset;
 261        switch (*pack) {
 262        case 'C': case 'T': case 'B':
 263                unpack_non_delta_entry(entry, pack);
 264                break;
 265        case 'D':
 266                unpack_delta_entry(entry, pack);
 267                break;
 268        default:
 269                die("corrupted pack file");
 270        }
 271}
 272
 273/*
 274 * We unpack from the end, older files first. Now, usually
 275 * there are deltas etc, so we'll not actually write the
 276 * objects in that order, but we might as well try..
 277 */
 278static void unpack_all(void)
 279{
 280        int i = nr_entries;
 281
 282        while (--i >= 0) {
 283                struct pack_entry *entry = pack_list[i];
 284                unpack_entry(entry);
 285        }
 286}
 287
 288int main(int argc, char **argv)
 289{
 290        int i;
 291
 292        for (i = 1 ; i < argc; i++) {
 293                const char *arg = argv[i];
 294
 295                if (*arg == '-') {
 296                        if (!strcmp(arg, "-n")) {
 297                                dry_run = 1;
 298                                continue;
 299                        }
 300                        usage(unpack_usage);
 301                }
 302                if (base_name)
 303                        usage(unpack_usage);
 304                base_name = arg;
 305        }
 306        if (!base_name)
 307                usage(unpack_usage);
 308        index_base = map_file("idx", &index_size);
 309        pack_base = map_file("pack", &pack_size);
 310        if (check_index() < 0)
 311                die("bad index file");
 312        unpack_all();
 313        return 0;
 314}