/* contrib/convert-objects/convert-objects.c, as of commit "remote-svn: add incremental import" (8e43a1d) */
   1#include "cache.h"
   2#include "blob.h"
   3#include "commit.h"
   4#include "tree.h"
   5
   6struct entry {
   7        unsigned char old_sha1[20];
   8        unsigned char new_sha1[20];
   9        int converted;
  10};
  11
  12#define MAXOBJECTS (1000000)
  13
  14static struct entry *convert[MAXOBJECTS];
  15static int nr_convert;
  16
  17static struct entry * convert_entry(unsigned char *sha1);
  18
  19static struct entry *insert_new(unsigned char *sha1, int pos)
  20{
  21        struct entry *new = xcalloc(1, sizeof(struct entry));
  22        hashcpy(new->old_sha1, sha1);
  23        memmove(convert + pos + 1, convert + pos, (nr_convert - pos) * sizeof(struct entry *));
  24        convert[pos] = new;
  25        nr_convert++;
  26        if (nr_convert == MAXOBJECTS)
  27                die("you're kidding me - hit maximum object limit");
  28        return new;
  29}
  30
  31static struct entry *lookup_entry(unsigned char *sha1)
  32{
  33        int low = 0, high = nr_convert;
  34
  35        while (low < high) {
  36                int next = (low + high) / 2;
  37                struct entry *n = convert[next];
  38                int cmp = hashcmp(sha1, n->old_sha1);
  39                if (!cmp)
  40                        return n;
  41                if (cmp < 0) {
  42                        high = next;
  43                        continue;
  44                }
  45                low = next+1;
  46        }
  47        return insert_new(sha1, low);
  48}
  49
  50static void convert_binary_sha1(void *buffer)
  51{
  52        struct entry *entry = convert_entry(buffer);
  53        hashcpy(buffer, entry->new_sha1);
  54}
  55
  56static void convert_ascii_sha1(void *buffer)
  57{
  58        unsigned char sha1[20];
  59        struct entry *entry;
  60
  61        if (get_sha1_hex(buffer, sha1))
  62                die("expected sha1, got '%s'", (char *) buffer);
  63        entry = convert_entry(sha1);
  64        memcpy(buffer, sha1_to_hex(entry->new_sha1), 40);
  65}
  66
  67static unsigned int convert_mode(unsigned int mode)
  68{
  69        unsigned int newmode;
  70
  71        newmode = mode & S_IFMT;
  72        if (S_ISREG(mode))
  73                newmode |= (mode & 0100) ? 0755 : 0644;
  74        return newmode;
  75}
  76
  77static int write_subdirectory(void *buffer, unsigned long size, const char *base, int baselen, unsigned char *result_sha1)
  78{
  79        char *new = xmalloc(size);
  80        unsigned long newlen = 0;
  81        unsigned long used;
  82
  83        used = 0;
  84        while (size) {
  85                int len = 21 + strlen(buffer);
  86                char *path = strchr(buffer, ' ');
  87                unsigned char *sha1;
  88                unsigned int mode;
  89                char *slash, *origpath;
  90
  91                if (!path || strtoul_ui(buffer, 8, &mode))
  92                        die("bad tree conversion");
  93                mode = convert_mode(mode);
  94                path++;
  95                if (memcmp(path, base, baselen))
  96                        break;
  97                origpath = path;
  98                path += baselen;
  99                slash = strchr(path, '/');
 100                if (!slash) {
 101                        newlen += sprintf(new + newlen, "%o %s", mode, path);
 102                        new[newlen++] = '\0';
 103                        hashcpy((unsigned char *)new + newlen, (unsigned char *) buffer + len - 20);
 104                        newlen += 20;
 105
 106                        used += len;
 107                        size -= len;
 108                        buffer = (char *) buffer + len;
 109                        continue;
 110                }
 111
 112                newlen += sprintf(new + newlen, "%o %.*s", S_IFDIR, (int)(slash - path), path);
 113                new[newlen++] = 0;
 114                sha1 = (unsigned char *)(new + newlen);
 115                newlen += 20;
 116
 117                len = write_subdirectory(buffer, size, origpath, slash-origpath+1, sha1);
 118
 119                used += len;
 120                size -= len;
 121                buffer = (char *) buffer + len;
 122        }
 123
 124        write_sha1_file(new, newlen, tree_type, result_sha1);
 125        free(new);
 126        return used;
 127}
 128
 129static void convert_tree(void *buffer, unsigned long size, unsigned char *result_sha1)
 130{
 131        void *orig_buffer = buffer;
 132        unsigned long orig_size = size;
 133
 134        while (size) {
 135                size_t len = 1+strlen(buffer);
 136
 137                convert_binary_sha1((char *) buffer + len);
 138
 139                len += 20;
 140                if (len > size)
 141                        die("corrupt tree object");
 142                size -= len;
 143                buffer = (char *) buffer + len;
 144        }
 145
 146        write_subdirectory(orig_buffer, orig_size, "", 0, result_sha1);
 147}
 148
 149static unsigned long parse_oldstyle_date(const char *buf)
 150{
 151        char c, *p;
 152        char buffer[100];
 153        struct tm tm;
 154        const char *formats[] = {
 155                "%c",
 156                "%a %b %d %T",
 157                "%Z",
 158                "%Y",
 159                " %Y",
 160                NULL
 161        };
 162        /* We only ever did two timezones in the bad old format .. */
 163        const char *timezones[] = {
 164                "PDT", "PST", "CEST", NULL
 165        };
 166        const char **fmt = formats;
 167
 168        p = buffer;
 169        while (isspace(c = *buf))
 170                buf++;
 171        while ((c = *buf++) != '\n')
 172                *p++ = c;
 173        *p++ = 0;
 174        buf = buffer;
 175        memset(&tm, 0, sizeof(tm));
 176        do {
 177                const char *next = strptime(buf, *fmt, &tm);
 178                if (next) {
 179                        if (!*next)
 180                                return mktime(&tm);
 181                        buf = next;
 182                } else {
 183                        const char **p = timezones;
 184                        while (isspace(*buf))
 185                                buf++;
 186                        while (*p) {
 187                                if (!memcmp(buf, *p, strlen(*p))) {
 188                                        buf += strlen(*p);
 189                                        break;
 190                                }
 191                                p++;
 192                        }
 193                }
 194                fmt++;
 195        } while (*buf && *fmt);
 196        printf("left: %s\n", buf);
 197        return mktime(&tm);
 198}
 199
 200static int convert_date_line(char *dst, void **buf, unsigned long *sp)
 201{
 202        unsigned long size = *sp;
 203        char *line = *buf;
 204        char *next = strchr(line, '\n');
 205        char *date = strchr(line, '>');
 206        int len;
 207
 208        if (!next || !date)
 209                die("missing or bad author/committer line %s", line);
 210        next++; date += 2;
 211
 212        *buf = next;
 213        *sp = size - (next - line);
 214
 215        len = date - line;
 216        memcpy(dst, line, len);
 217        dst += len;
 218
 219        /* Is it already in new format? */
 220        if (isdigit(*date)) {
 221                int datelen = next - date;
 222                memcpy(dst, date, datelen);
 223                return len + datelen;
 224        }
 225
 226        /*
 227         * Hacky hacky: one of the sparse old-style commits does not have
 228         * any date at all, but we can fake it by using the committer date.
 229         */
 230        if (*date == '\n' && strchr(next, '>'))
 231                date = strchr(next, '>')+2;
 232
 233        return len + sprintf(dst, "%lu -0700\n", parse_oldstyle_date(date));
 234}
 235
 236static void convert_date(void *buffer, unsigned long size, unsigned char *result_sha1)
 237{
 238        char *new = xmalloc(size + 100);
 239        unsigned long newlen = 0;
 240
 241        /* "tree <sha1>\n" */
 242        memcpy(new + newlen, buffer, 46);
 243        newlen += 46;
 244        buffer = (char *) buffer + 46;
 245        size -= 46;
 246
 247        /* "parent <sha1>\n" */
 248        while (!memcmp(buffer, "parent ", 7)) {
 249                memcpy(new + newlen, buffer, 48);
 250                newlen += 48;
 251                buffer = (char *) buffer + 48;
 252                size -= 48;
 253        }
 254
 255        /* "author xyz <xyz> date" */
 256        newlen += convert_date_line(new + newlen, &buffer, &size);
 257        /* "committer xyz <xyz> date" */
 258        newlen += convert_date_line(new + newlen, &buffer, &size);
 259
 260        /* Rest */
 261        memcpy(new + newlen, buffer, size);
 262        newlen += size;
 263
 264        write_sha1_file(new, newlen, commit_type, result_sha1);
 265        free(new);
 266}
 267
 268static void convert_commit(void *buffer, unsigned long size, unsigned char *result_sha1)
 269{
 270        void *orig_buffer = buffer;
 271        unsigned long orig_size = size;
 272
 273        if (memcmp(buffer, "tree ", 5))
 274                die("Bad commit '%s'", (char *) buffer);
 275        convert_ascii_sha1((char *) buffer + 5);
 276        buffer = (char *) buffer + 46;    /* "tree " + "hex sha1" + "\n" */
 277        while (!memcmp(buffer, "parent ", 7)) {
 278                convert_ascii_sha1((char *) buffer + 7);
 279                buffer = (char *) buffer + 48;
 280        }
 281        convert_date(orig_buffer, orig_size, result_sha1);
 282}
 283
 284static struct entry * convert_entry(unsigned char *sha1)
 285{
 286        struct entry *entry = lookup_entry(sha1);
 287        enum object_type type;
 288        void *buffer, *data;
 289        unsigned long size;
 290
 291        if (entry->converted)
 292                return entry;
 293        data = read_sha1_file(sha1, &type, &size);
 294        if (!data)
 295                die("unable to read object %s", sha1_to_hex(sha1));
 296
 297        buffer = xmalloc(size);
 298        memcpy(buffer, data, size);
 299
 300        if (type == OBJ_BLOB) {
 301                write_sha1_file(buffer, size, blob_type, entry->new_sha1);
 302        } else if (type == OBJ_TREE)
 303                convert_tree(buffer, size, entry->new_sha1);
 304        else if (type == OBJ_COMMIT)
 305                convert_commit(buffer, size, entry->new_sha1);
 306        else
 307                die("unknown object type %d in %s", type, sha1_to_hex(sha1));
 308        entry->converted = 1;
 309        free(buffer);
 310        free(data);
 311        return entry;
 312}
 313
 314int main(int argc, char **argv)
 315{
 316        unsigned char sha1[20];
 317        struct entry *entry;
 318
 319        setup_git_directory();
 320
 321        if (argc != 2)
 322                usage("git-convert-objects <sha1>");
 323        if (get_sha1(argv[1], sha1))
 324                die("Not a valid object name %s", argv[1]);
 325
 326        entry = convert_entry(sha1);
 327        printf("new sha1: %s\n", sha1_to_hex(entry->new_sha1));
 328        return 0;
 329}