Added option to export the marks table when fast-import terminates.
[gitweb.git] / fast-import.c
index 50171d69cabd40ca07ffef9a037b122450a0dbf1..d61da3adecd3c2725a05d65728f09ce5eb5c8c4d 100644 (file)
@@ -4,7 +4,6 @@ Format of STDIN stream:
   stream ::= cmd*;
 
   cmd ::= new_blob
-        | new_branch
         | new_commit
         | new_tag
         ;
@@ -14,15 +13,12 @@ Format of STDIN stream:
     file_content;
   file_content ::= data;
 
-  new_branch ::= 'branch' sp ref_str lf
-    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
-    lf;
-
   new_commit ::= 'commit' sp ref_str lf
-       mark?
-       ('author' sp name '<' email '>' ts tz lf)?
-       'committer' sp name '<' email '>' ts tz lf
-       commit_msg
+    mark?
+    ('author' sp name '<' email '>' ts tz lf)?
+    'committer' sp name '<' email '>' ts tz lf
+    commit_msg
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
     file_change*
     lf;
   commit_msg ::= data;
@@ -185,12 +181,20 @@ struct branch
        unsigned char sha1[20];
 };
 
+/*
+ * A tag created during this import.  Tags are chained together in
+ * creation order through next_tag (see first_tag / last_tag).
+ */
+struct tag
+{
+       struct tag *next_tag;   /* next tag in the list, NULL at the tail */
+       const char *name;       /* tag name, without the refs/tags/ prefix */
+       unsigned char sha1[20]; /* SHA-1 recorded for the tag object */
+};
+
 
 /* Stats and misc. counters */
 static unsigned long max_depth = 10;
 static unsigned long alloc_count;
 static unsigned long branch_count;
 static unsigned long branch_load_count;
+static unsigned long remap_count;
 static unsigned long object_count;
 static unsigned long duplicate_count;
 static unsigned long marks_set_count;
@@ -209,14 +213,17 @@ static struct atom_str **atom_table;
 
 /* The .pack file being generated */
 static int pack_fd;
-static unsigned long pack_offset;
+static unsigned long pack_size;
 static unsigned char pack_sha1[20];
+static void* pack_base;
+static size_t pack_mlen;
 
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 1000;
 static struct object_entry_pool *blocks;
 static struct object_entry *object_table[1 << 16];
 static struct mark_set *marks;
+static const char* mark_file;
 
 /* Our last blob */
 static struct last_object last_blob;
@@ -234,6 +241,10 @@ static unsigned long branch_table_sz = 1039;
 static struct branch **branch_table;
 static struct branch *active_branches;
 
+/* Tag data */
+static struct tag *first_tag;
+static struct tag *last_tag;
+
 /* Input stream parsing */
 static struct strbuf command_buf;
 static unsigned long next_mark;
@@ -328,6 +339,9 @@ static void* pool_alloc(size_t len)
        }
 
        r = p->next_free;
+       /* round out to a pointer alignment */
+       if (len & (sizeof(void*) - 1))
+               len += sizeof(void*) - (len & (sizeof(void*) - 1));
        p->next_free += len;
        return r;
 }
@@ -503,6 +517,7 @@ static struct tree_entry* new_tree_entry()
 
        if (!avail_tree_entry) {
                unsigned int n = tree_entry_alloc;
+               total_allocd += n * sizeof(struct tree_entry);
                avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
                while (n--) {
                        *((void**)e) = e + 1;
@@ -605,7 +620,7 @@ static int store_object(
                return 1;
        }
        e->type = type;
-       e->offset = pack_offset;
+       e->offset = pack_size;
        object_count++;
        object_count_by_type[type]++;
 
@@ -626,7 +641,7 @@ static int store_object(
                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
                ywrite(pack_fd, hdr, hdrlen);
                ywrite(pack_fd, last->sha1, sizeof(sha1));
-               pack_offset += hdrlen + sizeof(sha1);
+               pack_size += hdrlen + sizeof(sha1);
        } else {
                if (last)
                        last->depth = 0;
@@ -634,7 +649,7 @@ static int store_object(
                s.avail_in = datlen;
                hdrlen = encode_header(type, datlen, hdr);
                ywrite(pack_fd, hdr, hdrlen);
-               pack_offset += hdrlen;
+               pack_size += hdrlen;
        }
 
        s.avail_out = deflateBound(&s, s.avail_in);
@@ -644,7 +659,7 @@ static int store_object(
        deflateEnd(&s);
 
        ywrite(pack_fd, out, s.total_out);
-       pack_offset += s.total_out;
+       pack_size += s.total_out;
 
        free(out);
        if (delta)
@@ -659,6 +674,127 @@ static int store_object(
        return 0;
 }
 
+/*
+ * Return a pointer to the byte at `offset` within the pack file that is
+ * being generated, mapping (or remapping) the file read-only as needed.
+ *
+ * Dies if `offset` is not below pack_size or if mmap() fails.  Whenever
+ * `offset` is not covered by the current mapping, the old mapping is
+ * released and the file is mapped again from its start, so pointers
+ * returned by earlier calls must not be used after a call that remaps.
+ */
+static void* map_pack(unsigned long offset)
+{
+       if (offset >= pack_size)
+               die("object offset outside of pack file");
+       if (offset >= pack_mlen) {
+               /* the current mapping (if any) is too short: replace it */
+               if (pack_base)
+                       munmap(pack_base, pack_mlen);
+               /* round out how much we map to 16 MB units */
+               pack_mlen = pack_size;
+               if (pack_mlen & ((1 << 24) - 1))
+                       pack_mlen = ((pack_mlen >> 24) + 1) << 24;
+               pack_base = mmap(NULL,pack_mlen,PROT_READ,MAP_SHARED,pack_fd,0);
+               if (pack_base == MAP_FAILED)
+                       die("Failed to map generated pack: %s", strerror(errno));
+               /* counted for the "Pack remaps" line of the statistics */
+               remap_count++;
+       }
+       return (char*)pack_base + offset;
+}
+
+/*
+ * Decode the object header stored at `offset` in the pack being generated.
+ *
+ * The first byte carries the object type in bits 4-6 and the low four
+ * bits of the size.  While the high bit of a byte is set another byte
+ * follows, each contributing seven more size bits, least significant
+ * group first.
+ *
+ * Stores the type in *type and the size in *sizep, and returns the
+ * offset of the first byte after the header.  Dies if the encoded size
+ * does not fit in an unsigned long.
+ */
+static unsigned long unpack_object_header(unsigned long offset,
+       enum object_type *type,
+       unsigned long *sizep)
+{
+       unsigned shift;
+       unsigned char c;
+       unsigned long size;
+
+       c = *(unsigned char*)map_pack(offset++);
+       *type = (c >> 4) & 7;
+       size = c & 15;
+       shift = 4;
+       while (c & 0x80) {
+               /* shifting by the width of the type or more is undefined */
+               if (shift >= 8 * sizeof(size))
+                       die("object header size does not fit in unsigned long");
+               c = *(unsigned char*)map_pack(offset++);
+               /*
+                * Widen before shifting: (c & 0x7f) is an int, and
+                * shifting it left by 25 or more bits can overflow it,
+                * corrupting sizes of 2 GiB and up.
+                */
+               size += (unsigned long)(c & 0x7f) << shift;
+               shift += 7;
+       }
+       *sizep = size;
+       return offset;
+}
+
+/*
+ * Inflate the zlib stream that starts at pack offset `o` and is expected
+ * to expand to exactly `sz` bytes.
+ *
+ * Returns a newly allocated buffer of sz + 1 bytes: the inflated data
+ * followed by a NUL byte.  Dies on a zlib error or when the inflated
+ * length differs from `sz`.
+ */
+static void *unpack_non_delta_entry(unsigned long o, unsigned long sz)
+{
+       z_stream stream;
+       unsigned char *result;
+
+       /* one extra byte so the result is always NUL terminated */
+       result = xmalloc(sz + 1);
+       result[sz] = 0;
+
+       memset(&stream, 0, sizeof(stream));
+       stream.next_in = map_pack(o);
+       /* offer zlib everything from `o` to the end of the mapping */
+       stream.avail_in = pack_mlen - o;
+       stream.next_out = result;
+       stream.avail_out = sz;
+
+       /* NOTE(review): the return value of inflateInit() is not checked */
+       inflateInit(&stream);
+       for (;;) {
+               int st = inflate(&stream, Z_FINISH);
+               if (st == Z_STREAM_END)
+                       break;
+               if (st == Z_OK) {
+                       /*
+                        * Not finished yet: work out how far into the
+                        * pack zlib has read and fetch the input pointer
+                        * again; map_pack() remaps the pack if that
+                        * position lies beyond the current mapping.
+                        */
+                       o = stream.next_in - (unsigned char*)pack_base;
+                       stream.next_in = map_pack(o);
+                       stream.avail_in = pack_mlen - o;
+                       continue;
+               }
+               die("Error from zlib during inflate.");
+       }
+       inflateEnd(&stream);
+       if (stream.total_out != sz)
+               die("Error after inflate: sizes mismatch");
+       return result;
+}
+
+static void *unpack_entry(unsigned long offset, unsigned long *sizep);
+
+/*
+ * Reconstruct an object that was stored as a delta.
+ *
+ * `offset` points just past the object header: the 20-byte SHA-1 of the
+ * base object is read from there, and the compressed delta, which
+ * inflates to `delta_size` bytes, is read from offset + 20.
+ *
+ * The base is looked up in the table of objects written so far and
+ * unpacked (recursively, through unpack_entry), then the delta is
+ * applied to it.  Returns the buffer produced by patch_delta() and
+ * stores its length in *sizep.  Dies if the base is unknown or the
+ * delta cannot be applied.
+ */
+static void *unpack_delta_entry(unsigned long offset,
+       unsigned long delta_size,
+       unsigned long *sizep)
+{
+       struct object_entry *base_oe;
+       unsigned char *base_sha1;
+       void *delta_data, *base, *result;
+       unsigned long base_size, result_size;
+
+       /*
+        * Ask for the byte that follows the base name and step back 20
+        * bytes -- presumably so that the whole name is known to lie
+        * inside the current mapping.
+        */
+       base_sha1 = (unsigned char*)map_pack(offset + 20) - 20;
+       base_oe = find_object(base_sha1);
+       if (!base_oe)
+               die("I'm broken; I can't find a base I know must be here.");
+       base = unpack_entry(base_oe->offset, &base_size);
+       delta_data = unpack_non_delta_entry(offset + 20, delta_size);
+       result = patch_delta(base, base_size,
+                            delta_data, delta_size,
+                            &result_size);
+       if (!result)
+               die("failed to apply delta");
+       /* only the patched result is handed back */
+       free(delta_data);
+       free(base);
+       *sizep = result_size;
+       return result;
+}
+
+/*
+ * Read back the object whose header starts at `offset` in the pack being
+ * generated.
+ *
+ * Delta entries are resolved against their base; commits, trees, blobs
+ * and tags are simply inflated.  Returns an allocated buffer with the
+ * object data and stores its length in *sizep.  Dies on any other
+ * object type.
+ */
+static void *unpack_entry(unsigned long offset, unsigned long *sizep)
+{
+       unsigned long size;
+       enum object_type kind;
+
+       /* `offset` now points at the first byte after the header */
+       offset = unpack_object_header(offset, &kind, &size);
+       switch (kind) {
+       case OBJ_DELTA:
+               /* here `size` is the inflated size of the delta itself */
+               return unpack_delta_entry(offset, size, sizep);
+       case OBJ_COMMIT:
+       case OBJ_TREE:
+       case OBJ_BLOB:
+       case OBJ_TAG:
+               *sizep = size;
+               return unpack_non_delta_entry(offset, size);
+       default:
+               die("I created an object I can't read!");
+       }
+}
+
 static const char *get_mode(const char *str, unsigned int *modep)
 {
        unsigned char c;
@@ -680,7 +816,6 @@ static void load_tree(struct tree_entry *root)
        unsigned long size;
        char *buf;
        const char *c;
-       char type[20];
 
        root->tree = t = new_tree_content(8);
        if (!memcmp(root->sha1, null_sha1, 20))
@@ -688,11 +823,14 @@ static void load_tree(struct tree_entry *root)
 
        myoe = find_object(root->sha1);
        if (myoe) {
-               die("FIXME");
+               if (myoe->type != OBJ_TREE)
+                       die("Not a tree: %s", sha1_to_hex(root->sha1));
+               buf = unpack_entry(myoe->offset, &size);
        } else {
+               char type[20];
                buf = read_sha1_file(root->sha1, type, &size);
                if (!buf || strcmp(type, tree_type))
-                       die("Can't load existing tree %s", sha1_to_hex(root->sha1));
+                       die("Can't load tree %s", sha1_to_hex(root->sha1));
        }
 
        c = buf;
@@ -869,7 +1007,7 @@ static void init_pack_header()
        hdr.hdr_entries = 0;
 
        ywrite(pack_fd, &hdr, sizeof(hdr));
-       pack_offset = sizeof(hdr);
+       pack_size = sizeof(hdr);
 }
 
 static void fixup_header_footer()
@@ -970,6 +1108,51 @@ static void dump_branches()
        }
 }
 
+/*
+ * Write a ref under refs/tags/ for every tag created during this import.
+ *
+ * Each ref is locked and then written with the SHA-1 recorded for the
+ * tag, passing "fast-import" as the message.  Dies if a ref name does
+ * not fit in PATH_MAX or if a ref cannot be locked or written.
+ */
+static void dump_tags()
+{
+       static const char *msg = "fast-import";
+       struct tag *t;
+       struct ref_lock *lock;
+       char path[PATH_MAX];
+
+       for (t = first_tag; t; t = t->next_tag) {
+               /* tag names come from the input stream: never overrun path */
+               int len = snprintf(path, sizeof(path), "refs/tags/%s", t->name);
+               if (len < 0 || (size_t)len >= sizeof(path))
+                       die("Tag name too long: %s", t->name);
+               lock = lock_any_ref_for_update(path, NULL, 0);
+               if (!lock || write_ref_sha1(lock, t->sha1, msg) < 0)
+                       die("Can't write %s", path);
+       }
+}
+
+/*
+ * Recursively write the marks stored in the mark set `m` to `f`, one
+ * "idnum,sha1hex" line per assigned mark.
+ *
+ * A set with a zero shift is a leaf whose 1024 slots hold the marked
+ * objects; slot k is written as mark number base + k.  A set with a
+ * non-zero shift holds up to 1024 child sets, each visited in turn.
+ *
+ * NOTE(review): the base passed down is (base + k) << m->shift.  If a
+ * set's shift is the absolute bit position of its index within the mark
+ * number, this is only right while `base` is 0, and tables three or more
+ * levels deep would need base + (k << m->shift) -- confirm against the
+ * code that inserts marks.
+ */
+static void dump_marks_helper(FILE *f,
+       unsigned long base,
+       struct mark_set *m)
+{
+       int k;
+       if (m->shift) {
+               /* interior node: descend into every child that exists */
+               for (k = 0; k < 1024; k++) {
+                       if (m->data.sets[k])
+                               dump_marks_helper(f, (base + k) << m->shift,
+                                       m->data.sets[k]);
+               }
+       } else {
+               /* leaf node: emit every slot that has an object */
+               for (k = 0; k < 1024; k++) {
+                       if (m->data.marked[k])
+                               fprintf(f, "%lu,%s\n", base + k,
+                                       sha1_to_hex(m->data.marked[k]->sha1));
+               }
+       }
+}
+
+/*
+ * Write the marks table to the file given with --export-marks.
+ *
+ * Does nothing when no marks file was requested.  Dies if the file
+ * cannot be opened or if writing it fails, rather than handing a NULL
+ * stream to stdio or silently leaving a truncated file behind.
+ */
+static void dump_marks()
+{
+       if (mark_file)
+       {
+               int failed;
+               FILE *f = fopen(mark_file, "w");
+               if (!f)
+                       die("Can't write marks file %s: %s",
+                               mark_file, strerror(errno));
+               dump_marks_helper(f, 0, marks);
+               /* pick up errors from earlier writes and from the final flush */
+               failed = ferror(f);
+               if (fclose(f) || failed)
+                       die("Can't finish writing marks file %s", mark_file);
+       }
+}
+
 static void read_next_command()
 {
        read_line(&command_buf, stdin, '\n');
@@ -1026,7 +1209,8 @@ static void cmd_new_blob()
 
 static void unload_one_branch()
 {
-       while (cur_active_branches >= max_active_branches) {
+       while (cur_active_branches
+               && cur_active_branches >= max_active_branches) {
                unsigned long min_commit = ULONG_MAX;
                struct branch *e, *l = NULL, *p = NULL;
 
@@ -1143,6 +1327,69 @@ static void file_change_d(struct branch *b)
                free(p_uq);
 }
 
+/*
+ * Handle the optional 'from' line of a commit command.
+ *
+ * If the current command is not a 'from' line this returns at once and
+ * leaves the command unconsumed.  Otherwise branch `b` is initialised
+ * from the named source, which may be
+ *   - another branch known to this import,
+ *   - a mark (":idnum") that refers to a commit, or
+ *   - any SHA-1 expression accepted by get_sha1(),
+ * by setting both b->sha1 and b->branch_tree.sha1, and then the next
+ * command is read.
+ *
+ * Dies if `b` already has a commit, names itself as its source, or the
+ * source cannot be resolved to a valid commit.
+ */
+static void cmd_from(struct branch *b)
+{
+       const char *from, *endp;
+       char *str_uq;
+       struct branch *s;
+
+       if (strncmp("from ", command_buf.buf, 5))
+               return;
+
+       if (b->last_commit)
+               die("Can't reinitialize branch %s", b->name);
+
+       from = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(from, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after string in: %s", command_buf.buf);
+               from = str_uq;
+       }
+
+       s = lookup_branch(from);
+       if (b == s)
+               die("Can't create a branch from itself: %s", b->name);
+       else if (s) {
+               memcpy(b->sha1, s->sha1, 20);
+               memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
+       } else if (*from == ':') {
+               unsigned long idnum = strtoul(from + 1, NULL, 10);
+               struct object_entry *oe = find_mark(idnum);
+               unsigned long size;
+               char *buf;
+               if (oe->type != OBJ_COMMIT)
+                       die("Mark :%lu not a commit", idnum);
+               memcpy(b->sha1, oe->sha1, 20);
+               /* the commit lives in our own pack: read its tree from there */
+               buf = unpack_entry(oe->offset, &size);
+               /* "tree " + 40 hex digits + LF is the shortest valid commit */
+               if (!buf || size < 46)
+                       die("Not a valid commit: %s", from);
+               if (memcmp("tree ", buf, 5)
+                       || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+                       die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+               free(buf);
+       } else if (!get_sha1(from, b->sha1)) {
+               if (!memcmp(b->sha1, null_sha1, 20))
+                       memcpy(b->branch_tree.sha1, null_sha1, 20);
+               else {
+                       unsigned long size;
+                       char *buf;
+
+                       buf = read_object_with_reference(b->sha1,
+                               type_names[OBJ_COMMIT], &size, b->sha1);
+                       if (!buf || size < 46)
+                               die("Not a valid commit: %s", from);
+                       if (memcmp("tree ", buf, 5)
+                               || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+                               die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+                       free(buf);
+               }
+       } else
+               die("Invalid ref name or SHA1 expression: %s", from);
+
+       /* `from` may point into str_uq, so release it only once we are done */
+       if (str_uq)
+               free(str_uq);
+       read_next_command();
+}
+
 static void cmd_new_commit()
 {
        struct branch *b;
@@ -1165,7 +1412,7 @@ static void cmd_new_commit()
        }
        b = lookup_branch(sp);
        if (!b)
-               die("Branch not declared: %s", sp);
+               b = new_branch(sp);
        if (str_uq)
                free(str_uq);
 
@@ -1182,16 +1429,17 @@ static void cmd_new_commit()
        if (!committer)
                die("Expected committer but didn't get one");
        msg = cmd_data(&msglen);
+       read_next_command();
+       cmd_from(b);
 
        /* ensure the branch is active/loaded */
-       if (!b->branch_tree.tree) {
+       if (!b->branch_tree.tree || !max_active_branches) {
                unload_one_branch();
                load_branch(b);
        }
 
        /* file_change* */
        for (;;) {
-               read_next_command();
                if (1 == command_buf.len)
                        break;
                else if (!strncmp("M ", command_buf.buf, 2))
@@ -1200,6 +1448,7 @@ static void cmd_new_commit()
                        file_change_d(b);
                else
                        die("Unsupported file_change: %s", command_buf.buf);
+               read_next_command();
        }
 
        /* build the tree and the commit */
@@ -1229,85 +1478,104 @@ static void cmd_new_commit()
        b->last_commit = object_count_by_type[OBJ_COMMIT];
 }
 
-static void cmd_new_branch()
+/*
+ * Handle a 'tag' command: create an annotated tag for a commit.
+ *
+ * The tag name is taken from the command line itself.  A 'from' line
+ * must follow and name the commit to tag (a branch of this import, a
+ * mark ":idnum", or a SHA-1 expression), then a 'tagger' line, then the
+ * tag message as a data command.  The tag object is stored in the pack
+ * and the tag is appended to the list that dump_tags() turns into refs.
+ *
+ * Dies on any line that is missing or malformed.
+ */
+static void cmd_new_tag()
 {
-       struct branch *b;
        char *str_uq;
        const char *endp;
        char *sp;
+       const char *from;
+       char *tagger;
+       struct branch *s;
+       void *msg;
+       size_t msglen;
+       char *body;
+       struct tag *t;
+       unsigned char sha1[20];
 
-       /* Obtain the new branch name from the rest of our command */
+       /* Obtain the new tag name from the rest of our command */
        sp = strchr(command_buf.buf, ' ') + 1;
        str_uq = unquote_c_style(sp, &endp);
        if (str_uq) {
                if (*endp)
-                       die("Garbage after ref in: %s", command_buf.buf);
+                       die("Garbage after tag name in: %s", command_buf.buf);
                sp = str_uq;
        }
-       b = new_branch(sp);
+       t = pool_alloc(sizeof(struct tag));
+       t->next_tag = NULL;
+       t->name = pool_strdup(sp);
+       if (last_tag)
+               last_tag->next_tag = t;
+       else
+               first_tag = t;
+       last_tag = t;
        if (str_uq)
                free(str_uq);
        read_next_command();
 
        /* from ... */
-       if (!strncmp("from ", command_buf.buf, 5)) {
-               const char *from;
-               struct branch *s;
-
-               from = strchr(command_buf.buf, ' ') + 1;
-               str_uq = unquote_c_style(from, &endp);
-               if (str_uq) {
-                       if (*endp)
-                               die("Garbage after string in: %s", command_buf.buf);
-                       from = str_uq;
-               }
-
-               s = lookup_branch(from);
-               if (b == s)
-                       die("Can't create a branch from itself: %s", b->name);
-               else if (s) {
-                       memcpy(b->sha1, s->sha1, 20);
-                       memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
-               } else if (*from == ':') {
-                       unsigned long idnum = strtoul(from + 1, NULL, 10);
-                       struct object_entry *oe = find_mark(idnum);
-                       if (oe->type != OBJ_COMMIT)
-                               die("Mark :%lu not a commit", idnum);
-                       memcpy(b->sha1, oe->sha1, 20);
-                       memcpy(b->branch_tree.sha1, null_sha1, 20);
-               } else if (!get_sha1(from, b->sha1)) {
-                       if (!memcmp(b->sha1, null_sha1, 20))
-                               memcpy(b->branch_tree.sha1, null_sha1, 20);
-                       else {
-                               unsigned long size;
-                               char *buf;
-
-                               buf = read_object_with_reference(b->sha1,
-                                       type_names[OBJ_COMMIT], &size, b->sha1);
-                               if (!buf || size < 46)
-                                       die("Not a valid commit: %s", from);
-                               if (memcmp("tree ", buf, 5)
-                                       || get_sha1_hex(buf + 5, b->branch_tree.sha1))
-                                       die("The commit %s is corrupt", sha1_to_hex(b->sha1));
-                               free(buf);
-                       }
-               } else
-                       die("Invalid ref name or SHA1 expression: %s", from);
+       if (strncmp("from ", command_buf.buf, 5))
+               die("Expected from command, got %s", command_buf.buf);
 
-               if (str_uq)
-                       free(str_uq);
-               read_next_command();
-       } else {
-               memcpy(b->sha1, null_sha1, 20);
-               memcpy(b->branch_tree.sha1, null_sha1, 20);
+       from = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(from, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after string in: %s", command_buf.buf);
+               from = str_uq;
        }
 
-       if (command_buf.eof || command_buf.len > 1)
-               die("An lf did not terminate the branch command as expected.");
+       s = lookup_branch(from);
+       if (s) {
+               memcpy(sha1, s->sha1, 20);
+       } else if (*from == ':') {
+               unsigned long idnum = strtoul(from + 1, NULL, 10);
+               struct object_entry *oe = find_mark(idnum);
+               if (oe->type != OBJ_COMMIT)
+                       die("Mark :%lu not a commit", idnum);
+               memcpy(sha1, oe->sha1, 20);
+       } else if (!get_sha1(from, sha1)) {
+               unsigned long size;
+               char *buf;
+
+               buf = read_object_with_reference(sha1,
+                       type_names[OBJ_COMMIT], &size, sha1);
+               if (!buf || size < 46)
+                       die("Not a valid commit: %s", from);
+               free(buf);
+       } else
+               die("Invalid ref name or SHA1 expression: %s", from);
+
+       if (str_uq)
+               free(str_uq);
+       read_next_command();
+
+       /* tagger ... */
+       if (strncmp("tagger ", command_buf.buf, 7))
+               die("Expected tagger command, got %s", command_buf.buf);
+       /* keep a copy: reading the message below reuses command_buf */
+       tagger = strdup(command_buf.buf);
+       if (!tagger)
+               die("Out of memory copying tagger line");
+
+       /* tag payload/message */
+       read_next_command();
+       msg = cmd_data(&msglen);
+
+       /*
+        * build the tag object
+        *
+        * The fixed text written below accounts for 67 bytes; one more is
+        * needed for the NUL that the last sprintf() stores, otherwise an
+        * empty message overruns the buffer by one byte.
+        */
+       body = xmalloc(68 + strlen(t->name) + strlen(tagger) + msglen);
+       sp = body;
+       sp += sprintf(sp, "object %s\n", sha1_to_hex(sha1));
+       sp += sprintf(sp, "type %s\n", type_names[OBJ_COMMIT]);
+       sp += sprintf(sp, "tag %s\n", t->name);
+       sp += sprintf(sp, "%s\n\n", tagger);
+       memcpy(sp, msg, msglen);
+       sp += msglen;
+       free(tagger);
+       free(msg);
+
+       store_object(OBJ_TAG, body, sp - body, NULL, t->sha1, 0);
+       free(body);
 }
 
 static const char fast_import_usage[] =
-"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] temp.pack";
+"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] temp.pack";
 
 int main(int argc, const char **argv)
 {
@@ -1332,6 +1600,8 @@ int main(int argc, const char **argv)
                        max_depth = strtoul(a + 8, NULL, 0);
                else if (!strncmp(a, "--active-branches=", 18))
                        max_active_branches = strtoul(a + 18, NULL, 0);
+               else if (!strncmp(a, "--export-marks=", 15))
+                       mark_file = a + 15;
                else
                        die("unknown option %s", a);
        }
@@ -1363,10 +1633,10 @@ int main(int argc, const char **argv)
                        break;
                else if (!strcmp("blob", command_buf.buf))
                        cmd_new_blob();
-               else if (!strncmp("branch ", command_buf.buf, 7))
-                       cmd_new_branch();
                else if (!strncmp("commit ", command_buf.buf, 7))
                        cmd_new_commit();
+               else if (!strncmp("tag ", command_buf.buf, 4))
+                       cmd_new_tag();
                else
                        die("Unsupported command: %s", command_buf.buf);
        }
@@ -1375,6 +1645,8 @@ int main(int argc, const char **argv)
        close(pack_fd);
        write_index(idx_name);
        dump_branches();
+       dump_tags();
+       dump_marks();
 
        fprintf(stderr, "%s statistics:\n", argv[0]);
        fprintf(stderr, "---------------------------------------------------\n");
@@ -1390,6 +1662,7 @@ int main(int argc, const char **argv)
        fprintf(stderr, "Memory total:    %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
        fprintf(stderr, "       pools:    %10lu KiB\n", total_allocd/1024);
        fprintf(stderr, "     objects:    %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
+       fprintf(stderr, "Pack remaps:     %10lu\n", remap_count);
        fprintf(stderr, "---------------------------------------------------\n");
 
        stat(pack_name, &sb);