Added option to export the marks table when fast-import terminates.
[gitweb.git] / fast-import.c
index 95b84f57e50fad863c65c52e940d730235fd7616..d61da3adecd3c2725a05d65728f09ce5eb5c8c4d 100644 (file)
@@ -5,56 +5,88 @@ Format of STDIN stream:
 
   cmd ::= new_blob
         | new_commit
-        | new_branch
         | new_tag
         ;
 
-  new_blob ::= 'blob' blob_data;
+  new_blob ::= 'blob' lf
+       mark?
+    file_content;
+  file_content ::= data;
 
-  new_commit ::= 'comt' ref_name author_committer_msg
+  new_commit ::= 'commit' sp ref_str lf
+    mark?
+    ('author' sp name '<' email '>' ts tz lf)?
+    'committer' sp name '<' email '>' ts tz lf
+    commit_msg
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
     file_change*
-    '0';
-
-  new_branch ::= 'brch' dst_ref_name src_ref_name;
-  dst_ref_name ::= ref_name;
-  src_ref_name ::= ref_name | sha1_exp;
+    lf;
+  commit_msg ::= data;
 
-  new_tag ::= 'tagg' ref_name tag_name tagger_msg;
-
-  file_change ::= 'M' path_name hexsha1
-                | 'D' path_name
+  file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
+                | 'D' sp path_str lf
                 ;
-
-  author_committer_msg ::= len32
-    'author' sp name '<' email '>' ts tz lf
-    'committer' sp name '<' email '>' ts tz lf
-    lf
-    binary_data;
-
-  tagger_msg ::= len32
-    'tagger' sp name '<' email '>' ts tz lf
-    lf
-    binary_data;
-
-  blob_data ::= len32 binary_data; # max len is 2^32-1
-  path_name ::= len32 path;        # max len is PATH_MAX-1
-  ref_name  ::= len32 ref;         # max len is PATH_MAX-1
-  tag_name  ::= len32 tag;         # max len is PATH_MAX-1
-  sha1_exp  ::= len32 sha1exp;     # max len is PATH_MAX-1
-
-  len32 ::= # unsigned 32 bit value, native format;
+  mode ::= '644' | '755';
+
+  new_tag ::= 'tag' sp tag_str lf
+    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
+       'tagger' sp name '<' email '>' ts tz lf
+    tag_msg;
+  tag_msg ::= data;
+
+     # note: the first idnum in a stream should be 1 and subsequent
+     # idnums should not have gaps between values as this will cause
+     # the stream parser to reserve space for the gapped values.  An
+        # idnum can be updated in the future to a new object by issuing
+     # a new mark directive with the old idnum.
+        #
+  mark ::= 'mark' sp idnum lf;
+
+     # note: declen indicates the length of binary_data in bytes.
+     # declen does not include the lf preceding or trailing the
+     # binary data.
+     #
+  data ::= 'data' sp declen lf
+    binary_data
+       lf;
+
+     # note: quoted strings are C-style quoting supporting \c for
+     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
+        # is the signed byte value in octal.  Note that the only
+     # characters which must actually be escaped to protect the
+     # stream formatting are: \, " and LF.  Otherwise these values
+        # are UTF8.
+     #
+  ref_str     ::= ref     | '"' quoted(ref)     '"' ;
+  sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
+  tag_str     ::= tag     | '"' quoted(tag)     '"' ;
+  path_str    ::= path    | '"' quoted(path)    '"' ;
+
+  declen ::= # unsigned 32 bit value, ascii base10 notation;
   binary_data ::= # file content, not interpreted;
+
   sp ::= # ASCII space character;
   lf ::= # ASCII newline (LF) character;
-  path ::= # GIT style file path, e.g. "a/b/c";
-  ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
-  tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
+
+     # note: a colon (':') must precede the numerical value assigned to
+        # an idnum.  This is to distinguish it from a ref or tag name as
+     # GIT does not permit ':' in ref or tag strings.
+        #
+  idnum   ::= ':' declen;
+  path    ::= # GIT style file path, e.g. "a/b/c";
+  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
+  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
   sha1exp ::= # Any valid GIT SHA1 expression;
   hexsha1 ::= # SHA1 in hexadecimal format;
-  name ::= # valid GIT author/committer name;
+
+     # note: name and email are UTF8 strings, however name must not
+        # contain '<' or lf and email must not contain any of the
+     # following: '<', '>', lf.
+        #
+  name  ::= # valid GIT author/committer name;
   email ::= # valid GIT author/committer email;
-  ts ::= # time since the epoch in seconds, ascii decimal;
-  tz ::= # GIT style timezone;
+  ts    ::= # time since the epoch in seconds, ascii base10 notation;
+  tz    ::= # GIT style timezone;
 */
 
 #include "builtin.h"
@@ -66,6 +98,8 @@ Format of STDIN stream:
 #include "pack.h"
 #include "refs.h"
 #include "csum-file.h"
+#include "strbuf.h"
+#include "quote.h"
 
 struct object_entry
 {
@@ -83,6 +117,15 @@ struct object_entry_pool
        struct object_entry entries[FLEX_ARRAY]; /* more */
 };
 
+/*
+ * One node of the table that maps mark idnums to objects.
+ *
+ * insert_mark() and find_mark() treat a node whose shift is non-zero
+ * as an interior node and index data.sets with (idnum >> shift); a
+ * node whose shift is zero is a leaf and data.marked is indexed with
+ * what is left of the idnum.
+ */
+struct mark_set
+{
+       int shift;
+       union {
+               struct object_entry *marked[1024];
+               struct mark_set *sets[1024];
+       } data;
+};
+
 struct last_object
 {
        void *data;
@@ -138,13 +181,23 @@ struct branch
        unsigned char sha1[20];
 };
 
+/*
+ * A tag to be written out by dump_tags(), which walks the list through
+ * next_tag and points refs/tags/<name> at sha1.
+ */
+struct tag
+{
+       struct tag *next_tag;
+       const char *name;
+       unsigned char sha1[20];
+};
+
 
 /* Stats and misc. counters */
-static int max_depth = 10;
+static unsigned long max_depth = 10;
 static unsigned long alloc_count;
 static unsigned long branch_count;
+static unsigned long branch_load_count;
+static unsigned long remap_count;
 static unsigned long object_count;
 static unsigned long duplicate_count;
+static unsigned long marks_set_count;
 static unsigned long object_count_by_type[9];
 static unsigned long duplicate_count_by_type[9];
 
@@ -153,20 +206,24 @@ static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
 static size_t total_allocd;
 static struct mem_pool *mem_pool;
 
-/* atom management */
+/* Atom management */
 static unsigned int atom_table_sz = 4451;
 static unsigned int atom_cnt;
 static struct atom_str **atom_table;
 
 /* The .pack file being generated */
 static int pack_fd;
-static unsigned long pack_offset;
+static unsigned long pack_size;
 static unsigned char pack_sha1[20];
+static void* pack_base;
+static size_t pack_mlen;
 
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 1000;
 static struct object_entry_pool *blocks;
 static struct object_entry *object_table[1 << 16];
+static struct mark_set *marks;
+static const char* mark_file;
 
 /* Our last blob */
 static struct last_object last_blob;
@@ -178,12 +235,20 @@ static unsigned int avail_tree_table_sz = 100;
 static struct avail_tree_content **avail_tree_table;
 
 /* Branch data */
-static unsigned int max_active_branches = 5;
-static unsigned int cur_active_branches;
-static unsigned int branch_table_sz = 1039;
+static unsigned long max_active_branches = 5;
+static unsigned long cur_active_branches;
+static unsigned long branch_table_sz = 1039;
 static struct branch **branch_table;
 static struct branch *active_branches;
 
+/* Tag data */
+static struct tag *first_tag;
+static struct tag *last_tag;
+
+/* Input stream parsing */
+static struct strbuf command_buf;
+static unsigned long next_mark;
+
 
 static void alloc_objects(int cnt)
 {
@@ -274,6 +339,9 @@ static void* pool_alloc(size_t len)
        }
 
        r = p->next_free;
+       /* round out to a pointer alignment */
+       if (len & (sizeof(void*) - 1))
+               len += sizeof(void*) - (len & (sizeof(void*) - 1));
        p->next_free += len;
        return r;
 }
@@ -293,6 +361,48 @@ static char* pool_strdup(const char *s)
        return r;
 }
 
+/*
+ * Record that mark idnum refers to object oe.
+ *
+ * The global root (marks) is grown upwards until idnum fits under it,
+ * then the table is walked down to the leaf, creating any missing
+ * interior nodes on the way.  Assigning an idnum that already has an
+ * object simply replaces it.
+ */
+static void insert_mark(unsigned long idnum, struct object_entry *oe)
+{
+       struct mark_set *s = marks;
+       /* idnum is too large for the current root: add a level on top */
+       while ((idnum >> s->shift) >= 1024) {
+               s = pool_calloc(1, sizeof(struct mark_set));
+               s->shift = marks->shift + 10;
+               /* the old root covers the lowest range of the new root */
+               s->data.sets[0] = marks;
+               marks = s;
+       }
+       /* descend, stripping the bits consumed at each level from idnum */
+       while (s->shift) {
+               unsigned long i = idnum >> s->shift;
+               idnum -= i << s->shift;
+               if (!s->data.sets[i]) {
+                       s->data.sets[i] = pool_calloc(1, sizeof(struct mark_set));
+                       s->data.sets[i]->shift = s->shift - 10;
+               }
+               s = s->data.sets[i];
+       }
+       /* only count a mark the first time its slot is filled */
+       if (!s->data.marked[idnum])
+               marks_set_count++;
+       s->data.marked[idnum] = oe;
+}
+
+/*
+ * Look up the object previously stored for mark idnum.
+ *
+ * Dies if the idnum is larger than anything the table can currently
+ * hold, or if no object was ever stored for it.
+ */
+static struct object_entry* find_mark(unsigned long idnum)
+{
+       struct mark_set *node = marks;
+       struct object_entry *found = NULL;
+       unsigned long rest = idnum;
+
+       if ((rest >> node->shift) < 1024) {
+               /* walk down until we hit a leaf or a missing subtree */
+               for (; node && node->shift; ) {
+                       unsigned long slot = rest >> node->shift;
+                       rest -= slot << node->shift;
+                       node = node->data.sets[slot];
+               }
+               if (node)
+                       found = node->data.marked[rest];
+       }
+       if (found)
+               return found;
+       die("mark :%lu not declared", idnum);
+       return NULL;
+}
+
 static struct atom_str* to_atom(const char *s, size_t len)
 {
        unsigned int hc = hc_str(s, len) % atom_table_sz;
@@ -330,6 +440,8 @@ static struct branch* new_branch(const char *name)
 
        if (b)
                die("Invalid attempt to create duplicate branch: %s", name);
+       if (check_ref_format(name))
+               die("Branch name doesn't conform to GIT standards: %s", name);
 
        b = pool_calloc(1, sizeof(struct branch));
        b->name = pool_strdup(name);
@@ -376,11 +488,16 @@ static void release_tree_content(struct tree_content *t)
 {
        struct avail_tree_content *f = (struct avail_tree_content*)t;
        unsigned int hc = hc_entries(f->entry_capacity);
+       f->next_avail = avail_tree_table[hc];
+       avail_tree_table[hc] = f;
+}
+
+/*
+ * Release every entry held by t (release_tree_entry() in turn releases
+ * any subtree of an entry) and then release t itself.
+ */
+static void release_tree_content_recursive(struct tree_content *t)
+{
        unsigned int i;
        for (i = 0; i < t->entry_count; i++)
                release_tree_entry(t->entries[i]);
-       f->next_avail = avail_tree_table[hc];
-       avail_tree_table[hc] = f;
+       release_tree_content(t);
 }
 
 static struct tree_content* grow_tree_content(
@@ -400,6 +517,7 @@ static struct tree_entry* new_tree_entry()
 
        if (!avail_tree_entry) {
                unsigned int n = tree_entry_alloc;
+               total_allocd += n * sizeof(struct tree_entry);
                avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
                while (n--) {
                        *((void**)e) = e + 1;
@@ -415,7 +533,7 @@ static struct tree_entry* new_tree_entry()
+/*
+ * Release e together with the subtree hanging off it, if any, and push
+ * e onto the avail_tree_entry list.  The previous list head is stored
+ * in the first pointer-sized bytes of e itself.
+ */
 static void release_tree_entry(struct tree_entry *e)
 {
        if (e->tree)
-               release_tree_content(e->tree);
+               release_tree_content_recursive(e->tree);
        *((void**)e) = avail_tree_entry;
        avail_tree_entry = e;
 }
@@ -433,22 +551,6 @@ static void yread(int fd, void *buffer, size_t length)
        }
 }
 
-static int optional_read(int fd, void *buffer, size_t length)
-{
-       ssize_t ret = 0;
-       while (ret < length) {
-               ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
-               if (!size && !ret)
-                       return 1;
-               if (!size)
-                       die("Read from descriptor %i: end of stream", fd);
-               if (size < 0)
-                       die("Read from descriptor %i: %s", fd, strerror(errno));
-               ret += size;
-       }
-       return 0;
-}
-
 static void ywrite(int fd, void *buffer, size_t length)
 {
        ssize_t ret = 0;
@@ -462,24 +564,9 @@ static void ywrite(int fd, void *buffer, size_t length)
        }
 }
 
-static const char* read_path()
-{
-       static char sn[PATH_MAX];
-       unsigned long slen;
-
-       yread(0, &slen, 4);
-       if (!slen)
-               die("Expected string command parameter, didn't find one");
-       if (slen > (PATH_MAX - 1))
-               die("Can't handle excessive string length %lu", slen);
-       yread(0, sn, slen);
-       sn[slen] = 0;
-       return sn;
-}
-
-static unsigned long encode_header(
+static size_t encode_header(
        enum object_type type,
-       unsigned long size,
+       size_t size,
        unsigned char *hdr)
 {
        int n = 1;
@@ -503,9 +590,10 @@ static unsigned long encode_header(
 static int store_object(
        enum object_type type,
        void *dat,
-       unsigned long datlen,
+       size_t datlen,
        struct last_object *last,
-       unsigned char *sha1out)
+       unsigned char *sha1out,
+       unsigned long mark)
 {
        void *out, *delta;
        struct object_entry *e;
@@ -524,13 +612,15 @@ static int store_object(
                memcpy(sha1out, sha1, sizeof(sha1));
 
        e = insert_object(sha1);
+       if (mark)
+               insert_mark(mark, e);
        if (e->offset) {
                duplicate_count++;
                duplicate_count_by_type[type]++;
                return 1;
        }
        e->type = type;
-       e->offset = pack_offset;
+       e->offset = pack_size;
        object_count++;
        object_count_by_type[type]++;
 
@@ -551,7 +641,7 @@ static int store_object(
                hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
                ywrite(pack_fd, hdr, hdrlen);
                ywrite(pack_fd, last->sha1, sizeof(sha1));
-               pack_offset += hdrlen + sizeof(sha1);
+               pack_size += hdrlen + sizeof(sha1);
        } else {
                if (last)
                        last->depth = 0;
@@ -559,7 +649,7 @@ static int store_object(
                s.avail_in = datlen;
                hdrlen = encode_header(type, datlen, hdr);
                ywrite(pack_fd, hdr, hdrlen);
-               pack_offset += hdrlen;
+               pack_size += hdrlen;
        }
 
        s.avail_out = deflateBound(&s, s.avail_in);
@@ -569,7 +659,7 @@ static int store_object(
        deflateEnd(&s);
 
        ywrite(pack_fd, out, s.total_out);
-       pack_offset += s.total_out;
+       pack_size += s.total_out;
 
        free(out);
        if (delta)
@@ -584,6 +674,127 @@ static int store_object(
        return 0;
 }
 
+/*
+ * Return a pointer to the byte at 'offset' of the pack being written.
+ *
+ * Dies if offset is at or beyond pack_size.  When offset is not covered
+ * by the current mapping the pack is mapped again from its start, so
+ * pointers returned by earlier calls must not be used after a call
+ * that remaps (the old mapping is munmap'ed).
+ */
+static void* map_pack(unsigned long offset)
+{
+       if (offset >= pack_size)
+               die("object offset outside of pack file");
+       if (offset >= pack_mlen) {
+               if (pack_base)
+                       munmap(pack_base, pack_mlen);
+               /* round out how much we map to 16 MB units */
+               pack_mlen = pack_size;
+               if (pack_mlen & ((1 << 24) - 1))
+                       pack_mlen = ((pack_mlen >> 24) + 1) << 24;
+               pack_base = mmap(NULL,pack_mlen,PROT_READ,MAP_SHARED,pack_fd,0);
+               if (pack_base == MAP_FAILED)
+                       die("Failed to map generated pack: %s", strerror(errno));
+               /* statistics only: how often the pack had to be remapped */
+               remap_count++;
+       }
+       return (char*)pack_base + offset;
+}
+
+/*
+ * Decode the variable-length object header found at 'offset' in the
+ * pack being written.
+ *
+ * The first byte holds the object type in bits 4-6 and the low four
+ * bits of the size; for as long as bit 7 is set another byte follows,
+ * each contributing seven more size bits.  The type is stored in
+ * *type and the size in *sizep.  Returns the offset of the first byte
+ * after the header.  Dies if the size does not fit an unsigned long.
+ */
+static unsigned long unpack_object_header(unsigned long offset,
+       enum object_type *type,
+       unsigned long *sizep)
+{
+       unsigned shift;
+       /* unsigned long so that the shifts below are not done in int */
+       unsigned long c;
+       unsigned long size;
+
+       c = *(unsigned char*)map_pack(offset++);
+       *type = (c >> 4) & 7;
+       size = c & 15;
+       shift = 4;
+       while (c & 0x80) {
+               /* shifting by the width of the type or more is undefined */
+               if (shift >= sizeof(unsigned long) * 8)
+                       die("object header size does not fit in unsigned long");
+               c = *(unsigned char*)map_pack(offset++);
+               size += (c & 0x7f) << shift;
+               shift += 7;
+       }
+       *sizep = size;
+       return offset;
+}
+
+/*
+ * Inflate the zlib stream that starts at pack offset o and is expected
+ * to expand to exactly sz bytes.
+ *
+ * Returns an xmalloc'ed buffer of sz + 1 bytes; the extra byte is set
+ * to 0.  Dies on any zlib error or if the inflated size is not sz.
+ */
+static void *unpack_non_delta_entry(unsigned long o, unsigned long sz)
+{
+       z_stream stream;
+       unsigned char *result;
+
+       result = xmalloc(sz + 1);
+       result[sz] = 0;
+
+       memset(&stream, 0, sizeof(stream));
+       stream.next_in = map_pack(o);
+       /* offer everything from o up to the end of the current mapping */
+       stream.avail_in = pack_mlen - o;
+       stream.next_out = result;
+       stream.avail_out = sz;
+
+       inflateInit(&stream);
+       for (;;) {
+               int st = inflate(&stream, Z_FINISH);
+               if (st == Z_STREAM_END)
+                       break;
+               if (st == Z_OK) {
+                       /*
+                        * Not finished yet: turn the input position back
+                        * into a pack offset and go through map_pack()
+                        * again, which may remap the pack.
+                        */
+                       o = stream.next_in - (unsigned char*)pack_base;
+                       stream.next_in = map_pack(o);
+                       stream.avail_in = pack_mlen - o;
+                       continue;
+               }
+               die("Error from zlib during inflate.");
+       }
+       inflateEnd(&stream);
+       if (stream.total_out != sz)
+               die("Error after inflate: sizes mismatch");
+       return result;
+}
+
+static void *unpack_entry(unsigned long offset, unsigned long *sizep);
+
+/*
+ * Reconstruct a delta object.
+ *
+ * 'offset' is the pack offset just after the object header: the 20
+ * byte SHA1 of the base object is stored there, followed by the
+ * deflated delta, which inflates to delta_size bytes.  The base is
+ * unpacked (recursively, if it is a delta itself) and the delta is
+ * applied to it.  Returns the resulting buffer and stores its length
+ * in *sizep; the temporary base and delta buffers are freed.
+ */
+static void *unpack_delta_entry(unsigned long offset,
+       unsigned long delta_size,
+       unsigned long *sizep)
+{
+       struct object_entry *base_oe;
+       unsigned char *base_sha1;
+       void *delta_data, *base, *result;
+       unsigned long base_size, result_size;
+
+       /*
+        * Ask map_pack() for the byte following the SHA1 and step back
+        * 20 bytes to reach the SHA1 itself.
+        */
+       base_sha1 = (unsigned char*)map_pack(offset + 20) - 20;
+       base_oe = find_object(base_sha1);
+       if (!base_oe)
+               die("I'm broken; I can't find a base I know must be here.");
+       base = unpack_entry(base_oe->offset, &base_size);
+       delta_data = unpack_non_delta_entry(offset + 20, delta_size);
+       result = patch_delta(base, base_size,
+                            delta_data, delta_size,
+                            &result_size);
+       if (!result)
+               die("failed to apply delta");
+       free(delta_data);
+       free(base);
+       *sizep = result_size;
+       return result;
+}
+
+/*
+ * Read back the object stored at 'offset' in the pack being written.
+ *
+ * Delta objects are resolved against their base; commits, trees, blobs
+ * and tags are simply inflated.  Returns the object data and stores
+ * its length in *sizep.  Dies on any other object type.
+ */
+static void *unpack_entry(unsigned long offset, unsigned long *sizep)
+{
+       enum object_type type;
+       unsigned long len;
+       unsigned long body = unpack_object_header(offset, &type, &len);
+
+       if (type == OBJ_DELTA)
+               return unpack_delta_entry(body, len, sizep);
+
+       switch (type) {
+       case OBJ_COMMIT:
+       case OBJ_TREE:
+       case OBJ_BLOB:
+       case OBJ_TAG:
+               break;
+       default:
+               die("I created an object I can't read!");
+       }
+
+       *sizep = len;
+       return unpack_non_delta_entry(body, len);
+}
+
 static const char *get_mode(const char *str, unsigned int *modep)
 {
        unsigned char c;
@@ -605,7 +816,6 @@ static void load_tree(struct tree_entry *root)
        unsigned long size;
        char *buf;
        const char *c;
-       char type[20];
 
        root->tree = t = new_tree_content(8);
        if (!memcmp(root->sha1, null_sha1, 20))
@@ -613,11 +823,14 @@ static void load_tree(struct tree_entry *root)
 
        myoe = find_object(root->sha1);
        if (myoe) {
-               die("FIXME");
+               if (myoe->type != OBJ_TREE)
+                       die("Not a tree: %s", sha1_to_hex(root->sha1));
+               buf = unpack_entry(myoe->offset, &size);
        } else {
+               char type[20];
                buf = read_sha1_file(root->sha1, type, &size);
                if (!buf || strcmp(type, tree_type))
-                       die("Can't load existing tree %s", sha1_to_hex(root->sha1));
+                       die("Can't load tree %s", sha1_to_hex(root->sha1));
        }
 
        c = buf;
@@ -677,7 +890,7 @@ static void store_tree(struct tree_entry *root)
                memcpy(c, e->sha1, 20);
                c += 20;
        }
-       store_object(OBJ_TREE, buf, c - buf, NULL, root->sha1);
+       store_object(OBJ_TREE, buf, c - buf, NULL, root->sha1, 0);
        free(buf);
 }
 
@@ -707,7 +920,7 @@ static int tree_content_set(
                                e->mode = mode;
                                memcpy(e->sha1, sha1, 20);
                                if (e->tree) {
-                                       release_tree_content(e->tree);
+                                       release_tree_content_recursive(e->tree);
                                        e->tree = NULL;
                                }
                                memcpy(root->sha1, null_sha1, 20);
@@ -787,15 +1000,14 @@ static int tree_content_remove(struct tree_entry *root, const char *p)
 
+/*
+ * Write the initial pack header (signature, version 2, zero entries)
+ * to pack_fd and start pack_size at the size of that header.
+ * NOTE(review): the zero entry count is presumably patched once the
+ * real count is known (see fixup_header_footer) -- confirm.
+ */
 static void init_pack_header()
 {
-       const char* magic = "PACK";
-       unsigned long version = 3;
-       unsigned long zero = 0;
+       struct pack_header hdr;
 
-       version = htonl(version);
-       ywrite(pack_fd, (char*)magic, 4);
-       ywrite(pack_fd, &version, 4);
-       ywrite(pack_fd, &zero, 4);
-       pack_offset = 4 * 3;
+       hdr.hdr_signature = htonl(PACK_SIGNATURE);
+       hdr.hdr_version = htonl(2);
+       hdr.hdr_entries = 0;
+
+       ywrite(pack_fd, &hdr, sizeof(hdr));
+       pack_size = sizeof(hdr);
 }
 
 static void fixup_header_footer()
@@ -896,22 +1108,109 @@ static void dump_branches()
        }
 }
 
+/*
+ * Create or update refs/tags/<name> for every tag on the first_tag
+ * list so that it points at the tag's sha1.
+ *
+ * Dies if a ref cannot be locked or written, or if the ref name would
+ * not fit in PATH_MAX.
+ */
+static void dump_tags()
+{
+       static const char *msg = "fast-import";
+       struct tag *t;
+       struct ref_lock *lock;
+       char path[PATH_MAX];
+
+       for (t = first_tag; t; t = t->next_tag) {
+               /* tag names come from the input stream: bound the copy */
+               int n = snprintf(path, sizeof(path), "refs/tags/%s", t->name);
+               if (n < 0 || (size_t)n >= sizeof(path))
+                       die("Tag name too long: %s", t->name);
+               lock = lock_any_ref_for_update(path, NULL, 0);
+               if (!lock || write_ref_sha1(lock, t->sha1, msg) < 0)
+                       die("Can't write %s", path);
+       }
+}
+
+/*
+ * Recursively write every mark stored under node m to f, one
+ * "idnum,hexsha1" line per mark.
+ *
+ * base is the idnum of the first slot covered by m.  Child k of an
+ * interior node covers the idnums starting at base + (k << shift),
+ * which mirrors how insert_mark() strips (idnum >> shift) << shift
+ * from the idnum at every level on the way down.
+ */
+static void dump_marks_helper(FILE *f,
+       unsigned long base,
+       struct mark_set *m)
+{
+       int k;
+       if (m->shift) {
+               for (k = 0; k < 1024; k++) {
+                       /*
+                        * Only k is shifted; shifting (base + k) would
+                        * scale the already-final base again on every
+                        * level below the root.
+                        */
+                       if (m->data.sets[k])
+                               dump_marks_helper(f,
+                                       base + ((unsigned long)k << m->shift),
+                                       m->data.sets[k]);
+               }
+       } else {
+               for (k = 0; k < 1024; k++) {
+                       if (m->data.marked[k])
+                               fprintf(f, "%lu,%s\n", base + k,
+                                       sha1_to_hex(m->data.marked[k]->sha1));
+               }
+       }
+}
+
+/*
+ * Write the whole marks table to mark_file, if one was requested.
+ *
+ * Does nothing when mark_file is not set.  Dies if the file cannot be
+ * opened or if closing it reports a write error.
+ */
+static void dump_marks()
+{
+       FILE *f;
+
+       if (!mark_file)
+               return;
+
+       f = fopen(mark_file, "w");
+       if (!f)
+               die("Can't write marks file %s: %s",
+                       mark_file, strerror(errno));
+       dump_marks_helper(f, 0, marks);
+       /* fclose() flushes, so this is where a full disk shows up */
+       if (fclose(f))
+               die("Can't write marks file %s: %s",
+                       mark_file, strerror(errno));
+}
+
+/* Read the next '\n' terminated line of the stream into command_buf. */
+static void read_next_command()
+{
+       read_line(&command_buf, stdin, '\n');
+}
+
+/*
+ * Handle an optional 'mark :idnum' line.
+ *
+ * If command_buf holds a mark directive, remember its idnum in
+ * next_mark and advance to the next command; otherwise next_mark is
+ * reset to 0 and command_buf is left untouched.
+ */
+static void cmd_mark()
+{
+       static const char prefix[] = "mark :";
+       const size_t plen = sizeof(prefix) - 1;
+
+       if (strncmp(prefix, command_buf.buf, plen)) {
+               next_mark = 0;
+               return;
+       }
+       next_mark = strtoul(command_buf.buf + plen, NULL, 10);
+       read_next_command();
+}
+
+/*
+ * Parse the 'data <declen>' command held in command_buf and read its
+ * payload from stdin.
+ *
+ * Exactly declen bytes are read, followed by the single lf that must
+ * trail the binary data.  Returns an xmalloc'ed buffer holding the
+ * payload (not NUL terminated) and stores its length in *size.
+ * Dies on a malformed command, on EOF or a read error before all
+ * bytes arrived, and on a missing trailing lf.
+ */
+static void* cmd_data (size_t *size)
+{
+       size_t n = 0;
+       void *buffer;
+       size_t length;
+
+       if (strncmp("data ", command_buf.buf, 5))
+               die("Expected 'data n' command, found: %s", command_buf.buf);
+
+       length = strtoul(command_buf.buf + 5, NULL, 10);
+       buffer = xmalloc(length);
+
+       while (n < length) {
+               size_t s = fread((char*)buffer + n, 1, length - n, stdin);
+               /*
+                * fread() returns 0 on a read error as well as on EOF;
+                * stop in both cases instead of spinning forever.
+                */
+               if (!s) {
+                       if (feof(stdin))
+                               die("EOF in data (%lu bytes remaining)",
+                                       (unsigned long)(length - n));
+                       die("Read error in data (%lu bytes remaining)",
+                               (unsigned long)(length - n));
+               }
+               n += s;
+       }
+
+       if (fgetc(stdin) != '\n')
+               die("An lf did not trail the binary data as expected.");
+
+       *size = length;
+       return buffer;
+}
+
+/*
+ * Handle a 'blob' command: read the optional mark and the data that
+ * follow it and store the data as a blob, recording it under
+ * next_mark.  The buffer is freed here only when store_object()
+ * returns non-zero, which it does for an object it already has.
+ * NOTE(review): otherwise store_object() presumably keeps the buffer
+ * via last_blob -- confirm.
+ */
 static void cmd_new_blob()
 {
-       unsigned long datlen;
-       unsigned char sha1[20];
-       void *dat;
+       size_t l;
+       void *d;
+
+       read_next_command();
+       cmd_mark();
+       d = cmd_data(&l);
 
-       yread(0, &datlen, 4);
-       dat = xmalloc(datlen);
-       yread(0, dat, datlen);
-       if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1))
-               free(dat);
+       if (store_object(OBJ_BLOB, d, l, &last_blob, NULL, next_mark))
+               free(d);
 }
 
 static void unload_one_branch()
 {
-       while (cur_active_branches >= max_active_branches) {
+       while (cur_active_branches
+               && cur_active_branches >= max_active_branches) {
                unsigned long min_commit = ULONG_MAX;
                struct branch *e, *l = NULL, *p = NULL;
 
@@ -932,7 +1231,7 @@ static void unload_one_branch()
                }
                e->active_next_branch = NULL;
                if (e->branch_tree.tree) {
-                       release_tree_content(e->branch_tree.tree);
+                       release_tree_content_recursive(e->branch_tree.tree);
                        e->branch_tree.tree = NULL;
                }
                cur_active_branches--;
@@ -945,132 +1244,344 @@ static void load_branch(struct branch *b)
        b->active_next_branch = active_branches;
        active_branches = b;
        cur_active_branches++;
+       branch_load_count++;
 }
 
 static void file_change_m(struct branch *b)
 {
-       const char *path = read_path();
+       const char *p = command_buf.buf + 2;
+       char *p_uq;
+       const char *endp;
        struct object_entry *oe;
-       char hexsha1[41];
        unsigned char sha1[20];
+       unsigned int mode;
        char type[20];
 
-       yread(0, hexsha1, 40);
-       hexsha1[40] = 0;
+       p = get_mode(p, &mode);
+       if (!p)
+               die("Corrupt mode: %s", command_buf.buf);
+       switch (mode) {
+       case S_IFREG | 0644:
+       case S_IFREG | 0755:
+       case S_IFLNK:
+       case 0644:
+       case 0755:
+               /* ok */
+               break;
+       default:
+               die("Corrupt mode: %s", command_buf.buf);
+       }
+
+       if (*p == ':') {
+               char *x;
+               oe = find_mark(strtoul(p + 1, &x, 10));
+               p = x;
+       } else {
+               if (get_sha1_hex(p, sha1))
+                       die("Invalid SHA1: %s", command_buf.buf);
+               oe = find_object(sha1);
+               p += 40;
+       }
+       if (*p++ != ' ')
+               die("Missing space after SHA1: %s", command_buf.buf);
+
+       p_uq = unquote_c_style(p, &endp);
+       if (p_uq) {
+               if (*endp)
+                       die("Garbage after path in: %s", command_buf.buf);
+               p = p_uq;
+       }
 
-       if (get_sha1_hex(hexsha1, sha1))
-               die("Invalid sha1 %s for %s", hexsha1, path);
-       oe = find_object(sha1);
        if (oe) {
                if (oe->type != OBJ_BLOB)
-                       die("%s is a %s not a blob (for %s)", hexsha1, type_names[oe->type], path);
+                       die("Not a blob (actually a %s): %s",
+                               command_buf.buf, type_names[oe->type]);
        } else {
                if (sha1_object_info(sha1, type, NULL))
-                       die("No blob %s for %s", hexsha1, path);
+                       die("Blob not found: %s", command_buf.buf);
                if (strcmp(blob_type, type))
-                       die("%s is a %s not a blob (for %s)", hexsha1, type, path);
+                       die("Not a blob (actually a %s): %s",
+                               command_buf.buf, type);
        }
 
-       tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644);
+       tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode);
+
+       if (p_uq)
+               free(p_uq);
 }
 
+/*
+ * Handle a file_change of the form 'D' sp path_str held in
+ * command_buf: remove path from the tree of branch b.  The path may
+ * be C-style quoted; dies if anything follows a quoted path.
+ */
 static void file_change_d(struct branch *b)
 {
-       tree_content_remove(&b->branch_tree, read_path());
+       const char *p = command_buf.buf + 2;
+       char *p_uq;
+       const char *endp;
+
+       p_uq = unquote_c_style(p, &endp);
+       if (p_uq) {
+               if (*endp)
+                       die("Garbage after path in: %s", command_buf.buf);
+               p = p_uq;
+       }
+       tree_content_remove(&b->branch_tree, p);
+       if (p_uq)
+               free(p_uq);
 }
 
-static void cmd_new_commit()
+/*
+ * Handle an optional 'from' line for branch b.
+ *
+ * Returns without consuming anything if command_buf does not hold a
+ * 'from' command.  Otherwise the commit and tree SHA1 of b are set
+ * from, in this order of preference: another branch known to this
+ * process, a mark (':' idnum) that must refer to a commit, or any
+ * SHA1 expression get_sha1() can resolve.  The next command is then
+ * read.  Dies if b already has commits, if b names itself, or if the
+ * source cannot be resolved to a valid commit.
+ */
+static void cmd_from(struct branch *b)
 {
-       static const unsigned int max_hdr_len = 94;
-       const char *name = read_path();
-       struct branch *b = lookup_branch(name);
-       unsigned int acmsglen;
-       char *body, *c;
+       const char *from, *endp;
+       char *str_uq;
+       struct branch *s;
+
+       if (strncmp("from ", command_buf.buf, 5))
+               return;
+
+       if (b->last_commit)
+               die("Can't reinitailize branch %s", b->name);
+
+       from = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(from, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after string in: %s", command_buf.buf);
+               from = str_uq;
+       }
+
+       s = lookup_branch(from);
+       if (b == s)
+               die("Can't create a branch from itself: %s", b->name);
+       else if (s) {
+               memcpy(b->sha1, s->sha1, 20);
+               memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
+       } else if (*from == ':') {
+               unsigned long idnum = strtoul(from + 1, NULL, 10);
+               struct object_entry *oe = find_mark(idnum);
+               unsigned long size;
+               char *buf;
+               if (oe->type != OBJ_COMMIT)
+                       die("Mark :%lu not a commit", idnum);
+               memcpy(b->sha1, oe->sha1, 20);
+               buf = unpack_entry(oe->offset, &size);
+               /* "tree " + 40 hex digits + lf is 46 bytes */
+               if (!buf || size < 46)
+                       die("Not a valid commit: %s", from);
+               if (memcmp("tree ", buf, 5)
+                       || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+                       die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+               free(buf);
+       } else if (!get_sha1(from, b->sha1)) {
+               if (!memcmp(b->sha1, null_sha1, 20))
+                       memcpy(b->branch_tree.sha1, null_sha1, 20);
+               else {
+                       unsigned long size;
+                       char *buf;
+
+                       buf = read_object_with_reference(b->sha1,
+                               type_names[OBJ_COMMIT], &size, b->sha1);
+                       if (!buf || size < 46)
+                               die("Not a valid commit: %s", from);
+                       if (memcmp("tree ", buf, 5)
+                               || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+                               die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+                       free(buf);
+               }
+       } else
+               die("Invalid ref name or SHA1 expression: %s", from);
+
+       /* 'from' may point into str_uq, so release it only now */
+       if (str_uq)
+               free(str_uq);
+
+       read_next_command();
+}
 
+/*
+ * Process a 'commit' command from the input stream.
+ *
+ * The branch named on the command line is looked up and created when
+ * it is not known yet.  The following lines are then consumed in
+ * order: cmd_mark(), an optional 'author' line, a mandatory
+ * 'committer' line, the message via cmd_data(), cmd_from(), and
+ * 'M'/'D' file changes until a command of length 1 is read (the
+ * terminating line of the commit in the stream grammar).  Finally the
+ * branch tree is stored and the commit body is assembled and handed to
+ * store_object() together with b->sha1 and next_mark.
+ *
+ * When no 'author' line was supplied, the identity from the
+ * 'committer' line is reused for the author header.
+ */
+static void cmd_new_commit()
+{
+       struct branch *b;
+       void *msg;
+       size_t msglen;
+       char *str_uq;
+       const char *endp;
+       char *sp;
+       char *author = NULL;
+       char *committer = NULL;
+       char *body;
+
+       /* Obtain the branch name from the rest of our command */
+       sp = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(sp, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after ref in: %s", command_buf.buf);
+               sp = str_uq;
+       }
+       b = lookup_branch(sp);
        if (!b)
-               die("Branch not declared: %s", name);
-       if (!b->branch_tree.tree) {
+               b = new_branch(sp);
+       /* free(NULL) is a no-op, so no guard is needed */
+       free(str_uq);
+
+       read_next_command();
+       cmd_mark();
+       if (!strncmp("author ", command_buf.buf, 7)) {
+               author = strdup(command_buf.buf);
+               read_next_command();
+       }
+       if (!strncmp("committer ", command_buf.buf, 10)) {
+               committer = strdup(command_buf.buf);
+               read_next_command();
+       }
+       if (!committer)
+               die("Expected committer but didn't get one");
+       msg = cmd_data(&msglen);
+       read_next_command();
+       cmd_from(b);
+
+       /* ensure the branch is active/loaded */
+       if (!b->branch_tree.tree || !max_active_branches) {
                unload_one_branch();
                load_branch(b);
        }
 
-       /* author_committer_msg */
-       yread(0, &acmsglen, 4);
-       body = xmalloc(acmsglen + max_hdr_len);
-       c = body + max_hdr_len;
-       yread(0, c, acmsglen);
-
-       /* oddly enough this is all that fsck-objects cares about */
-       if (memcmp(c, "author ", 7))
-               die("Invalid commit format on branch %s", name);
-
        /* file_change* */
        for (;;) {
-               unsigned char cmd;
-               yread(0, &cmd, 1);
-               if (cmd == '0')
+               if (1 == command_buf.len)
                        break;
-               else if (cmd == 'M')
+               else if (!strncmp("M ", command_buf.buf, 2))
                        file_change_m(b);
-               else if (cmd == 'D')
+               else if (!strncmp("D ", command_buf.buf, 2))
                        file_change_d(b);
                else
-                       die("Unsupported file_change: %c", cmd);
+                       die("Unsupported file_change: %s", command_buf.buf);
+               read_next_command();
        }
 
-       if (memcmp(b->sha1, null_sha1, 20)) {
-               sprintf(c - 48, "parent %s", sha1_to_hex(b->sha1));
-               *(c - 1) = '\n';
-               c -= 48;
-       }
+       /* build the tree and the commit */
        store_tree(&b->branch_tree);
-       sprintf(c - 46, "tree %s", sha1_to_hex(b->branch_tree.sha1));
-       *(c - 1) = '\n';
-       c -= 46;
-
-       store_object(OBJ_COMMIT,
-               c, (body + max_hdr_len + acmsglen) - c,
-               NULL, b->sha1);
+       /*
+        * Fixed overhead: "tree <hex>\n" (46) + "parent <hex>\n" (48)
+        * + author '\n' (1) + committer "\n\n" (2) = 97, plus one byte
+        * for the NUL that the final sprintf() writes.  Without that
+        * byte, a zero-length message combined with a parent and an
+        * explicit author line overruns the buffer by one.
+        */
+       body = xmalloc(98 + msglen
+               + (author
+                       ? strlen(author) + strlen(committer)
+                       : 2 * strlen(committer)));
+       sp = body;
+       sp += sprintf(sp, "tree %s\n", sha1_to_hex(b->branch_tree.sha1));
+       if (memcmp(b->sha1, null_sha1, 20))
+               sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
+       if (author)
+               sp += sprintf(sp, "%s\n", author);
+       else
+               /* skip the 10-byte "committer " prefix to reuse the identity */
+               sp += sprintf(sp, "author %s\n", committer + 10);
+       sp += sprintf(sp, "%s\n\n", committer);
+       /* the message is copied by length, not as a C string */
+       memcpy(sp, msg, msglen);
+       sp += msglen;
+       free(author);
+       free(committer);
+       free(msg);
+
+       store_object(OBJ_COMMIT, body, sp - body, NULL, b->sha1, next_mark);
        free(body);
        b->last_commit = object_count_by_type[OBJ_COMMIT];
 }
 
-static void cmd_new_branch()
+/*
+ * Process a 'tag' command from the input stream.
+ *
+ * A struct tag is allocated from the pool and appended to the
+ * first_tag/last_tag list.  The mandatory 'from' line is resolved to a
+ * SHA-1 in one of three ways: a branch found by lookup_branch(), a
+ * ':<idnum>' mark that must refer to a commit, or an expression
+ * accepted by get_sha1() that read_object_with_reference() can resolve
+ * to a commit.  After the mandatory 'tagger' line and the message
+ * (cmd_data) the tag body is assembled and handed to store_object()
+ * together with t->sha1.
+ */
+static void cmd_new_tag()
 {
-       struct branch *b = new_branch(read_path());
-       const char *base = read_path();
-       struct branch *s = lookup_branch(base);
+       char *str_uq;
+       const char *endp;
+       char *sp;
+       const char *from;
+       char *tagger;
+       struct branch *s;
+       void *msg;
+       size_t msglen;
+       char *body;
+       struct tag *t;
+       unsigned char sha1[20];
 
-       if (!strcmp(b->name, base))
-               die("Can't create a branch from itself: %s", base);
-       else if (s) {
-               memcpy(b->sha1, s->sha1, 20);
-               memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
+       /* Obtain the new tag name from the rest of our command */
+       sp = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(sp, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after tag name in: %s", command_buf.buf);
+               sp = str_uq;
+       }
+       t = pool_alloc(sizeof(struct tag));
+       t->next_tag = NULL;
+       t->name = pool_strdup(sp);
+       if (last_tag)
+               last_tag->next_tag = t;
+       else
+               first_tag = t;
+       last_tag = t;
+       /* the name was copied by pool_strdup(); free(NULL) is a no-op */
+       free(str_uq);
+       read_next_command();
+
+       /* from ... */
+       if (strncmp("from ", command_buf.buf, 5))
+               die("Expected from command, got %s", command_buf.buf);
+
+       from = strchr(command_buf.buf, ' ') + 1;
+       str_uq = unquote_c_style(from, &endp);
+       if (str_uq) {
+               if (*endp)
+                       die("Garbage after string in: %s", command_buf.buf);
+               from = str_uq;
        }
-       else if (!get_sha1(base, b->sha1)) {
-               if (!memcmp(b->sha1, null_sha1, 20))
-                       memcpy(b->branch_tree.sha1, null_sha1, 20);
-               else {
-                       unsigned long size;
-                       char *buf;
 
-                       buf = read_object_with_reference(b->sha1,
-                               type_names[OBJ_COMMIT], &size, b->sha1);
-                       if (!buf || size < 46)
-                               die("Not a valid commit: %s", base);
-                       if (memcmp("tree ", buf, 5)
-                               || get_sha1_hex(buf + 5, b->branch_tree.sha1))
-                               die("The commit %s is corrupt", sha1_to_hex(b->sha1));
-                       free(buf);
-               }
+       s = lookup_branch(from);
+       if (s) {
+               memcpy(sha1, s->sha1, 20);
+       } else if (*from == ':') {
+               unsigned long idnum = strtoul(from + 1, NULL, 10);
+               struct object_entry *oe = find_mark(idnum);
+               if (oe->type != OBJ_COMMIT)
+                       die("Mark :%lu not a commit", idnum);
+               memcpy(sha1, oe->sha1, 20);
+       } else if (!get_sha1(from, sha1)) {
+               unsigned long size;
+               char *buf;
+
+               buf = read_object_with_reference(sha1,
+                       type_names[OBJ_COMMIT], &size, sha1);
+               if (!buf || size < 46)
+                       die("Not a valid commit: %s", from);
+               free(buf);
        } else
-               die("Not a SHA1 or branch: %s", base);
+               die("Invalid ref name or SHA1 expression: %s", from);
+
+       /* 'from' may point into str_uq, so release it only after use */
+       free(str_uq);
+       read_next_command();
+
+       /* tagger ... */
+       if (strncmp("tagger ", command_buf.buf, 7))
+               die("Expected tagger command, got %s", command_buf.buf);
+       tagger = strdup(command_buf.buf);
+
+       /* tag payload/message */
+       read_next_command();
+       msg = cmd_data(&msglen);
+
+       /* build the tag object */
+       /*
+        * Fixed overhead: "object <hex>\n" (48) + "type commit\n" (12)
+        * + "tag \n" (5) + tagger "\n\n" (2) = 67, plus one byte for the
+        * NUL written by the last sprintf(); without it a zero-length
+        * message overruns the buffer by one.  The 12 assumes
+        * type_names[OBJ_COMMIT] is "commit" -- TODO confirm.
+        */
+       body = xmalloc(68 + strlen(t->name) + strlen(tagger) + msglen);
+       sp = body;
+       sp += sprintf(sp, "object %s\n", sha1_to_hex(sha1));
+       sp += sprintf(sp, "type %s\n", type_names[OBJ_COMMIT]);
+       sp += sprintf(sp, "tag %s\n", t->name);
+       sp += sprintf(sp, "%s\n\n", tagger);
+       /* the message is copied by length, not as a C string */
+       memcpy(sp, msg, msglen);
+       sp += msglen;
+       free(tagger);
+       free(msg);
+
+       store_object(OBJ_TAG, body, sp - body, NULL, t->sha1, 0);
+       free(body);
 }
 
+/*
+ * Usage text passed to usage() by main() when the command line does
+ * not end in exactly one non-option argument.
+ * NOTE(review): main() builds pack_name by appending ".pack" to that
+ * argument, so the "temp.pack" shown here looks like it should be the
+ * base name only -- confirm.
+ */
+static const char fast_import_usage[] =
+"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] temp.pack";
+
 int main(int argc, const char **argv)
 {
-       const char *base_name = argv[1];
-       int est_obj_cnt = atoi(argv[2]);
+       const char *base_name;
+       int i;
+       unsigned long est_obj_cnt = 1000;
        char *pack_name;
        char *idx_name;
        struct stat sb;
@@ -1078,6 +1589,26 @@ int main(int argc, const char **argv)
        setup_ident();
        git_config(git_default_config);
 
+       for (i = 1; i < argc; i++) {
+               const char *a = argv[i];
+
+               if (*a != '-' || !strcmp(a, "--"))
+                       break;
+               else if (!strncmp(a, "--objects=", 10))
+                       est_obj_cnt = strtoul(a + 10, NULL, 0);
+               else if (!strncmp(a, "--depth=", 8))
+                       max_depth = strtoul(a + 8, NULL, 0);
+               else if (!strncmp(a, "--active-branches=", 18))
+                       max_active_branches = strtoul(a + 18, NULL, 0);
+               else if (!strncmp(a, "--export-marks=", 15))
+                       mark_file = a + 15;
+               else
+                       die("unknown option %s", a);
+       }
+       if ((i+1) != argc)
+               usage(fast_import_usage);
+       base_name = argv[i];
+
        pack_name = xmalloc(strlen(base_name) + 6);
        sprintf(pack_name, "%s.pack", base_name);
        idx_name = xmalloc(strlen(base_name) + 5);
@@ -1087,30 +1618,35 @@ int main(int argc, const char **argv)
        if (pack_fd < 0)
                die("Can't create %s: %s", pack_name, strerror(errno));
 
+       init_pack_header();
        alloc_objects(est_obj_cnt);
+       strbuf_init(&command_buf);
 
        atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
        branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
        avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
+       marks = pool_calloc(1, sizeof(struct mark_set));
 
-       init_pack_header();
        for (;;) {
-               unsigned long cmd;
-               if (optional_read(0, &cmd, 4))
+               read_next_command();
+               if (command_buf.eof)
                        break;
-
-               switch (ntohl(cmd)) {
-               case 'blob': cmd_new_blob();   break;
-               case 'comt': cmd_new_commit(); break;
-               case 'brch': cmd_new_branch(); break;
-               default:
-                       die("Invalid command %lu", cmd);
-               }
+               else if (!strcmp("blob", command_buf.buf))
+                       cmd_new_blob();
+               else if (!strncmp("commit ", command_buf.buf, 7))
+                       cmd_new_commit();
+               else if (!strncmp("tag ", command_buf.buf, 4))
+                       cmd_new_tag();
+               else
+                       die("Unsupported command: %s", command_buf.buf);
        }
+
        fixup_header_footer();
        close(pack_fd);
        write_index(idx_name);
        dump_branches();
+       dump_tags();
+       dump_marks();
 
        fprintf(stderr, "%s statistics:\n", argv[0]);
        fprintf(stderr, "---------------------------------------------------\n");
@@ -1120,11 +1656,13 @@ int main(int argc, const char **argv)
        fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
        fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
        fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
-       fprintf(stderr, "Total branches:  %10lu\n", branch_count);
-       fprintf(stderr, "Total atoms:     %10u\n", atom_cnt);
+       fprintf(stderr, "Total branches:  %10lu (%10lu loads     )\n", branch_count, branch_load_count);
+       fprintf(stderr, "      marks:     %10u (%10lu unique    )\n", (1 << marks->shift) * 1024, marks_set_count);
+       fprintf(stderr, "      atoms:     %10u\n", atom_cnt);
        fprintf(stderr, "Memory total:    %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
        fprintf(stderr, "       pools:    %10lu KiB\n", total_allocd/1024);
        fprintf(stderr, "     objects:    %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
+       fprintf(stderr, "Pack remaps:     %10lu\n", remap_count);
        fprintf(stderr, "---------------------------------------------------\n");
 
        stat(pack_name, &sb);