Accept 'inline' file data in fast-import commit structure.
[gitweb.git] / fast-import.c
index 393020504a1d2ac3690298398a033c6a4d6ea819..487a91a4eefae26fecebfa56fe91fbc8ad278647 100644 (file)
@@ -25,10 +25,11 @@ Format of STDIN stream:
     lf;
   commit_msg ::= data;
 
-  file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
-                | 'D' sp path_str lf
-                ;
-  mode ::= '644' | '755';
+  file_change ::= file_del | file_obm | file_inm;
+  file_del ::= 'D' sp path_str lf;
+  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
+  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
+    data;
 
   new_tag ::= 'tag' sp tag_str lf
     'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
@@ -50,14 +51,21 @@ Format of STDIN stream:
      # a new mark directive with the old idnum.
         #
   mark ::= 'mark' sp idnum lf;
+  data ::= (delimited_data | exact_data)
+    lf;
+
+    # note: delim may be any string but must not contain lf.
+    # data_line may contain any data but must not be exactly
+    # delim.
+  delimited_data ::= 'data' sp '<<' delim lf
+    (data_line lf)*
+       delim lf;
 
      # note: declen indicates the length of binary_data in bytes.
-     # declen does not include the lf preceeding or trailing the
-     # binary data.
+     # declen does not include the lf preceding the binary data.
      #
-  data ::= 'data' sp declen lf
-    binary_data
-       lf;
+  exact_data ::= 'data' sp declen lf
+    binary_data;
 
      # note: quoted strings are C-style quoting supporting \c for
      # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
@@ -70,8 +78,13 @@ Format of STDIN stream:
   sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
   tag_str     ::= tag     | '"' quoted(tag)     '"' ;
   path_str    ::= path    | '"' quoted(path)    '"' ;
+  mode        ::= '100644' | '644'
+                | '100755' | '755'
+                | '140000'
+                ;
 
   declen ::= # unsigned 32 bit value, ascii base10 notation;
+  bigint ::= # unsigned integer value, ascii base10 notation;
   binary_data ::= # file content, not interpreted;
 
   sp ::= # ASCII space character;
@@ -81,7 +94,7 @@ Format of STDIN stream:
         # an idnum.  This is to distinguish it from a ref or tag name as
      # GIT does not permit ':' in ref or tag strings.
         #
-  idnum   ::= ':' declen;
+  idnum   ::= ':' bigint;
   path    ::= # GIT style file path, e.g. "a/b/c";
   ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
   tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
@@ -110,12 +123,15 @@ Format of STDIN stream:
 #include "strbuf.h"
 #include "quote.h"
 
+#define PACK_ID_BITS 16
+#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
+
 struct object_entry
 {
        struct object_entry *next;
        unsigned long offset;
        unsigned type : TYPE_BITS;
-       unsigned pack_id : 16;
+       unsigned pack_id : PACK_ID_BITS;
        unsigned char sha1[20];
 };
 
@@ -129,11 +145,11 @@ struct object_entry_pool
 
 struct mark_set
 {
-       int shift;
        union {
                struct object_entry *marked[1024];
                struct mark_set *sets[1024];
        } data;
+       unsigned int shift;
 };
 
 struct last_object
@@ -156,7 +172,7 @@ struct mem_pool
 struct atom_str
 {
        struct atom_str *next_atom;
-       int str_len;
+       unsigned int str_len;
        char str_dat[FLEX_ARRAY]; /* more */
 };
 
@@ -191,8 +207,9 @@ struct branch
        struct branch *table_next_branch;
        struct branch *active_next_branch;
        const char *name;
-       unsigned long last_commit;
        struct tree_entry branch_tree;
+       uintmax_t last_commit;
+       unsigned int pack_id;
        unsigned char sha1[20];
 };
 
@@ -200,6 +217,7 @@ struct tag
 {
        struct tag *next_tag;
        const char *name;
+       unsigned int pack_id;
        unsigned char sha1[20];
 };
 
@@ -217,16 +235,15 @@ struct hash_list
 
 /* Configured limits on output */
 static unsigned long max_depth = 10;
-static unsigned long max_packsize = -1;
-static uintmax_t max_objects = -1;
+static unsigned long max_packsize = (1LL << 32) - 1;
 
 /* Stats and misc. counters */
 static uintmax_t alloc_count;
-static uintmax_t object_count;
 static uintmax_t marks_set_count;
 static uintmax_t object_count_by_type[1 << TYPE_BITS];
 static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
 static uintmax_t delta_count_by_type[1 << TYPE_BITS];
+static unsigned long object_count;
 static unsigned long branch_count;
 static unsigned long branch_load_count;
 
@@ -244,7 +261,6 @@ static struct atom_str **atom_table;
 static unsigned int pack_id;
 static struct packed_git *pack_data;
 static struct packed_git **all_packs;
-static int pack_fd;
 static unsigned long pack_size;
 
 /* Table of objects we've written. */
@@ -492,6 +508,7 @@ static struct branch* new_branch(const char *name)
        b->table_next_branch = branch_table[hc];
        b->branch_tree.versions[0].mode = S_IFDIR;
        b->branch_tree.versions[1].mode = S_IFDIR;
+       b->pack_id = MAX_PACK_ID;
        branch_table[hc] = b;
        branch_count++;
        return b;
@@ -559,7 +576,7 @@ static struct tree_content* grow_tree_content(
        return r;
 }
 
-static struct tree_entry* new_tree_entry()
+static struct tree_entry* new_tree_entry(void)
 {
        struct tree_entry *e;
 
@@ -587,11 +604,12 @@ static void release_tree_entry(struct tree_entry *e)
        avail_tree_entry = e;
 }
 
-static void start_packfile()
+static void start_packfile(void)
 {
        static char tmpfile[PATH_MAX];
        struct packed_git *p;
        struct pack_header hdr;
+       int pack_fd;
 
        snprintf(tmpfile, sizeof(tmpfile),
                "%s/pack_XXXXXX", get_object_directory());
@@ -605,7 +623,7 @@ static void start_packfile()
        hdr.hdr_signature = htonl(PACK_SIGNATURE);
        hdr.hdr_version = htonl(2);
        hdr.hdr_entries = 0;
-       write_or_die(pack_fd, &hdr, sizeof(hdr));
+       write_or_die(p->pack_fd, &hdr, sizeof(hdr));
 
        pack_data = p;
        pack_size = sizeof(hdr);
@@ -615,36 +633,40 @@ static void start_packfile()
        all_packs[pack_id] = p;
 }
 
-static void fixup_header_footer()
+static void fixup_header_footer(void)
 {
+       static const int buf_sz = 128 * 1024;
+       int pack_fd = pack_data->pack_fd;
        SHA_CTX c;
-       char hdr[8];
-       unsigned long cnt;
+       struct pack_header hdr;
        char *buf;
 
        if (lseek(pack_fd, 0, SEEK_SET) != 0)
                die("Failed seeking to start: %s", strerror(errno));
-
-       SHA1_Init(&c);
-       if (read_in_full(pack_fd, hdr, 8) != 8)
+       if (read_in_full(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
                die("Unable to reread header of %s", pack_data->pack_name);
-       SHA1_Update(&c, hdr, 8);
+       if (lseek(pack_fd, 0, SEEK_SET) != 0)
+               die("Failed seeking to start: %s", strerror(errno));
+       hdr.hdr_entries = htonl(object_count);
+       write_or_die(pack_fd, &hdr, sizeof(hdr));
 
-       cnt = htonl(object_count);
-       SHA1_Update(&c, &cnt, 4);
-       write_or_die(pack_fd, &cnt, 4);
+       SHA1_Init(&c);
+       SHA1_Update(&c, &hdr, sizeof(hdr));
 
-       buf = xmalloc(128 * 1024);
+       buf = xmalloc(buf_sz);
        for (;;) {
-               size_t n = xread(pack_fd, buf, 128 * 1024);
-               if (n <= 0)
+               size_t n = xread(pack_fd, buf, buf_sz);
+               if (!n)
                        break;
+               if (n < 0)
+                       die("Failed to checksum %s", pack_data->pack_name);
                SHA1_Update(&c, buf, n);
        }
        free(buf);
 
        SHA1_Final(pack_data->sha1, &c);
        write_or_die(pack_fd, pack_data->sha1, sizeof(pack_data->sha1));
+       close(pack_fd);
 }
 
 static int oecmp (const void *a_, const void *b_)
@@ -654,14 +676,14 @@ static int oecmp (const void *a_, const void *b_)
        return hashcmp(a->sha1, b->sha1);
 }
 
-static char* create_index()
+static char* create_index(void)
 {
        static char tmpfile[PATH_MAX];
        SHA_CTX ctx;
        struct sha1file *f;
        struct object_entry **idx, **c, **last, *e;
        struct object_entry_pool *o;
-       unsigned int array[256];
+       uint32_t array[256];
        int i, idx_fd;
 
        /* Build the sorted table of object IDs. */
@@ -698,7 +720,7 @@ static char* create_index()
        sha1write(f, array, 256 * sizeof(int));
        SHA1_Init(&ctx);
        for (c = idx; c != last; c++) {
-               unsigned int offset = htonl((*c)->offset);
+               uint32_t offset = htonl((*c)->offset);
                sha1write(f, &offset, 4);
                sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
                SHA1_Update(&ctx, (*c)->sha1, 20);
@@ -731,7 +753,6 @@ static char* keep_pack(char *curr_index_name)
                 get_object_directory(), sha1_to_hex(pack_data->sha1));
        if (move_temp_to_file(pack_data->pack_name, name))
                die("cannot store pack file");
-       printf("%s\n", name);
 
        snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
                 get_object_directory(), sha1_to_hex(pack_data->sha1));
@@ -740,7 +761,7 @@ static char* keep_pack(char *curr_index_name)
        return name;
 }
 
-static void unkeep_all_packs()
+static void unkeep_all_packs(void)
 {
        static char name[PATH_MAX];
        int k;
@@ -753,12 +774,15 @@ static void unkeep_all_packs()
        }
 }
 
-static void end_packfile()
+static void end_packfile(void)
 {
        struct packed_git *old_p = pack_data, *new_p;
 
        if (object_count) {
                char *idx_name;
+               int i;
+               struct branch *b;
+               struct tag *t;
 
                fixup_header_footer();
                idx_name = keep_pack(create_index());
@@ -768,14 +792,27 @@ static void end_packfile()
                if (!new_p)
                        die("core git rejected index %s", idx_name);
                new_p->windows = old_p->windows;
-               new_p->pack_fd = old_p->pack_fd;
-               all_packs[pack_id++] = new_p;
+               all_packs[pack_id] = new_p;
                install_packed_git(new_p);
+
+               /* Print the boundary */
+               fprintf(stdout, "%s:", new_p->pack_name);
+               for (i = 0; i < branch_table_sz; i++) {
+                       for (b = branch_table[i]; b; b = b->table_next_branch) {
+                               if (b->pack_id == pack_id)
+                                       fprintf(stdout, " %s", sha1_to_hex(b->sha1));
+                       }
+               }
+               for (t = first_tag; t; t = t->next_tag) {
+                       if (t->pack_id == pack_id)
+                               fprintf(stdout, " %s", sha1_to_hex(t->sha1));
+               }
+               fputc('\n', stdout);
+
+               pack_id++;
        }
-       else {
-               close(pack_fd);
+       else
                unlink(old_p->pack_name);
-       }
        free(old_p);
 
        /* We can't carry a delta across packfiles. */
@@ -786,7 +823,7 @@ static void end_packfile()
        last_blob.depth = 0;
 }
 
-static void checkpoint()
+static void checkpoint(void)
 {
        end_packfile();
        start_packfile();
@@ -874,9 +911,7 @@ static int store_object(
        deflateEnd(&s);
 
        /* Determine if we should auto-checkpoint. */
-       if ((object_count + 1) > max_objects
-               || (object_count + 1) < object_count
-               || (pack_size + 60 + s.total_out) > max_packsize
+       if ((pack_size + 60 + s.total_out) > max_packsize
                || (pack_size + 60 + s.total_out) < pack_size) {
 
                /* This new object needs to *not* have the current pack_id. */
@@ -914,23 +949,23 @@ static int store_object(
                last->depth++;
 
                hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
-               write_or_die(pack_fd, hdr, hdrlen);
+               write_or_die(pack_data->pack_fd, hdr, hdrlen);
                pack_size += hdrlen;
 
                hdr[pos] = ofs & 127;
                while (ofs >>= 7)
                        hdr[--pos] = 128 | (--ofs & 127);
-               write_or_die(pack_fd, hdr + pos, sizeof(hdr) - pos);
+               write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
                pack_size += sizeof(hdr) - pos;
        } else {
                if (last)
                        last->depth = 0;
                hdrlen = encode_header(type, datlen, hdr);
-               write_or_die(pack_fd, hdr, hdrlen);
+               write_or_die(pack_data->pack_fd, hdr, hdrlen);
                pack_size += hdrlen;
        }
 
-       write_or_die(pack_fd, out, s.total_out);
+       write_or_die(pack_data->pack_fd, out, s.total_out);
        pack_size += s.total_out;
 
        free(out);
@@ -1233,7 +1268,7 @@ static int tree_content_remove(struct tree_entry *root, const char *p)
        return 1;
 }
 
-static void dump_branches()
+static void dump_branches(void)
 {
        static const char *msg = "fast-import";
        unsigned int i;
@@ -1249,7 +1284,7 @@ static void dump_branches()
        }
 }
 
-static void dump_tags()
+static void dump_tags(void)
 {
        static const char *msg = "fast-import";
        struct tag *t;
@@ -1284,7 +1319,7 @@ static void dump_marks_helper(FILE *f,
        }
 }
 
-static void dump_marks()
+static void dump_marks(void)
 {
        if (mark_file)
        {
@@ -1294,12 +1329,12 @@ static void dump_marks()
        }
 }
 
-static void read_next_command()
+static void read_next_command(void)
 {
        read_line(&command_buf, stdin, '\n');
 }
 
-static void cmd_mark()
+static void cmd_mark(void)
 {
        if (!strncmp("mark :", command_buf.buf, 6)) {
                next_mark = strtoumax(command_buf.buf + 6, NULL, 10);
@@ -1311,21 +1346,48 @@ static void cmd_mark()
 
 static void* cmd_data (size_t *size)
 {
-       size_t n = 0;
-       void *buffer;
        size_t length;
+       char *buffer;
 
        if (strncmp("data ", command_buf.buf, 5))
                die("Expected 'data n' command, found: %s", command_buf.buf);
 
-       length = strtoul(command_buf.buf + 5, NULL, 10);
-       buffer = xmalloc(length);
-
-       while (n < length) {
-               size_t s = fread((char*)buffer + n, 1, length - n, stdin);
-               if (!s && feof(stdin))
-                       die("EOF in data (%lu bytes remaining)", length - n);
-               n += s;
+       if (!strncmp("<<", command_buf.buf + 5, 2)) {
+               char *term = xstrdup(command_buf.buf + 5 + 2);
+               size_t sz = 8192, term_len = command_buf.len - 5 - 2;
+               length = 0;
+               buffer = xmalloc(sz);
+               for (;;) {
+                       read_next_command();
+                       if (command_buf.eof)
+                               die("EOF in data (terminator '%s' not found)", term);
+                       if (term_len == command_buf.len
+                               && !strcmp(term, command_buf.buf))
+                               break;
+                       if (sz < (length + command_buf.len)) {
+                               sz = sz * 3 / 2 + 16;
+                               if (sz < (length + command_buf.len))
+                                       sz = length + command_buf.len;
+                               buffer = xrealloc(buffer, sz);
+                       }
+                       memcpy(buffer + length,
+                               command_buf.buf,
+                               command_buf.len - 1);
+                       length += command_buf.len - 1;
+                       buffer[length++] = '\n';
+               }
+               free(term);
+       }
+       else {
+               size_t n = 0;
+               length = strtoul(command_buf.buf + 5, NULL, 10);
+               buffer = xmalloc(length);
+               while (n < length) {
+                       size_t s = fread(buffer + n, 1, length - n, stdin);
+                       if (!s && feof(stdin))
+                               die("EOF in data (%lu bytes remaining)", length - n);
+                       n += s;
+               }
        }
 
        if (fgetc(stdin) != '\n')
@@ -1335,7 +1397,7 @@ static void* cmd_data (size_t *size)
        return buffer;
 }
 
-static void cmd_new_blob()
+static void cmd_new_blob(void)
 {
        size_t l;
        void *d;
@@ -1348,7 +1410,7 @@ static void cmd_new_blob()
                free(d);
 }
 
-static void unload_one_branch()
+static void unload_one_branch(void)
 {
        while (cur_active_branches
                && cur_active_branches >= max_active_branches) {
@@ -1395,7 +1457,7 @@ static void file_change_m(struct branch *b)
        const char *endp;
        struct object_entry *oe;
        unsigned char sha1[20];
-       unsigned int mode;
+       unsigned int mode, inline_data = 0;
        char type[20];
 
        p = get_mode(p, &mode);
@@ -1418,6 +1480,9 @@ static void file_change_m(struct branch *b)
                oe = find_mark(strtoumax(p + 1, &x, 10));
                hashcpy(sha1, oe->sha1);
                p = x;
+       } else if (!strncmp("inline", p, 6)) {
+               inline_data = 1;
+               p += 6;
        } else {
                if (get_sha1_hex(p, sha1))
                        die("Invalid SHA1: %s", command_buf.buf);
@@ -1434,7 +1499,16 @@ static void file_change_m(struct branch *b)
                p = p_uq;
        }
 
-       if (oe) {
+       if (inline_data) {
+               size_t l;
+               void *d;
+               if (!p_uq)
+                       p = p_uq = xstrdup(p);
+               read_next_command();
+               d = cmd_data(&l);
+               if (store_object(OBJ_BLOB, d, l, &last_blob, sha1, 0))
+                       free(d);
+       } else if (oe) {
                if (oe->type != OBJ_BLOB)
                        die("Not a blob (actually a %s): %s",
                                command_buf.buf, type_names[oe->type]);
@@ -1581,7 +1655,7 @@ static struct hash_list* cmd_merge(unsigned int *count)
        return list;
 }
 
-static void cmd_new_commit()
+static void cmd_new_commit(void)
 {
        struct branch *b;
        void *msg;
@@ -1676,9 +1750,10 @@ static void cmd_new_commit()
        free(committer);
        free(msg);
 
-       store_object(OBJ_COMMIT,
+       if (!store_object(OBJ_COMMIT,
                new_data.buffer, sp - (char*)new_data.buffer,
-               NULL, b->sha1, next_mark);
+               NULL, b->sha1, next_mark))
+               b->pack_id = pack_id;
        b->last_commit = object_count_by_type[OBJ_COMMIT];
 
        if (branch_log) {
@@ -1694,7 +1769,7 @@ static void cmd_new_commit()
        }
 }
 
-static void cmd_new_tag()
+static void cmd_new_tag(void)
 {
        char *str_uq;
        const char *endp;
@@ -1786,8 +1861,12 @@ static void cmd_new_tag()
        free(tagger);
        free(msg);
 
-       store_object(OBJ_TAG, new_data.buffer, sp - (char*)new_data.buffer,
-               NULL, t->sha1, 0);
+       if (store_object(OBJ_TAG, new_data.buffer,
+               sp - (char*)new_data.buffer,
+               NULL, t->sha1, 0))
+               t->pack_id = MAX_PACK_ID;
+       else
+               t->pack_id = pack_id;
 
        if (branch_log) {
                int need_dq = quote_c_style(t->name, NULL, NULL, 0);
@@ -1802,7 +1881,7 @@ static void cmd_new_tag()
        }
 }
 
-static void cmd_reset_branch()
+static void cmd_reset_branch(void)
 {
        struct branch *b;
        char *str_uq;
@@ -1833,7 +1912,7 @@ static void cmd_reset_branch()
        cmd_from(b);
 }
 
-static void cmd_checkpoint()
+static void cmd_checkpoint(void)
 {
        if (object_count)
                checkpoint();
@@ -1841,13 +1920,12 @@ static void cmd_checkpoint()
 }
 
 static const char fast_import_usage[] =
-"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log]";
+"git-fast-import [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log]";
 
 int main(int argc, const char **argv)
 {
        int i;
-       uintmax_t est_obj_cnt = object_entry_alloc;
-       uintmax_t duplicate_count;
+       uintmax_t total_count, duplicate_count;
 
        setup_ident();
        git_config(git_default_config);
@@ -1857,10 +1935,6 @@ int main(int argc, const char **argv)
 
                if (*a != '-' || !strcmp(a, "--"))
                        break;
-               else if (!strncmp(a, "--objects=", 10))
-                       est_obj_cnt = strtoumax(a + 10, NULL, 0);
-               else if (!strncmp(a, "--max-objects-per-pack=", 23))
-                       max_objects = strtoumax(a + 23, NULL, 0);
                else if (!strncmp(a, "--max-pack-size=", 16))
                        max_packsize = strtoumax(a + 16, NULL, 0) * 1024 * 1024;
                else if (!strncmp(a, "--depth=", 8))
@@ -1880,7 +1954,7 @@ int main(int argc, const char **argv)
        if (i != argc)
                usage(fast_import_usage);
 
-       alloc_objects(est_obj_cnt);
+       alloc_objects(object_entry_alloc);
        strbuf_init(&command_buf);
 
        atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
@@ -1915,14 +1989,17 @@ int main(int argc, const char **argv)
        if (branch_log)
                fclose(branch_log);
 
+       total_count = 0;
+       for (i = 0; i < ARRAY_SIZE(object_count_by_type); i++)
+               total_count += object_count_by_type[i];
        duplicate_count = 0;
        for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
                duplicate_count += duplicate_count_by_type[i];
 
        fprintf(stderr, "%s statistics:\n", argv[0]);
        fprintf(stderr, "---------------------------------------------------------------------\n");
-       fprintf(stderr, "Alloc'd objects: %10ju (%10ju overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
-       fprintf(stderr, "Total objects:   %10ju (%10ju duplicates                  )\n", object_count, duplicate_count);
+       fprintf(stderr, "Alloc'd objects: %10ju\n", alloc_count);
+       fprintf(stderr, "Total objects:   %10ju (%10ju duplicates                  )\n", total_count, duplicate_count);
        fprintf(stderr, "      blobs  :   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
        fprintf(stderr, "      trees  :   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
        fprintf(stderr, "      commits:   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);