lf;
commit_msg ::= data;
- file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
- | 'D' sp path_str lf
- ;
- mode ::= '644' | '755';
+ file_change ::= file_del | file_obm | file_inm;
+ file_del ::= 'D' sp path_str lf;
+ file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
+ file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
+ data;
new_tag ::= 'tag' sp tag_str lf
'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
# a new mark directive with the old idnum.
#
mark ::= 'mark' sp idnum lf;
+ data ::= (delimited_data | exact_data)
+ lf;
+
+ # note: delim may be any string but must not contain lf.
+ # data_line may contain any data but must not be exactly
+ # delim.
+ delimited_data ::= 'data' sp '<<' delim lf
+ (data_line lf)*
+ delim lf;
# note: declen indicates the length of binary_data in bytes.
- # declen does not include the lf preceeding or trailing the
- # binary data.
+ # declen does not include the lf preceding the binary data.
#
- data ::= 'data' sp declen lf
- binary_data
- lf;
+ exact_data ::= 'data' sp declen lf
+ binary_data;
# note: quoted strings are C-style quoting supporting \c for
# common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
tag_str ::= tag | '"' quoted(tag) '"' ;
path_str ::= path | '"' quoted(path) '"' ;
+ mode ::= '100644' | '644'
+ | '100755' | '755'
+ | '140000'
+ ;
declen ::= # unsigned 32 bit value, ascii base10 notation;
+ bigint ::= # unsigned integer value, ascii base10 notation;
binary_data ::= # file content, not interpreted;
sp ::= # ASCII space character;
# an idnum. This is to distinguish it from a ref or tag name as
# GIT does not permit ':' in ref or tag strings.
#
- idnum ::= ':' declen;
+ idnum ::= ':' bigint;
path ::= # GIT style file path, e.g. "a/b/c";
ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
#include "strbuf.h"
#include "quote.h"
+#define PACK_ID_BITS 16
+#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
+
struct object_entry
{
struct object_entry *next;
unsigned long offset;
unsigned type : TYPE_BITS;
- unsigned pack_id : 16;
+ unsigned pack_id : PACK_ID_BITS;
unsigned char sha1[20];
};
struct mark_set
{
- int shift;
union {
struct object_entry *marked[1024];
struct mark_set *sets[1024];
} data;
+ unsigned int shift;
};
struct last_object
struct atom_str
{
struct atom_str *next_atom;
- int str_len;
+ unsigned int str_len;
char str_dat[FLEX_ARRAY]; /* more */
};
struct branch *table_next_branch;
struct branch *active_next_branch;
const char *name;
- unsigned long last_commit;
struct tree_entry branch_tree;
+ uintmax_t last_commit;
+ unsigned int pack_id;
unsigned char sha1[20];
};
{
struct tag *next_tag;
const char *name;
+ unsigned int pack_id;
unsigned char sha1[20];
};
/* Configured limits on output */
static unsigned long max_depth = 10;
-static unsigned long max_packsize = -1;
-static uintmax_t max_objects = -1;
+static unsigned long max_packsize = (1LL << 32) - 1;
/* Stats and misc. counters */
static uintmax_t alloc_count;
-static uintmax_t object_count;
static uintmax_t marks_set_count;
static uintmax_t object_count_by_type[1 << TYPE_BITS];
static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
static uintmax_t delta_count_by_type[1 << TYPE_BITS];
+static unsigned long object_count;
static unsigned long branch_count;
static unsigned long branch_load_count;
b->table_next_branch = branch_table[hc];
b->branch_tree.versions[0].mode = S_IFDIR;
b->branch_tree.versions[1].mode = S_IFDIR;
+ b->pack_id = MAX_PACK_ID;
branch_table[hc] = b;
branch_count++;
return b;
return r;
}
-static struct tree_entry* new_tree_entry()
+static struct tree_entry* new_tree_entry(void)
{
struct tree_entry *e;
avail_tree_entry = e;
}
-static void start_packfile()
+static void start_packfile(void)
{
static char tmpfile[PATH_MAX];
struct packed_git *p;
all_packs[pack_id] = p;
}
-static void fixup_header_footer()
+static void fixup_header_footer(void)
{
+ static const int buf_sz = 128 * 1024;
int pack_fd = pack_data->pack_fd;
SHA_CTX c;
- char hdr[8];
- unsigned long cnt;
+ struct pack_header hdr;
char *buf;
if (lseek(pack_fd, 0, SEEK_SET) != 0)
die("Failed seeking to start: %s", strerror(errno));
-
- SHA1_Init(&c);
- if (read_in_full(pack_fd, hdr, 8) != 8)
+ if (read_in_full(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
die("Unable to reread header of %s", pack_data->pack_name);
- SHA1_Update(&c, hdr, 8);
+ if (lseek(pack_fd, 0, SEEK_SET) != 0)
+ die("Failed seeking to start: %s", strerror(errno));
+ hdr.hdr_entries = htonl(object_count);
+ write_or_die(pack_fd, &hdr, sizeof(hdr));
- cnt = htonl(object_count);
- SHA1_Update(&c, &cnt, 4);
- write_or_die(pack_fd, &cnt, 4);
+ SHA1_Init(&c);
+ SHA1_Update(&c, &hdr, sizeof(hdr));
- buf = xmalloc(128 * 1024);
+ buf = xmalloc(buf_sz);
for (;;) {
- size_t n = xread(pack_fd, buf, 128 * 1024);
- if (n <= 0)
+ size_t n = xread(pack_fd, buf, buf_sz);
+ if (!n)
break;
+ if (n < 0)
+ die("Failed to checksum %s", pack_data->pack_name);
SHA1_Update(&c, buf, n);
}
free(buf);
return hashcmp(a->sha1, b->sha1);
}
-static char* create_index()
+static char* create_index(void)
{
static char tmpfile[PATH_MAX];
SHA_CTX ctx;
struct sha1file *f;
struct object_entry **idx, **c, **last, *e;
struct object_entry_pool *o;
- unsigned int array[256];
+ uint32_t array[256];
int i, idx_fd;
/* Build the sorted table of object IDs. */
sha1write(f, array, 256 * sizeof(int));
SHA1_Init(&ctx);
for (c = idx; c != last; c++) {
- unsigned int offset = htonl((*c)->offset);
+ uint32_t offset = htonl((*c)->offset);
sha1write(f, &offset, 4);
sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
SHA1_Update(&ctx, (*c)->sha1, 20);
get_object_directory(), sha1_to_hex(pack_data->sha1));
if (move_temp_to_file(pack_data->pack_name, name))
die("cannot store pack file");
- printf("%s\n", name);
snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
get_object_directory(), sha1_to_hex(pack_data->sha1));
return name;
}
-static void unkeep_all_packs()
+static void unkeep_all_packs(void)
{
static char name[PATH_MAX];
int k;
}
}
-static void end_packfile()
+static void end_packfile(void)
{
struct packed_git *old_p = pack_data, *new_p;
if (object_count) {
char *idx_name;
+ int i;
+ struct branch *b;
+ struct tag *t;
fixup_header_footer();
idx_name = keep_pack(create_index());
if (!new_p)
die("core git rejected index %s", idx_name);
new_p->windows = old_p->windows;
- all_packs[pack_id++] = new_p;
+ all_packs[pack_id] = new_p;
install_packed_git(new_p);
+
+ /* Print the boundary */
+ fprintf(stdout, "%s:", new_p->pack_name);
+ for (i = 0; i < branch_table_sz; i++) {
+ for (b = branch_table[i]; b; b = b->table_next_branch) {
+ if (b->pack_id == pack_id)
+ fprintf(stdout, " %s", sha1_to_hex(b->sha1));
+ }
+ }
+ for (t = first_tag; t; t = t->next_tag) {
+ if (t->pack_id == pack_id)
+ fprintf(stdout, " %s", sha1_to_hex(t->sha1));
+ }
+ fputc('\n', stdout);
+
+ pack_id++;
}
else
unlink(old_p->pack_name);
last_blob.depth = 0;
}
-static void checkpoint()
+static void checkpoint(void)
{
end_packfile();
start_packfile();
deflateEnd(&s);
/* Determine if we should auto-checkpoint. */
- if ((object_count + 1) > max_objects
- || (object_count + 1) < object_count
- || (pack_size + 60 + s.total_out) > max_packsize
+ if ((pack_size + 60 + s.total_out) > max_packsize
|| (pack_size + 60 + s.total_out) < pack_size) {
/* This new object needs to *not* have the current pack_id. */
return 1;
}
-static void dump_branches()
+static void dump_branches(void)
{
static const char *msg = "fast-import";
unsigned int i;
}
}
-static void dump_tags()
+static void dump_tags(void)
{
static const char *msg = "fast-import";
struct tag *t;
}
}
-static void dump_marks()
+static void dump_marks(void)
{
if (mark_file)
{
}
}
-static void read_next_command()
+static void read_next_command(void)
{
read_line(&command_buf, stdin, '\n');
}
-static void cmd_mark()
+static void cmd_mark(void)
{
if (!strncmp("mark :", command_buf.buf, 6)) {
next_mark = strtoumax(command_buf.buf + 6, NULL, 10);
static void* cmd_data (size_t *size)
{
- size_t n = 0;
- void *buffer;
size_t length;
+ char *buffer;
if (strncmp("data ", command_buf.buf, 5))
die("Expected 'data n' command, found: %s", command_buf.buf);
- length = strtoul(command_buf.buf + 5, NULL, 10);
- buffer = xmalloc(length);
-
- while (n < length) {
- size_t s = fread((char*)buffer + n, 1, length - n, stdin);
- if (!s && feof(stdin))
- die("EOF in data (%lu bytes remaining)", length - n);
- n += s;
+ if (!strncmp("<<", command_buf.buf + 5, 2)) {
+ char *term = xstrdup(command_buf.buf + 5 + 2);
+ size_t sz = 8192, term_len = command_buf.len - 5 - 2;
+ length = 0;
+ buffer = xmalloc(sz);
+ for (;;) {
+ read_next_command();
+ if (command_buf.eof)
+ die("EOF in data (terminator '%s' not found)", term);
+ if (term_len == command_buf.len
+ && !strcmp(term, command_buf.buf))
+ break;
+ if (sz < (length + command_buf.len)) {
+ sz = sz * 3 / 2 + 16;
+ if (sz < (length + command_buf.len))
+ sz = length + command_buf.len;
+ buffer = xrealloc(buffer, sz);
+ }
+ memcpy(buffer + length,
+ command_buf.buf,
+ command_buf.len - 1);
+ length += command_buf.len - 1;
+ buffer[length++] = '\n';
+ }
+ free(term);
+ }
+ else {
+ size_t n = 0;
+ length = strtoul(command_buf.buf + 5, NULL, 10);
+ buffer = xmalloc(length);
+ while (n < length) {
+ size_t s = fread(buffer + n, 1, length - n, stdin);
+ if (!s && feof(stdin))
+ die("EOF in data (%lu bytes remaining)", length - n);
+ n += s;
+ }
}
if (fgetc(stdin) != '\n')
return buffer;
}
-static void cmd_new_blob()
+static void cmd_new_blob(void)
{
size_t l;
void *d;
free(d);
}
-static void unload_one_branch()
+static void unload_one_branch(void)
{
while (cur_active_branches
&& cur_active_branches >= max_active_branches) {
const char *endp;
struct object_entry *oe;
unsigned char sha1[20];
- unsigned int mode;
+ unsigned int mode, inline_data = 0;
char type[20];
p = get_mode(p, &mode);
oe = find_mark(strtoumax(p + 1, &x, 10));
hashcpy(sha1, oe->sha1);
p = x;
+ } else if (!strncmp("inline", p, 6)) {
+ inline_data = 1;
+ p += 6;
} else {
if (get_sha1_hex(p, sha1))
die("Invalid SHA1: %s", command_buf.buf);
p = p_uq;
}
- if (oe) {
+ if (inline_data) {
+ size_t l;
+ void *d;
+ if (!p_uq)
+ p = p_uq = xstrdup(p);
+ read_next_command();
+ d = cmd_data(&l);
+ if (store_object(OBJ_BLOB, d, l, &last_blob, sha1, 0))
+ free(d);
+ } else if (oe) {
if (oe->type != OBJ_BLOB)
die("Not a blob (actually a %s): %s",
command_buf.buf, type_names[oe->type]);
return list;
}
-static void cmd_new_commit()
+static void cmd_new_commit(void)
{
struct branch *b;
void *msg;
free(committer);
free(msg);
- store_object(OBJ_COMMIT,
+ if (!store_object(OBJ_COMMIT,
new_data.buffer, sp - (char*)new_data.buffer,
- NULL, b->sha1, next_mark);
+ NULL, b->sha1, next_mark))
+ b->pack_id = pack_id;
b->last_commit = object_count_by_type[OBJ_COMMIT];
if (branch_log) {
}
}
-static void cmd_new_tag()
+static void cmd_new_tag(void)
{
char *str_uq;
const char *endp;
free(tagger);
free(msg);
- store_object(OBJ_TAG, new_data.buffer, sp - (char*)new_data.buffer,
- NULL, t->sha1, 0);
+ if (store_object(OBJ_TAG, new_data.buffer,
+ sp - (char*)new_data.buffer,
+ NULL, t->sha1, 0))
+ t->pack_id = MAX_PACK_ID;
+ else
+ t->pack_id = pack_id;
if (branch_log) {
int need_dq = quote_c_style(t->name, NULL, NULL, 0);
}
}
-static void cmd_reset_branch()
+static void cmd_reset_branch(void)
{
struct branch *b;
char *str_uq;
cmd_from(b);
}
-static void cmd_checkpoint()
+static void cmd_checkpoint(void)
{
if (object_count)
checkpoint();
}
static const char fast_import_usage[] =
-"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log]";
+"git-fast-import [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log]";
int main(int argc, const char **argv)
{
int i;
- uintmax_t est_obj_cnt = object_entry_alloc;
- uintmax_t duplicate_count;
+ uintmax_t total_count, duplicate_count;
setup_ident();
git_config(git_default_config);
if (*a != '-' || !strcmp(a, "--"))
break;
- else if (!strncmp(a, "--objects=", 10))
- est_obj_cnt = strtoumax(a + 10, NULL, 0);
- else if (!strncmp(a, "--max-objects-per-pack=", 23))
- max_objects = strtoumax(a + 23, NULL, 0);
else if (!strncmp(a, "--max-pack-size=", 16))
max_packsize = strtoumax(a + 16, NULL, 0) * 1024 * 1024;
else if (!strncmp(a, "--depth=", 8))
if (i != argc)
usage(fast_import_usage);
- alloc_objects(est_obj_cnt);
+ alloc_objects(object_entry_alloc);
strbuf_init(&command_buf);
atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
if (branch_log)
fclose(branch_log);
+ total_count = 0;
+ for (i = 0; i < ARRAY_SIZE(object_count_by_type); i++)
+ total_count += object_count_by_type[i];
duplicate_count = 0;
for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
duplicate_count += duplicate_count_by_type[i];
fprintf(stderr, "%s statistics:\n", argv[0]);
fprintf(stderr, "---------------------------------------------------------------------\n");
- fprintf(stderr, "Alloc'd objects: %10ju (%10ju overflow )\n", alloc_count, alloc_count - est_obj_cnt);
- fprintf(stderr, "Total objects: %10ju (%10ju duplicates )\n", object_count, duplicate_count);
+ fprintf(stderr, "Alloc'd objects: %10ju\n", alloc_count);
+ fprintf(stderr, "Total objects: %10ju (%10ju duplicates )\n", total_count, duplicate_count);
fprintf(stderr, " blobs : %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
fprintf(stderr, " trees : %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
fprintf(stderr, " commits: %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);