From: Jonathan Nieder Date: Tue, 22 Mar 2011 23:11:59 +0000 (-0500) Subject: Merge branch 'db/length-as-hash' (early part) into db/svn-fe-code-purge X-Git-Tag: v1.7.12-rc0~41^2~13^2~4^2~5 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/5c674860eb8bfa1a8c0038fc2d32c4fc205e9d62?ds=inline;hp=-c Merge branch 'db/length-as-hash' (early part) into db/svn-fe-code-purge * 'db/length-as-hash' (early part): vcs-svn: implement perfect hash for top-level keys vcs-svn: implement perfect hash for node-prop keys vcs-svn: improve reporting of input errors vcs-svn: make buffer_copy_bytes return length read vcs-svn: make buffer_skip_bytes return length read vcs-svn: improve support for reading large files Conflicts: vcs-svn/fast_export.c vcs-svn/svndump.c --- 5c674860eb8bfa1a8c0038fc2d32c4fc205e9d62 diff --combined vcs-svn/fast_export.c index f19db9ae82,07a8353c8b..32f0c8cfcc --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@@ -8,58 -8,30 +8,58 @@@ #include "line_buffer.h" #include "repo_tree.h" #include "string_pool.h" +#include "strbuf.h" #define MAX_GITSVN_LINE_LEN 4096 static uint32_t first_commit_done; +static struct line_buffer report_buffer = LINE_BUFFER_INIT; -void fast_export_delete(uint32_t depth, uint32_t *path) +void fast_export_init(int fd) { - putchar('D'); - putchar(' '); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + if (buffer_fdinit(&report_buffer, fd)) + die_errno("cannot read from file descriptor %d", fd); } -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark) +void fast_export_deinit(void) +{ + if (buffer_deinit(&report_buffer)) + die_errno("error closing fast-import feedback stream"); +} + +void fast_export_reset(void) +{ + buffer_reset(&report_buffer); +} + +void fast_export_delete(uint32_t depth, const uint32_t *path) +{ + printf("D \""); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); +} + +static void fast_export_truncate(uint32_t depth, const uint32_t *path, uint32_t mode) +{ + fast_export_modify(depth, path, mode, "inline"); + printf("data 0\n\n"); +} + +void fast_export_modify(uint32_t depth, const uint32_t *path, uint32_t mode, + const char *dataref) { /* Mode must be 100644, 100755, 120000, or 160000. */ - printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); - pool_print_seq(depth, path, '/', stdout); - putchar('\n'); + if (!dataref) { + fast_export_truncate(depth, path, mode); + return; + } + printf("M %06"PRIo32" %s \"", mode, dataref); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, uint32_t author, char *log, +void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp) { @@@ -73,7 -45,6 +73,7 @@@ *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); + printf("mark :%"PRIu32"\n", revision); printf("committer %s <%s@%s> %ld +0000\n", ~author ? pool_fetch(author) : "nobody", ~author ? pool_fetch(author) : "nobody", @@@ -83,107 -54,32 +83,116 @@@ log, gitsvnline); if (!first_commit_done) { if (revision > 1) - printf("from refs/heads/master^0\n"); + printf("from :%"PRIu32"\n", revision - 1); first_commit_done = 1; } - repo_diff(revision - 1, revision); - fputc('\n', stdout); +} +void fast_export_end_commit(uint32_t revision) +{ printf("progress Imported commit %"PRIu32".\n\n", revision); } +static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path) +{ + /* ls :5 path/to/old/file */ + printf("ls :%"PRIu32" \"", rev); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); + fflush(stdout); +} + +static void ls_from_active_commit(uint32_t depth, const uint32_t *path) +{ + /* ls "path/to/file" */ + printf("ls \""); + pool_print_seq_q(depth, path, '/', stdout); + printf("\"\n"); + fflush(stdout); +} + +static const char *get_response_line(void) +{ + const char *line = buffer_read_line(&report_buffer); + if (line) + return line; + if (buffer_ferror(&report_buffer)) + die_errno("error reading from fast-import"); + die("unexpected end of fast-import feedback"); +} + + static void die_short_read(struct line_buffer *input) + { + if (buffer_ferror(input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); + } + -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) +void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(input, 5); len -= 5; + if (buffer_skip_bytes(input, 5) != 5) + die_short_read(input); } - printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); + printf("data %"PRIu32"\n", len); - buffer_copy_bytes(input, len); + if (buffer_copy_bytes(input, len) != len) + die_short_read(input); fputc('\n', stdout); } + +static int parse_ls_response(const char *response, uint32_t *mode, + struct strbuf *dataref) +{ + const char *tab; + const char *response_end; + + assert(response); + response_end = response + strlen(response); + + if (*response == 'm') { /* Missing. */ + errno = ENOENT; + return -1; + } + + /* Mode. */ + if (response_end - response < strlen("100644") || + response[strlen("100644")] != ' ') + die("invalid ls response: missing mode: %s", response); + *mode = 0; + for (; *response != ' '; response++) { + char ch = *response; + if (ch < '0' || ch > '7') + die("invalid ls response: mode is not octal: %s", response); + *mode *= 8; + *mode += ch - '0'; + } + + /* ' blob ' or ' tree ' */ + if (response_end - response < strlen(" blob ") || + (response[1] != 'b' && response[1] != 't')) + die("unexpected ls response: not a tree or blob: %s", response); + response += strlen(" blob "); + + /* Dataref. */ + tab = memchr(response, '\t', response_end - response); + if (!tab) + die("invalid ls response: missing tab: %s", response); + strbuf_add(dataref, response, tab - response); + return 0; +} + +int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_rev(rev, depth, path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +int fast_export_ls(uint32_t depth, const uint32_t *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_active_commit(depth, path); + return parse_ls_response(get_response_line(), mode, dataref); +} diff --combined vcs-svn/svndump.c index 7ecb227a6d,77680a31e8..ff985fe9e6 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@@ -14,19 -14,21 +14,25 @@@ #include "obj_pool.h" #include "string_pool.h" +#define REPORT_FILENO 3 + + /* + * Compare start of string to literal of equal length; + * must be guarded by length test. + */ + #define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) + #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 #define NODEACT_ADD 2 #define NODEACT_CHANGE 1 #define NODEACT_UNKNOWN 0 -#define DUMP_CTX 0 -#define REV_CTX 1 -#define NODE_CTX 2 +/* States: */ +#define DUMP_CTX 0 /* dump metadata */ +#define REV_CTX 1 /* revision metadata */ +#define NODE_CTX 2 /* node metadata */ +#define INTERNODE_CTX 3 /* between nodes */ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 @@@ -36,8 -38,6 +42,8 @@@ obj_pool_gen(log, char, 4096 static struct line_buffer input = LINE_BUFFER_INIT; +#define REPORT_FILENO 3 + static char *log_copy(uint32_t length, const char *log) { char *buffer; @@@ -63,15 -63,6 +69,6 @@@ static struct uint32_t version, uuid, url; } dump_ctx; - static struct { - uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, - revision_number, node_path, node_kind, node_action, - node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version, - /* version 3 format */ - text_delta, prop_delta; - } keys; - static void reset_node_ctx(char *fname) { node_ctx.type = 0; @@@ -100,45 -91,43 +97,43 @@@ static void reset_dump_ctx(uint32_t url dump_ctx.uuid = ~0; } - static void init_keys(void) - { - keys.svn_log = pool_intern("svn:log"); - keys.svn_author = pool_intern("svn:author"); - keys.svn_date = pool_intern("svn:date"); - keys.svn_executable = pool_intern("svn:executable"); - keys.svn_special = pool_intern("svn:special"); - keys.uuid = pool_intern("UUID"); - keys.revision_number = pool_intern("Revision-number"); - keys.node_path = pool_intern("Node-path"); - keys.node_kind = pool_intern("Node-kind"); - keys.node_action = pool_intern("Node-action"); - keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); - keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); - keys.text_content_length = pool_intern("Text-content-length"); - keys.prop_content_length = pool_intern("Prop-content-length"); - keys.content_length = pool_intern("Content-length"); - keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); - /* version 3 format (Subversion 1.1.0) */ - keys.text_delta = pool_intern("Text-delta"); - keys.prop_delta = pool_intern("Prop-delta"); - } - - static void handle_property(uint32_t key, const char *val, uint32_t len, + static void handle_property(const struct strbuf *key_buf, + const char *val, uint32_t len, uint32_t *type_set) { - if (key == keys.svn_log) { + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; if (!val) die("invalid dump: unsets svn:log"); /* Value length excludes terminating nul. */ rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { + break; + case sizeof("svn:date"): + if (constcmp(key, "svn:date")) + break; if (!val) die("invalid dump: unsets svn:date"); if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) warning("invalid timestamp: %s", val); - } else if (key == keys.svn_executable || key == keys.svn_special) { + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; if (*type_set) { if (!val) return; @@@ -149,15 -138,22 +144,22 @@@ return; } *type_set = 1; - node_ctx.type = key == keys.svn_executable ? + node_ctx.type = keylen == strlen("svn:executable") ? REPO_MODE_EXE : REPO_MODE_LNK; } } + static void die_short_read(void) + { + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); + } + static void read_props(void) { - uint32_t key = ~0; + static struct strbuf key = STRBUF_INIT; const char *t; /* * NEEDSWORK: to support simple mode changes like @@@ -176,25 -172,37 +178,37 @@@ uint32_t len; const char *val; const char type = t[0]; + int ch; if (!type || t[1] != ' ') die("invalid property line: %s\n", t); len = atoi(&t[2]); val = buffer_read_string(&input, len); - buffer_skip_bytes(&input, 1); /* Discard trailing newline. */ + if (!val || strlen(val) != len) + die_short_read(); + + /* Discard trailing newline. */ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val); switch (type) { case 'K': - key = pool_intern(val); - continue; case 'D': - key = pool_intern(val); + strbuf_reset(&key); + if (val) + strbuf_add(&key, val, len); + if (type == 'K') + continue; + assert(type == 'D'); val = NULL; len = 0; /* fall through */ case 'V': - handle_property(key, val, len, &type_set); - key = ~0; + handle_property(&key, val, len, &type_set); + strbuf_reset(&key); continue; default: die("invalid property line: %s\n", t); @@@ -204,21 -212,15 +218,21 @@@ static void handle_node(void) { - uint32_t mark = 0; const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + /* + * Old text for this node: + * NULL - directory or bug + * empty_blob - empty + * "" - data retrievable from fast-import + */ + static const char *const empty_blob = "::empty::"; + const char *old_data = NULL; if (node_ctx.text_delta) die("text deltas not supported"); - if (have_text) - mark = next_blob_mark(); + if (node_ctx.action == NODEACT_DELETE) { if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " @@@ -238,15 -240,15 +252,15 @@@ die("invalid dump: directories cannot have text attached"); /* - * Decide on the new content (mark) and mode (node_ctx.type). + * Find old content (old_data) and decide on the new mode. */ if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { if (type != REPO_MODE_DIR) die("invalid dump: root of tree is not a regular file"); + old_data = NULL; } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode; - if (!have_text) - mark = repo_read_path(node_ctx.dst); + old_data = repo_read_path(node_ctx.dst); mode = repo_read_mode(node_ctx.dst); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); @@@ -254,11 -256,7 +268,11 @@@ die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (!have_text && type != REPO_MODE_DIR) + if (type == REPO_MODE_DIR) + old_data = NULL; + else if (have_text) + old_data = empty_blob; + else die("invalid dump: adds node without text"); } else { die("invalid dump: Node-path block lacks Node-action"); @@@ -277,34 -275,17 +291,34 @@@ /* * Save the result. */ - repo_add(node_ctx.dst, node_ctx.type, mark); - if (have_text) - fast_export_blob(node_ctx.type, mark, - node_ctx.textLength, &input); + if (type == REPO_MODE_DIR) /* directories are not tracked. */ + return; + assert(old_data); + if (old_data == empty_blob) + /* For the fast_export_* functions, NULL means empty. */ + old_data = NULL; + if (!have_text) { + fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, + node_ctx.type, old_data); + return; + } + fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst, + node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.textLength, &input); +} + +static void begin_revision(void) +{ + if (!rev_ctx.revision) /* revision 0 gets no git commit. */ + return; + fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); } -static void handle_revision(void) +static void end_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + fast_export_end_commit(rev_ctx.revision); } void svndump_read(const char *url) @@@ -313,48 -294,61 +327,65 @@@ char *t; uint32_t active_ctx = DUMP_CTX; uint32_t len; - uint32_t key; reset_dump_ctx(pool_intern(url)); while ((t = buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) continue; - *val++ = '\0'; - *val++ = '\0'; - key = pool_intern(t); + val += 2; - if (key == keys.svn_fs_dump_format_version) { + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; dump_ctx.version = atoi(val); if (dump_ctx.version > 3) die("expected svn dump format version <= 3, found %"PRIu32, dump_ctx.version); - } else if (key == keys.uuid) { + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; dump_ctx.uuid = pool_intern(val); - } else if (key == keys.revision_number) { + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); - } else if (key == keys.node_path) { - if (active_ctx == NODE_CTX) - handle_node(); - if (active_ctx == REV_CTX) - begin_revision(); - active_ctx = NODE_CTX; - reset_node_ctx(val); - } else if (key == keys.node_kind) { + break; + case sizeof("Node-path"): + if (prefixcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if (active_ctx == NODE_CTX) + handle_node(); ++ if (active_ctx == REV_CTX) ++ begin_revision(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; if (!strcmp(val, "dir")) node_ctx.type = REPO_MODE_DIR; else if (!strcmp(val, "file")) node_ctx.type = REPO_MODE_BLB; else fprintf(stderr, "Unknown node-kind: %s\n", val); - } else if (key == keys.node_action) { + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; if (!strcmp(val, "delete")) { node_ctx.action = NODEACT_DELETE; } else if (!strcmp(val, "add")) { @@@ -367,56 -361,79 +398,81 @@@ fprintf(stderr, "Unknown node-action: %s\n", val); node_ctx.action = NODEACT_UNKNOWN; } - } else if (key == keys.node_copyfrom_path) { + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); - } else if (key == keys.node_copyfrom_rev) { + break; + case sizeof("Node-copyfrom-rev"): + if (constcmp(t, "Node-copyfrom-rev")) + continue; node_ctx.srcRev = atoi(val); - } else if (key == keys.text_content_length) { - node_ctx.textLength = atoi(val); - } else if (key == keys.prop_content_length) { + break; + case sizeof("Text-content-length"): + if (!constcmp(t, "Text-content-length")) { + node_ctx.textLength = atoi(val); + break; + } + if (constcmp(t, "Prop-content-length")) + continue; node_ctx.propLength = atoi(val); - } else if (key == keys.text_delta) { - node_ctx.text_delta = !strcmp(val, "true"); - } else if (key == keys.prop_delta) { + break; + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; node_ctx.prop_delta = !strcmp(val, "true"); - } else if (key == keys.content_length) { + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; len = atoi(val); - buffer_read_line(&input); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { handle_node(); - active_ctx = REV_CTX; + active_ctx = INTERNODE_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(&input, len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); } } } + if (buffer_ferror(&input)) + die_short_read(); if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); } int svndump_init(const char *filename) { if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); - repo_init(); + fast_export_init(REPORT_FILENO); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); - init_keys(); return 0; } void svndump_deinit(void) { log_reset(); - repo_reset(); + fast_export_deinit(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); @@@ -429,8 -446,8 +485,8 @@@ void svndump_reset(void) { log_reset(); + fast_export_reset(); buffer_reset(&input); - repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL);