Merge branch 'jn/svn-fe'
authorJunio C Hamano <gitster@pobox.com>
Tue, 31 Aug 2010 23:23:38 +0000 (16:23 -0700)
committerJunio C Hamano <gitster@pobox.com>
Tue, 31 Aug 2010 23:23:38 +0000 (16:23 -0700)
* jn/svn-fe:
t/t9010-svn-fe.sh: add an +x bit to this test
t9010 (svn-fe): avoid symlinks in test
t9010 (svn-fe): use Unix-style path in URI
vcs-svn: Avoid %z in format string
vcs-svn: Rename dirent pool to build on Windows
compat: add strtok_r()
treap: style fix
vcs-svn: remove build artifacts on "make clean"
svn-fe manual: Clarify warning about deltas in dump files
Update svn-fe manual
SVN dump parser
Infrastructure to write revisions in fast-export format
Add stream helper library
Add string-specific memory pool
Add treap implementation
Add memory pool library
Introduce vcs-svn lib

31 files changed:
.gitignore
Makefile
compat/strtok_r.c [new file with mode: 0644]
config.mak.in
configure.ac
contrib/svn-fe/svn-fe.c
contrib/svn-fe/svn-fe.txt
git-compat-util.h
t/t0080-vcs-svn.sh [new file with mode: 0755]
t/t9010-svn-fe.sh [new file with mode: 0755]
test-line-buffer.c [new file with mode: 0644]
test-obj-pool.c [new file with mode: 0644]
test-string-pool.c [new file with mode: 0644]
test-svn-fe.c [new file with mode: 0644]
test-treap.c [new file with mode: 0644]
vcs-svn/LICENSE [new file with mode: 0644]
vcs-svn/fast_export.c [new file with mode: 0644]
vcs-svn/fast_export.h [new file with mode: 0644]
vcs-svn/line_buffer.c [new file with mode: 0644]
vcs-svn/line_buffer.h [new file with mode: 0644]
vcs-svn/line_buffer.txt [new file with mode: 0644]
vcs-svn/obj_pool.h [new file with mode: 0644]
vcs-svn/repo_tree.c [new file with mode: 0644]
vcs-svn/repo_tree.h [new file with mode: 0644]
vcs-svn/string_pool.c [new file with mode: 0644]
vcs-svn/string_pool.h [new file with mode: 0644]
vcs-svn/string_pool.txt [new file with mode: 0644]
vcs-svn/svndump.c [new file with mode: 0644]
vcs-svn/svndump.h [new file with mode: 0644]
vcs-svn/trp.h [new file with mode: 0644]
vcs-svn/trp.txt [new file with mode: 0644]
index fcdd822d8a3c97621fb712a1e14b7f62cbdd6d3b..4cb14e0baeb325f871d0e48cb03446a5309f92de 100644 (file)
 /test-dump-cache-tree
 /test-genrandom
 /test-index-version
+/test-line-buffer
 /test-match-trees
+/test-obj-pool
 /test-parse-options
 /test-path-utils
 /test-run-command
 /test-sha1
 /test-sigchain
+/test-string-pool
+/test-svn-fe
+/test-treap
 /common-cmds.h
 *.tar.gz
 *.dsc
index b4745a5412a206704ada573c7388e96b094ca050..32e3eb385d82126396c452f43b1fa70d5a7b4321 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,8 @@ all::
 #
 # Define NO_MKSTEMPS if you don't have mkstemps in the C library.
 #
+# Define NO_STRTOK_R if you don't have strtok_r in the C library.
+#
 # Define NO_LIBGEN_H if you don't have libgen.h.
 #
 # Define NEEDS_LIBGEN if your libgen needs -lgen when linking
@@ -408,12 +410,17 @@ TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-genrandom
+TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
+TEST_PROGRAMS_NEED_X += test-obj-pool
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
 TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-string-pool
+TEST_PROGRAMS_NEED_X += test-svn-fe
+TEST_PROGRAMS_NEED_X += test-treap
 TEST_PROGRAMS_NEED_X += test-index-version
 
 TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
@@ -468,6 +475,7 @@ export PYTHON_PATH
 
 LIB_FILE=libgit.a
 XDIFF_LIB=xdiff/lib.a
+VCSSVN_LIB=vcs-svn/lib.a
 
 LIB_H += advice.h
 LIB_H += archive.h
@@ -1035,6 +1043,7 @@ ifeq ($(uname_S),Windows)
        NO_UNSETENV = YesPlease
        NO_STRCASESTR = YesPlease
        NO_STRLCPY = YesPlease
+       NO_STRTOK_R = YesPlease
        NO_MEMMEM = YesPlease
        # NEEDS_LIBICONV = YesPlease
        NO_ICONV = YesPlease
@@ -1089,6 +1098,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
        NO_UNSETENV = YesPlease
        NO_STRCASESTR = YesPlease
        NO_STRLCPY = YesPlease
+       NO_STRTOK_R = YesPlease
        NO_MEMMEM = YesPlease
        NEEDS_LIBICONV = YesPlease
        OLD_ICONV = YesPlease
@@ -1319,6 +1329,10 @@ endif
 ifdef NO_STRTOULL
        COMPAT_CFLAGS += -DNO_STRTOULL
 endif
+ifdef NO_STRTOK_R
+       COMPAT_CFLAGS += -DNO_STRTOK_R
+       COMPAT_OBJS += compat/strtok_r.o
+endif
 ifdef NO_SETENV
        COMPAT_CFLAGS += -DNO_SETENV
        COMPAT_OBJS += compat/setenv.o
@@ -1739,7 +1753,9 @@ ifndef NO_CURL
 endif
 XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
        xdiff/xmerge.o xdiff/xpatience.o
-OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS)
+VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \
+       vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o
+OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)
 
 dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
 dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS))))
@@ -1861,6 +1877,11 @@ http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h
 xdiff-interface.o $(XDIFF_OBJS): \
        xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
        xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
+
+$(VCSSVN_OBJS): \
+       vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \
+       vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \
+       vcs-svn/svndump.h
 endif
 
 exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@@ -1909,6 +1930,8 @@ $(LIB_FILE): $(LIB_OBJS)
 $(XDIFF_LIB): $(XDIFF_OBJS)
        $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS)
 
+$(VCSSVN_LIB): $(VCSSVN_OBJS)
+       $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS)
 
 doc:
        $(MAKE) -C Documentation all
@@ -2007,12 +2030,18 @@ test-date$X: date.o ctype.o
 
 test-delta$X: diff-delta.o patch-delta.o
 
+test-line-buffer$X: vcs-svn/lib.a
+
 test-parse-options$X: parse-options.o
 
+test-string-pool$X: vcs-svn/lib.a
+
+test-svn-fe$X: vcs-svn/lib.a
+
 .PRECIOUS: $(TEST_OBJS)
 
 test-%$X: test-%.o $(GITLIBS)
-       $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+       $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)
 
 check-sha1:: test-sha1$X
        ./test-sha1.sh
@@ -2187,8 +2216,8 @@ distclean: clean
        $(RM) configure
 
 clean:
-       $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \
-               builtin/*.o $(LIB_FILE) $(XDIFF_LIB)
+       $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \
+               builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB)
        $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
        $(RM) $(TEST_PROGRAMS)
        $(RM) -r bin-wrappers
diff --git a/compat/strtok_r.c b/compat/strtok_r.c
new file mode 100644 (file)
index 0000000..7b5d568
--- /dev/null
@@ -0,0 +1,61 @@
+/* Reentrant string tokenizer.  Generic version.
+   Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "../git-compat-util.h"
+
+/* Parse S into tokens separated by characters in DELIM.
+   If S is NULL, the saved pointer in SAVE_PTR is used as
+   the next starting point.  For example:
+       char s[] = "-abc-=-def";
+       char *sp;
+       x = strtok_r(s, "-", &sp);      // x = "abc", sp = "=-def"
+       x = strtok_r(NULL, "-=", &sp);  // x = "def", sp = "" (end of s)
+       x = strtok_r(NULL, "=", &sp);   // x = NULL
+               // s = "-abc\0=-def\0"
+   Returns the next token, or NULL when only delimiters remain.
+   S is modified in place: the delimiter that ends each token is
+   overwritten with '\0'.
+*/
+char *
+gitstrtok_r (char *s, const char *delim, char **save_ptr)
+{
+  char *token;
+
+  if (s == NULL)
+    s = *save_ptr;
+
+  /* Scan leading delimiters.  */
+  s += strspn (s, delim);
+  if (*s == '\0')
+    {
+      /* Only delimiters were left.  Park the saved pointer on the
+         terminating NUL so that further calls keep returning NULL.  */
+      *save_ptr = s;
+      return NULL;
+    }
+
+  /* Find the end of the token.  */
+  token = s;
+  s = strpbrk (token, delim);
+  if (s == NULL)
+    /* This token finishes the string.  */
+    *save_ptr = token + strlen (token);
+  else
+    {
+      /* Terminate the token and make *SAVE_PTR point past it.  */
+      *s = '\0';
+      *save_ptr = s + 1;
+    }
+  return token;
+}
index b4e65c32b235eafafefeed1c755be7e1ad5c71ae..4ffd77420f089452edf23ec63b94ad88cf0f37f5 100644 (file)
@@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@
 NO_C99_FORMAT=@NO_C99_FORMAT@
 NO_HSTRERROR=@NO_HSTRERROR@
 NO_STRCASESTR=@NO_STRCASESTR@
+NO_STRTOK_R=@NO_STRTOK_R@
 NO_MEMMEM=@NO_MEMMEM@
 NO_STRLCPY=@NO_STRLCPY@
 NO_UINTMAX_T=@NO_UINTMAX_T@
index 5601e8bac953c670e35f32ffe48d157dd5694ce7..708e7b86ce2a55dffc162025c7a009a88b0d8c38 100644 (file)
@@ -783,6 +783,12 @@ GIT_CHECK_FUNC(strcasestr,
 [NO_STRCASESTR=YesPlease])
 AC_SUBST(NO_STRCASESTR)
 #
+# Define NO_STRTOK_R if you don't have strtok_r
+GIT_CHECK_FUNC(strtok_r,
+[NO_STRTOK_R=],
+[NO_STRTOK_R=YesPlease])
+AC_SUBST(NO_STRTOK_R)
+#
 # Define NO_MEMMEM if you don't have memmem.
 GIT_CHECK_FUNC(memmem,
 [NO_MEMMEM=],
index e9b9ba4da41c2de0a8ed8e1c540d2c108d0616f0..a2677b03e0ff8347d13a5d56f4fa2e1aba18824a 100644 (file)
@@ -10,6 +10,7 @@ int main(int argc, char **argv)
 {
        svndump_init(NULL);
        svndump_read((argc > 1) ? argv[1] : NULL);
+       svndump_deinit();
        svndump_reset();
        return 0;
 }
index de30f83a1f94aa8dd2bf442e22634524e2689187..35f84bd9e7577b58bc9905c8b46806bfb37963ff 100644 (file)
@@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import
 DESCRIPTION
 -----------
 
-Converts a Subversion dumpfile (version: 2) into input suitable for
+Converts a Subversion dumpfile into input suitable for
 git-fast-import(1) and similar importers. REPO is a path to a
 Subversion repository mirrored on the local disk. Remote Subversion
 repositories can be mirrored on local disk using the `svnsync`
@@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in
 Files in this format can be generated using the 'svnadmin dump' or
 'svk admin dump' command.
 
+Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3)
+are not supported.
+
 OUTPUT FORMAT
 -------------
 The fast-import format is documented by the git-fast-import(1)
@@ -43,11 +46,9 @@ user <user@UUID>
 as committer, where 'user' is the value of the `svn:author` property
 and 'UUID' the repository's identifier.
 
-To support incremental imports, 'svn-fe' will put a `git-svn-id`
-line at the end of each commit log message if passed an url on the
-command line.  This line has the form `git-svn-id: URL@REVNO UUID`.
-
-Empty directories and unknown properties are silently discarded.
+To support incremental imports, 'svn-fe' puts a `git-svn-id` line at
+the end of each commit log message if passed a URL on the command
+line.  This line has the form `git-svn-id: URL@REVNO UUID`.
 
 The resulting repository will generally require further processing
 to put each project in its own repository and to separate the history
@@ -56,9 +57,9 @@ may be useful for this purpose.
 
 BUGS
 ----
-Litters the current working directory with .bin files for
-persistence. Will be fixed when the svn-fe infrastructure is aware of
-a Git working directory.
+Empty directories and unknown properties are silently discarded.
+
+The exit status does not reflect whether an error was detected.
 
 SEE ALSO
 --------
index fe845ae639767dc8f56a9196a7faefb468737bba..877096ecb09e524174ff22db3722d6428f43bcf4 100644 (file)
@@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t);
 extern uintmax_t gitstrtoumax(const char *, char **, int);
 #endif
 
+#ifdef NO_STRTOK_R
+#define strtok_r gitstrtok_r
+extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr);
+#endif
+
 #ifdef NO_HSTRERROR
 #define hstrerror githstrerror
 extern const char *githstrerror(int herror);
diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh
new file mode 100755 (executable)
index 0000000..d3225ad
--- /dev/null
@@ -0,0 +1,171 @@
+#!/bin/sh
+
+test_description='check infrastructure for svn importer'
+
+. ./test-lib.sh
+uint32_max=4294967295
+
+test_expect_success 'obj pool: store data' '
+       cat <<-\EOF >expected &&
+       0
+       1
+       EOF
+
+       test-obj-pool <<-\EOF >actual &&
+       alloc one 16
+       set one 13
+       test one 13
+       reset one
+       EOF
+       test_cmp expected actual
+'
+
+test_expect_success 'obj pool: NULL is offset ~0' '
+       echo "$uint32_max" >expected &&
+       echo null one | test-obj-pool >actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'obj pool: out-of-bounds access' '
+       cat <<-EOF >expected &&
+       0
+       0
+       $uint32_max
+       $uint32_max
+       16
+       20
+       $uint32_max
+       EOF
+
+       test-obj-pool <<-\EOF >actual &&
+       alloc one 16
+       alloc two 16
+       offset one 20
+       offset two 20
+       alloc one 5
+       offset one 20
+       free one 1
+       offset one 20
+       reset one
+       reset two
+       EOF
+       test_cmp expected actual
+'
+
+test_expect_success 'obj pool: high-water mark' '
+       cat <<-\EOF >expected &&
+       0
+       0
+       10
+       20
+       20
+       20
+       EOF
+
+       test-obj-pool <<-\EOF >actual &&
+       alloc one 10
+       committed one
+       alloc one 10
+       commit one
+       committed one
+       alloc one 10
+       free one 20
+       committed one
+       reset one
+       EOF
+       test_cmp expected actual
+'
+
+test_expect_success 'line buffer' '
+       echo HELLO >expected1 &&
+       printf "%s\n" "" HELLO >expected2 &&
+       echo >expected3 &&
+       printf "%s\n" "" Q | q_to_nul >expected4 &&
+       printf "%s\n" foo "" >expected5 &&
+       printf "%s\n" "" foo >expected6 &&
+
+       test-line-buffer <<-\EOF >actual1 &&
+       5
+       HELLO
+       EOF
+
+       test-line-buffer <<-\EOF >actual2 &&
+       0
+
+       5
+       HELLO
+       EOF
+
+       q_to_nul <<-\EOF |
+       1
+       Q
+       EOF
+       test-line-buffer >actual3 &&
+
+       q_to_nul <<-\EOF |
+       0
+
+       1
+       Q
+       EOF
+       test-line-buffer >actual4 &&
+
+       test-line-buffer <<-\EOF >actual5 &&
+       5
+       foo
+       EOF
+
+       test-line-buffer <<-\EOF >actual6 &&
+       0
+
+       5
+       foo
+       EOF
+
+       test_cmp expected1 actual1 &&
+       test_cmp expected2 actual2 &&
+       test_cmp expected3 actual3 &&
+       test_cmp expected4 actual4 &&
+       test_cmp expected5 actual5 &&
+       test_cmp expected6 actual6
+'
+
+test_expect_success 'string pool' '
+       echo a does not equal b >expected.differ &&
+       echo a equals a >expected.match &&
+       echo equals equals equals >expected.matchmore &&
+
+       test-string-pool "a,--b" >actual.differ &&
+       test-string-pool "a,a" >actual.match &&
+       test-string-pool "equals-equals" >actual.matchmore &&
+       test_must_fail test-string-pool a,a,a &&
+       test_must_fail test-string-pool a &&
+
+       test_cmp expected.differ actual.differ &&
+       test_cmp expected.match actual.match &&
+       test_cmp expected.matchmore actual.matchmore
+'
+
+test_expect_success 'treap sort' '
+       cat <<-\EOF >unsorted &&
+       68
+       12
+       13
+       13
+       68
+       13
+       13
+       21
+       10
+       11
+       12
+       13
+       13
+       EOF
+       sort unsorted >expected &&
+
+       test-treap <unsorted >actual &&
+       test_cmp expected actual
+'
+
+test_done
diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh
new file mode 100755 (executable)
index 0000000..a713dfc
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+test_description='check svn dumpfile importer'
+
+. ./lib-git-svn.sh
+
+# test_dump <label> <dump>
+#
+# Load $TEST_DIRECTORY/<dump> into a fresh svn repository and export
+# it, convert the same dump with test-svn-fe and feed the result to
+# git fast-import, then check that the imported master is identical to
+# the svn export (git diff --exit-code FETCH_HEAD).  <label> prefixes
+# every scratch directory and file the test creates; <dump> doubles as
+# the test title.
+test_dump() {
+       label=$1
+       dump=$2
+       test_expect_success "$dump" '
+               svnadmin create "$label-svn" &&
+               svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" &&
+               svn_cmd export "file://$PWD/$label-svn" "$label-svnco" &&
+               git init "$label-git" &&
+               test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" &&
+               (
+                       cd "$label-git" &&
+                       git fast-import < ../"$label.fe"
+               ) &&
+               (
+                       cd "$label-svnco" &&
+                       git init &&
+                       git add . &&
+                       git fetch "../$label-git" master &&
+                       git diff --exit-code FETCH_HEAD
+               )
+       '
+}
+
+test_dump simple t9135/svn.dump
+
+test_done
diff --git a/test-line-buffer.c b/test-line-buffer.c
new file mode 100644 (file)
index 0000000..c11bf7f
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * test-line-buffer.c: code to exercise the svn importer's input helper
+ *
+ * Input format:
+ *     number NL
+ *     (number bytes) NL
+ *     number NL
+ *     ...
+ */
+
+#include "git-compat-util.h"
+#include "vcs-svn/line_buffer.h"
+
+/*
+ * Parse S as a decimal count.  Dies unless S is non-empty and is
+ * consumed completely by strtoumax().
+ */
+static uint32_t strtouint32(const char *s)
+{
+       char *rest;
+       uintmax_t value;
+
+       value = strtoumax(s, &rest, 10);
+       if (!*s || *rest)
+               die("invalid count: %s", s);
+       /* The cast keeps only the low 32 bits of larger values. */
+       return (uint32_t) value;
+}
+
+/*
+ * Read "<count> NL <count bytes> NL" records from stdin in pairs:
+ * the first record of each pair is fetched with buffer_read_string()
+ * and printed followed by a newline, the second is passed through
+ * with buffer_copy_bytes() (payload plus its trailing newline).
+ * Takes no arguments; dies on bad counts or input/output errors.
+ */
+int main(int argc, char *argv[])
+{
+       char *s;
+
+       if (argc != 1)
+               usage("test-line-buffer < input.txt");
+       if (buffer_init(NULL))
+               die_errno("open error");
+       while ((s = buffer_read_line())) {
+               s = buffer_read_string(strtouint32(s));
+               /* buffer_read_string() yields NULL on a read error. */
+               if (!s)
+                       die("input error");
+               fputs(s, stdout);
+               fputc('\n', stdout);
+               /* Step over the newline that follows the payload. */
+               buffer_skip_bytes(1);
+               if (!(s = buffer_read_line()))
+                       break;
+               /* "+ 1" copies the newline after the payload as well. */
+               buffer_copy_bytes(strtouint32(s) + 1);
+       }
+       if (buffer_deinit())
+               die("input error");
+       if (ferror(stdout))
+               die("output error");
+       buffer_reset();
+       return 0;
+}
diff --git a/test-obj-pool.c b/test-obj-pool.c
new file mode 100644 (file)
index 0000000..5018863
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * test-obj-pool.c: code to exercise the svn importer's object pool
+ */
+
+#include "cache.h"
+#include "vcs-svn/obj_pool.h"
+
+/*
+ * The two pools a script line can address.  obj_pool_gen() supplies the
+ * one_* and two_* helpers used by handle_command().  The pools differ
+ * only in the third macro argument (1 vs 4096) -- presumably the
+ * initial capacity; confirm against vcs-svn/obj_pool.h.
+ */
+enum pool { POOL_ONE, POOL_TWO };
+obj_pool_gen(one, int, 1)
+obj_pool_gen(two, int, 4096)
+
+/*
+ * Parse S as a decimal offset.  Dies if S is empty or if anything
+ * other than a newline or the end of the string follows the number.
+ */
+static uint32_t strtouint32(const char *s)
+{
+       char *rest;
+       uintmax_t value;
+
+       value = strtoumax(s, &rest, 10);
+       if (!*s || (*rest && *rest != '\n'))
+               die("invalid offset: %s", s);
+       /* The cast keeps only the low 32 bits of larger values. */
+       return (uint32_t) value;
+}
+
+/*
+ * Run one command against POOL.  COMMAND is the whole input line and
+ * is matched by prefix; ARG is the text that follows the pool name.
+ *
+ *   alloc <pool> <n>     print the result of <pool>_alloc(n)
+ *   commit <pool>        call <pool>_commit()
+ *   committed <pool>     print <pool>_pool.committed
+ *   free <pool> <n>      call <pool>_free(n)
+ *   null <pool>          print <pool>_offset(NULL)
+ *   offset <pool> <n>    print <pool>_offset(<pool>_pointer(n))
+ *   reset <pool>         call <pool>_reset()
+ *   set <pool> <n>       store 1 at <pool>_pointer(n)
+ *   test <pool> <n>      print the int at <pool>_pointer(n)
+ *
+ * No case ends in "break".  A command that matches a case letter but
+ * none of its prefixes falls through the remaining cases (whose
+ * prefixes cannot match either) down to the default, which dies.
+ */
+static void handle_command(const char *command, enum pool pool, const char *arg)
+{
+       switch (*command) {
+       case 'a':
+               if (!prefixcmp(command, "alloc ")) {
+                       uint32_t n = strtouint32(arg);
+                       printf("%"PRIu32"\n",
+                               pool == POOL_ONE ?
+                               one_alloc(n) : two_alloc(n));
+                       return;
+               }
+               /* fall through: not a known command */
+       case 'c':
+               if (!prefixcmp(command, "commit ")) {
+                       pool == POOL_ONE ? one_commit() : two_commit();
+                       return;
+               }
+               if (!prefixcmp(command, "committed ")) {
+                       printf("%"PRIu32"\n",
+                               pool == POOL_ONE ?
+                               one_pool.committed : two_pool.committed);
+                       return;
+               }
+               /* fall through */
+       case 'f':
+               if (!prefixcmp(command, "free ")) {
+                       uint32_t n = strtouint32(arg);
+                       pool == POOL_ONE ? one_free(n) : two_free(n);
+                       return;
+               }
+               /* fall through */
+       case 'n':
+               if (!prefixcmp(command, "null ")) {
+                       printf("%"PRIu32"\n",
+                               pool == POOL_ONE ?
+                               one_offset(NULL) : two_offset(NULL));
+                       return;
+               }
+               /* fall through */
+       case 'o':
+               if (!prefixcmp(command, "offset ")) {
+                       uint32_t n = strtouint32(arg);
+                       printf("%"PRIu32"\n",
+                               pool == POOL_ONE ?
+                               one_offset(one_pointer(n)) :
+                               two_offset(two_pointer(n)));
+                       return;
+               }
+               /* fall through */
+       case 'r':
+               if (!prefixcmp(command, "reset ")) {
+                       pool == POOL_ONE ? one_reset() : two_reset();
+                       return;
+               }
+               /* fall through */
+       case 's':
+               if (!prefixcmp(command, "set ")) {
+                       uint32_t n = strtouint32(arg);
+                       if (pool == POOL_ONE)
+                               *one_pointer(n) = 1;
+                       else
+                               *two_pointer(n) = 1;
+                       return;
+               }
+               /* fall through */
+       case 't':
+               if (!prefixcmp(command, "test ")) {
+                       uint32_t n = strtouint32(arg);
+                       printf("%d\n", pool == POOL_ONE ?
+                               *one_pointer(n) : *two_pointer(n));
+                       return;
+               }
+               /* fall through */
+       default:
+               die("unrecognized command: %s", command);
+       }
+}
+
+/*
+ * Split LINE ("<command> <pool> [<arg>]") and run it.  The word after
+ * the first space selects the pool and must begin with "one" or "two";
+ * anything else is fatal.
+ */
+static void handle_line(const char *line)
+{
+       const char *arg = strchr(line, ' ');
+       enum pool pool;
+
+       if (arg && !prefixcmp(arg + 1, "one"))
+               pool = POOL_ONE;
+       else if (arg && !prefixcmp(arg + 1, "two"))
+               pool = POOL_TWO;
+       else
+               die("no pool specified: %s", line);
+
+       /*
+        * arg points at the space before the pool name, so skipping
+        * strlen("one ") bytes lands just past the three-letter name
+        * ("two" has the same length): on the separator before the
+        * argument, or on the terminating NUL if there is none.
+        */
+       handle_command(line, pool, arg + strlen("one "));
+}
+
+/*
+ * Read a script from stdin and hand each line to handle_line().
+ * Takes no arguments.  Returns 0 once stdin is exhausted; invalid
+ * lines make handle_line() die.
+ */
+int main(int argc, char *argv[])
+{
+       struct strbuf sb = STRBUF_INIT;
+       if (argc != 1)
+               /* The usage text must name this program, test-obj-pool. */
+               usage("test-obj-pool < script");
+
+       while (strbuf_getline(&sb, stdin, '\n') != EOF)
+               handle_line(sb.buf);
+       strbuf_release(&sb);
+       return 0;
+}
diff --git a/test-string-pool.c b/test-string-pool.c
new file mode 100644 (file)
index 0000000..c5782e6
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * test-string-pool.c: code to exercise the svn importer's string pool
+ */
+
+#include "git-compat-util.h"
+#include "vcs-svn/string_pool.h"
+
+/*
+ * Split argv[1] with pool_tok_seq() using "," and "-" as delimiters,
+ * require exactly two resulting entries, and print
+ * "<first> equals <second>" or "<first> does not equal <second>"
+ * depending on whether the two entries compare equal as pool values.
+ * NOTE(review): pool_intern()/pool_tok_seq() semantics come from
+ * vcs-svn/string_pool.h, which is not visible here; this assumes equal
+ * strings map to equal uint32_t values -- confirm.
+ */
+int main(int argc, char *argv[])
+{
+       const uint32_t unequal = pool_intern("does not equal");
+       const uint32_t equal = pool_intern("equals");
+       uint32_t buf[3];
+       uint32_t n;
+
+       if (argc != 2)
+               usage("test-string-pool <string>,<string>");
+
+       n = pool_tok_seq(3, buf, ",-", argv[1]);
+       /* Only n == 2 gets past the two checks below. */
+       if (n >= 3)
+               die("too many strings");
+       if (n <= 1)
+               die("too few strings");
+
+       /* Make room in the middle for the verdict: [first, verdict, second]. */
+       buf[2] = buf[1];
+       buf[1] = (buf[0] == buf[2]) ? equal : unequal;
+       pool_print_seq(3, buf, ' ', stdout);
+       fputc('\n', stdout);
+
+       pool_reset();
+       return 0;
+}
diff --git a/test-svn-fe.c b/test-svn-fe.c
new file mode 100644 (file)
index 0000000..77cf78a
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * test-svn-fe: Code to exercise the svn import lib
+ */
+
+#include "git-compat-util.h"
+#include "vcs-svn/svndump.h"
+
+/*
+ * Run the svn dump importer on the file named by the single argument:
+ * svndump_init(argv[1]), svndump_read(NULL), svndump_deinit(),
+ * svndump_reset(), in that order.  No return value of the svndump_*
+ * calls is inspected, so the exit status is 0 whenever those calls
+ * return.
+ */
+int main(int argc, char *argv[])
+{
+       if (argc != 2)
+               usage("test-svn-fe <file>");
+       svndump_init(argv[1]);
+       svndump_read(NULL);
+       svndump_deinit();
+       svndump_reset();
+       return 0;
+}
diff --git a/test-treap.c b/test-treap.c
new file mode 100644 (file)
index 0000000..cdba511
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * test-treap.c: code to exercise the svn importer's treap structure
+ */
+
+#include "cache.h"
+#include "vcs-svn/obj_pool.h"
+#include "vcs-svn/trp.h"
+
+/*
+ * Treap element used by this test: an integer key plus the link field
+ * that the trp_gen() invocation below is told to use ("children").
+ */
+struct int_node {
+       uintmax_t n;                    /* sort key, see node_cmp() */
+       struct trp_node children;       /* treap bookkeeping */
+};
+
+obj_pool_gen(node, struct int_node, 3)
+
+/* Three-way comparison of two nodes by key: -1, 0 or 1. */
+static int node_cmp(struct int_node *a, struct int_node *b)
+{
+       if (a->n < b->n)
+               return -1;
+       if (a->n > b->n)
+               return 1;
+       return 0;
+}
+
+trp_gen(static, treap_, struct int_node, children, node, node_cmp)
+
+/*
+ * Store the decimal number in S as ITEM's key.  Dies if S is empty or
+ * if anything other than a newline or the end of the string follows
+ * the number.
+ */
+static void strtonode(struct int_node *item, const char *s)
+{
+       char *rest;
+       uintmax_t value = strtoumax(s, &rest, 10);
+
+       item->n = value;
+       if (!*s || (*rest && *rest != '\n'))
+               die("invalid integer: %s", s);
+}
+
+/*
+ * Read one integer per line from stdin into a treap, then drain the
+ * treap from its first node onwards, printing each key on its own
+ * line.  While draining, the node found by treap_nsearch() after each
+ * removal is cross-checked against the successor recorded before the
+ * removal.  Takes no arguments.
+ */
+int main(int argc, char *argv[])
+{
+       struct strbuf sb = STRBUF_INIT;
+       struct trp_root root = { ~0 };
+       uint32_t item;
+
+       if (argc != 1)
+               usage("test-treap < ints");
+
+       while (strbuf_getline(&sb, stdin, '\n') != EOF) {
+               item = node_alloc(1);
+               strtonode(node_pointer(item), sb.buf);
+               treap_insert(&root, node_pointer(item));
+       }
+
+       item = node_offset(treap_first(&root));
+       /* An offset with all bits set stands for "no node"; ~item is then 0. */
+       while (~item) {
+               uint32_t next;
+               /* Scratch node holding a copy of the key, used as search key. */
+               struct int_node *tmp = node_pointer(node_alloc(1));
+
+               tmp->n = node_pointer(item)->n;
+               /* Remember the successor before the node is unlinked. */
+               next = node_offset(treap_next(&root, node_pointer(item)));
+
+               treap_remove(&root, node_pointer(item));
+               /*
+                * NOTE(review): treap_nsearch() presumably returns the
+                * first node not less than the key -- confirm in trp.h.
+                */
+               item = node_offset(treap_nsearch(&root, tmp));
+
+               /*
+                * Accept either the recorded successor or another node
+                * carrying the same key (duplicates); anything else is
+                * a treap bug.
+                */
+               if (item != next && (!~item || node_pointer(item)->n != tmp->n))
+                       die("found %"PRIuMAX" in place of %"PRIuMAX"",
+                               ~item ? node_pointer(item)->n : ~(uintmax_t) 0,
+                               ~next ? node_pointer(next)->n : ~(uintmax_t) 0);
+               printf("%"PRIuMAX"\n", tmp->n);
+       }
+       node_reset();
+       return 0;
+}
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE
new file mode 100644 (file)
index 0000000..0a5e3c4
--- /dev/null
@@ -0,0 +1,33 @@
+Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
+All rights reserved.
+
+Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
+All rights reserved.
+
+Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH,
+Frankfurt/Main, Germany
+and others, see http://svn2cc.sarovar.org
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice(s), this list of conditions and the following disclaimer
+   unmodified other than the allowable addition of one or more
+   copyright notices.
+2. Redistributions in binary form must reproduce the above copyright
+   notice(s), this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
new file mode 100644 (file)
index 0000000..256a052
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "repo_tree.h"
+#include "string_pool.h"
+
+#define MAX_GITSVN_LINE_LEN 4096
+
+static uint32_t first_commit_done;
+
+/*
+ * Write a "D <path>" line to stdout.  The path is printed by
+ * pool_print_seq() from the DEPTH entries of PATH, joined with '/'.
+ */
+void fast_export_delete(uint32_t depth, uint32_t *path)
+{
+       fputs("D ", stdout);
+       pool_print_seq(depth, path, '/', stdout);
+       fputc('\n', stdout);
+}
+
+/*
+ * Write an "M <mode> :<mark> <path>" line to stdout.  MODE is printed
+ * as six octal digits, MARK in decimal, and the path by
+ * pool_print_seq() from the DEPTH entries of PATH, joined with '/'.
+ */
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                       uint32_t mark)
+{
+       /* Mode must be 100644, 100755, 120000, or 160000. */
+       /* Both values are uint32_t, so use the matching format macros. */
+       printf("M %06"PRIo32" :%"PRIu32" ", mode, mark);
+       pool_print_seq(depth, path, '/', stdout);
+       putchar('\n');
+}
+
+/* Scratch buffer for the optional "git-svn-id:" trailer of a commit log. */
+static char gitsvnline[MAX_GITSVN_LINE_LEN];
+
+/*
+ * Write one commit on refs/heads/master to stdout.
+ *
+ * REVISION is the revision number; AUTHOR, UUID and URL are string
+ * pool values, where all-bits-set means "absent"; LOG may be NULL and
+ * is then treated as empty; TIMESTAMP is printed as the committer
+ * time with a +0000 zone.
+ *
+ * When both UUID and URL are present a "git-svn-id: URL@REVISION UUID"
+ * trailer is appended to the log.  The first commit written by this
+ * process gets a "from refs/heads/master^0" line if REVISION > 1.
+ * The file changes are emitted by repo_diff(REVISION - 1, REVISION).
+ */
+void fast_export_commit(uint32_t revision, uint32_t author, char *log,
+                       uint32_t uuid, uint32_t url,
+                       unsigned long timestamp)
+{
+       if (!log)
+               log = "";
+       if (~uuid && ~url) {
+               /* revision is uint32_t: PRIu32, not %d. */
+               snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
+                                "\n\ngit-svn-id: %s@%"PRIu32" %s\n",
+                                pool_fetch(url), revision, pool_fetch(uuid));
+       } else {
+               *gitsvnline = '\0';
+       }
+       printf("commit refs/heads/master\n");
+       /* timestamp is unsigned long: %lu, not %ld. */
+       printf("committer %s <%s@%s> %lu +0000\n",
+                  ~author ? pool_fetch(author) : "nobody",
+                  ~author ? pool_fetch(author) : "nobody",
+                  ~uuid ? pool_fetch(uuid) : "local", timestamp);
+       printf("data %"PRIu32"\n%s%s\n",
+                  (uint32_t) (strlen(log) + strlen(gitsvnline)),
+                  log, gitsvnline);
+       if (!first_commit_done) {
+               if (revision > 1)
+                       printf("from refs/heads/master^0\n");
+               first_commit_done = 1;
+       }
+       repo_diff(revision - 1, revision);
+       fputc('\n', stdout);
+
+       printf("progress Imported commit %"PRIu32".\n\n", revision);
+}
+
+/*
+ * Write a blob with mark MARK to stdout, copying LEN bytes of content
+ * from the line buffer.  For symlinks (REPO_MODE_LNK) the leading
+ * "link " is skipped and not counted in the emitted length.
+ * NOTE(review): a symlink blob with LEN < 5 makes "len -= 5" wrap
+ * around; callers are assumed never to pass one -- confirm.
+ */
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len)
+{
+       if (mode == REPO_MODE_LNK) {
+               /* svn symlink blobs start with "link " */
+               buffer_skip_bytes(5);
+               len -= 5;
+       }
+       /* mark and len are uint32_t: PRIu32 keeps large values positive. */
+       printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len);
+       buffer_copy_bytes(len);
+       fputc('\n', stdout);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
new file mode 100644 (file)
index 0000000..2aaaea5
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef FAST_EXPORT_H_
+#define FAST_EXPORT_H_
+
+void fast_export_delete(uint32_t depth, uint32_t *path);
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+                       uint32_t mark);
+void fast_export_commit(uint32_t revision, uint32_t author, char *log,
+                       uint32_t uuid, uint32_t url, unsigned long timestamp);
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len);
+
+#endif
diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
new file mode 100644 (file)
index 0000000..1543567
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+
+#define LINE_BUFFER_LEN 10000
+#define COPY_BUFFER_LEN 4096
+
+/* Create memory pool for char sequence of known length */
+obj_pool_gen(blob, char, 4096)
+
+static char line_buffer[LINE_BUFFER_LEN];
+static char byte_buffer[COPY_BUFFER_LEN];
+static FILE *infile;
+
+/*
+ * Select the input stream: the named file, or stdin when filename is
+ * NULL.  Returns 0 on success and -1 if the file cannot be opened.
+ */
+int buffer_init(const char *filename)
+{
+       if (filename)
+               infile = fopen(filename, "r");
+       else
+               infile = stdin;
+       return infile ? 0 : -1;
+}
+
+/*
+ * Finish with the current input stream.  stdin is left open; any other
+ * stream is closed.  Returns nonzero if the stream's error indicator
+ * was set or fclose() failed.
+ */
+int buffer_deinit(void)
+{
+       int status = ferror(infile);
+       if (infile != stdin)
+               status |= fclose(infile);
+       return status;
+}
+
+/*
+ * Read a line without trailing newline.
+ *
+ * Returns a pointer to the static line buffer, or NULL on read error,
+ * end of input, or when the line does not fit in the buffer.
+ */
+char *buffer_read_line(void)
+{
+       char *end;
+       if (!fgets(line_buffer, sizeof(line_buffer), infile))
+               /* Error or data exhausted. */
+               return NULL;
+       end = line_buffer + strlen(line_buffer);
+       /*
+        * fgets() can hand back an empty string when the input line
+        * starts with a NUL byte; do not look before the buffer then.
+        */
+       if (end > line_buffer && end[-1] == '\n')
+               end[-1] = '\0';
+       else if (feof(infile))
+               ; /* No newline at end of file.  That's fine. */
+       else
+               /*
+                * Line was too long.
+                * There is probably a saner way to deal with this,
+                * but for now let's return an error.
+                */
+               return NULL;
+       return line_buffer;
+}
+
+/*
+ * Read up to `len` bytes into a NUL-terminated string held in the blob
+ * pool.  The previous string is discarded first.  Returns NULL if the
+ * input stream's error indicator is set.
+ */
+char *buffer_read_string(uint32_t len)
+{
+       char *str;
+       size_t nread;
+
+       /* Release whatever the previous call left in the pool. */
+       blob_free(blob_pool.size);
+       str = blob_pointer(blob_alloc(len + 1));
+       nread = fread(str, 1, len, infile);
+       str[nread] = '\0';
+       if (ferror(infile))
+               return NULL;
+       return str;
+}
+
+/*
+ * Copy `len` bytes from the input stream to stdout in chunks of at
+ * most COPY_BUFFER_LEN bytes.  Stops early on input error or end of
+ * file; on output error the rest of the input is skipped.
+ */
+void buffer_copy_bytes(uint32_t len)
+{
+       while (len > 0) {
+               uint32_t want, got;
+
+               if (feof(infile) || ferror(infile))
+                       break;
+               want = COPY_BUFFER_LEN;
+               if (len < want)
+                       want = len;
+               got = fread(byte_buffer, 1, want, infile);
+               len -= got;
+               fwrite(byte_buffer, 1, got, stdout);
+               if (ferror(stdout)) {
+                       buffer_skip_bytes(len);
+                       return;
+               }
+       }
+}
+
+/*
+ * Read and discard `len` bytes of input, stopping early on error or
+ * end of file.
+ */
+void buffer_skip_bytes(uint32_t len)
+{
+       while (len > 0) {
+               uint32_t want, got;
+
+               if (feof(infile) || ferror(infile))
+                       break;
+               want = COPY_BUFFER_LEN;
+               if (len < want)
+                       want = len;
+               got = fread(byte_buffer, 1, want, infile);
+               len -= got;
+       }
+}
+
+/* Free the blob pool that backs buffer_read_string(). */
+void buffer_reset(void)
+{
+       blob_reset();
+}
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
new file mode 100644 (file)
index 0000000..9c78ae1
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef LINE_BUFFER_H_
+#define LINE_BUFFER_H_
+
+int buffer_init(const char *filename);
+int buffer_deinit(void);
+char *buffer_read_line(void);
+char *buffer_read_string(uint32_t len);
+void buffer_copy_bytes(uint32_t len);
+void buffer_skip_bytes(uint32_t len);
+void buffer_reset(void);
+
+#endif
diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt
new file mode 100644 (file)
index 0000000..8906fb1
--- /dev/null
@@ -0,0 +1,58 @@
+line_buffer API
+===============
+
+The line_buffer library provides a convenient interface for
+mostly-line-oriented input.
+
+Each line, including its newline, must be shorter than 10000 bytes.
+The provided functions are neither thread-safe nor async-signal-safe,
+and like `fgets()`, they generally do not function correctly if
+interrupted by a signal without SA_RESTART set.
+
+Calling sequence
+----------------
+
+The calling program:
+
+ - specifies a file to read with `buffer_init`
+ - processes input with `buffer_read_line`, `buffer_read_string`,
+   `buffer_skip_bytes`, and `buffer_copy_bytes`
+ - closes the file with `buffer_deinit`, perhaps to start over and
+   read another file.
+
+Before exiting, the caller can use `buffer_reset` to deallocate
+resources for the benefit of profiling tools.
+
+Functions
+---------
+
+`buffer_init`::
+       Open the named file for input.  If filename is NULL,
+       start reading from stdin.  On failure, returns -1 (with
+       errno indicating the nature of the failure).
+
+`buffer_deinit`::
+       Stop reading from the current file (closing it unless
+       it was stdin).  Returns nonzero if `fclose` fails or
+       the error indicator was set.
+
+`buffer_read_line`::
+       Read a line and strip off the trailing newline.
+       On failure or end of file, returns NULL.  A line that is
+       too long for the buffer is also treated as a failure.
+
+`buffer_read_string`::
+       Read `len` characters of input or up to the end of the
+       file, whichever comes first.  Returns NULL on error.
+       Returns whatever characters were read (possibly "")
+       for end of file.
+
+`buffer_copy_bytes`::
+       Read `len` bytes of input and dump them to the standard output
+       stream.  Returns early for error or end of file.
+
+`buffer_skip_bytes`::
+       Discards `len` bytes from the input stream (stopping early
+       if necessary because of an error or eof).
+
+`buffer_reset`::
+       Deallocates non-static buffers.
diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h
new file mode 100644 (file)
index 0000000..deb6eb8
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#ifndef OBJ_POOL_H_
+#define OBJ_POOL_H_
+
+#include "git-compat-util.h"
+
+#define MAYBE_UNUSED __attribute__((__unused__))
+
+/*
+ * obj_pool_gen(pre, obj_t, initial_capacity)
+ *
+ * Generate a static, growable array of obj_t named pre_pool plus
+ * accessors that address objects by uint32_t offset:
+ *
+ *   pre_alloc(count)   reserve `count` objects at the end, growing the
+ *                      array (initial_capacity, then doubling) when
+ *                      needed; returns the offset of the first one.
+ *                      Growing may move the array, so pointers obtained
+ *                      earlier must be re-derived from their offsets.
+ *   pre_free(count)    give back the last `count` objects.
+ *   pre_offset(obj)    offset of obj, or ~0 for NULL.
+ *   pre_pointer(off)   pointer to the object, or NULL when `off` is
+ *                      not below the current size.
+ *   pre_commit()       record the current size as the committed mark.
+ *   pre_reset()        free the array and zero all counters.
+ *
+ * NOTE(review): the realloc() result is not checked, and
+ * `size + count` is not guarded against uint32_t wrap-around.
+ */
+#define obj_pool_gen(pre, obj_t, initial_capacity) \
+static struct { \
+       uint32_t committed; \
+       uint32_t size; \
+       uint32_t capacity; \
+       obj_t *base; \
+} pre##_pool = {0, 0, 0, NULL}; \
+static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \
+{ \
+       uint32_t offset; \
+       if (pre##_pool.size + count > pre##_pool.capacity) { \
+               while (pre##_pool.size + count > pre##_pool.capacity) \
+                       if (pre##_pool.capacity) \
+                               pre##_pool.capacity *= 2; \
+                       else \
+                               pre##_pool.capacity = initial_capacity; \
+               pre##_pool.base = realloc(pre##_pool.base, \
+                                       pre##_pool.capacity * sizeof(obj_t)); \
+       } \
+       offset = pre##_pool.size; \
+       pre##_pool.size += count; \
+       return offset; \
+} \
+static MAYBE_UNUSED void pre##_free(uint32_t count) \
+{ \
+       pre##_pool.size -= count; \
+} \
+static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \
+{ \
+       return obj == NULL ? ~0 : obj - pre##_pool.base; \
+} \
+static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \
+{ \
+       return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \
+} \
+static MAYBE_UNUSED void pre##_commit(void) \
+{ \
+       pre##_pool.committed = pre##_pool.size; \
+} \
+static MAYBE_UNUSED void pre##_reset(void) \
+{ \
+       free(pre##_pool.base); \
+       pre##_pool.base = NULL; \
+       pre##_pool.size = 0; \
+       pre##_pool.capacity = 0; \
+       pre##_pool.committed = 0; \
+}
+
+#endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
new file mode 100644 (file)
index 0000000..e94d91d
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+
+#include "string_pool.h"
+#include "repo_tree.h"
+#include "obj_pool.h"
+#include "fast_export.h"
+
+#include "trp.h"
+
+/* One entry of a directory, linked into that directory's treap. */
+struct repo_dirent {
+       /* string-pool key of the entry name; also the treap sort key */
+       uint32_t name_offset;
+       /* treap links (offsets into the dent pool) */
+       struct trp_node children;
+       /* one of the REPO_MODE_* values */
+       uint32_t mode;
+       /* dir pool offset for directories, blob mark otherwise */
+       uint32_t content_offset;
+};
+
+/* A directory: the root of a treap of struct repo_dirent. */
+struct repo_dir {
+       struct trp_root entries;
+};
+
+/* A revision: identified by the dir pool offset of its root directory. */
+struct repo_commit {
+       uint32_t root_dir_offset;
+};
+
+/* Memory pools for commit, dir and dirent */
+obj_pool_gen(commit, struct repo_commit, 4096)
+obj_pool_gen(dir, struct repo_dir, 4096)
+obj_pool_gen(dent, struct repo_dirent, 4096)
+
+static uint32_t active_commit;
+static uint32_t mark;
+
+static int repo_dirent_name_cmp(const void *a, const void *b);
+
+/* Treap for directory entries */
+trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp);
+
+/* Hand out the current blob mark and advance the counter by one. */
+uint32_t next_blob_mark(void)
+{
+       uint32_t current = mark;
+       mark = current + 1;
+       return current;
+}
+
+/* Resolve a commit's root directory; `commit` must not be NULL. */
+static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit)
+{
+       return dir_pointer(commit->root_dir_offset);
+}
+
+/* First entry of `dir` in treap order, as returned by dent_first(). */
+static struct repo_dirent *repo_first_dirent(struct repo_dir *dir)
+{
+       return dent_first(&dir->entries);
+}
+
+/*
+ * Treap comparison callback: order directory entries by the numeric
+ * value of their name key.  Returns -1, 0 or 1.
+ */
+static int repo_dirent_name_cmp(const void *a, const void *b)
+{
+       const struct repo_dirent *lhs = a;
+       const struct repo_dirent *rhs = b;
+
+       if (lhs->name_offset < rhs->name_offset)
+               return -1;
+       if (lhs->name_offset > rhs->name_offset)
+               return 1;
+       return 0;
+}
+
+/* Nonzero iff `dent` is non-NULL and has directory mode. */
+static int repo_dirent_is_dir(struct repo_dirent *dent)
+{
+       if (dent == NULL)
+               return 0;
+       return dent->mode == REPO_MODE_DIR;
+}
+
+/* Directory that `dent` refers to, or NULL if it is not a directory. */
+static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent)
+{
+       return repo_dirent_is_dir(dent) ?
+               dir_pointer(dent->content_offset) : NULL;
+}
+
+/*
+ * Return a directory that may be modified in place: `orig_dir` itself
+ * when it lies at or beyond the committed mark of the dir pool
+ * (including the NULL case, whose offset is ~0), otherwise a freshly
+ * allocated copy.
+ */
+static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir)
+{
+       uint32_t orig_o, new_o;
+       orig_o = dir_offset(orig_dir);
+       if (orig_o >= dir_pool.committed)
+               return orig_dir;
+       new_o = dir_alloc(1);
+       /* dir_alloc() may have moved the pool; re-derive the pointer. */
+       orig_dir = dir_pointer(orig_o);
+       *dir_pointer(new_o) = *orig_dir;
+       return dir_pointer(new_o);
+}
+
+/*
+ * Look up `path` (a ~0-terminated sequence of string-pool keys) in the
+ * tree of `revision`.
+ *
+ * Returns the last entry reached while walking the path, or NULL when
+ * a component is missing, the path is empty, or `revision` does not
+ * name a known commit.
+ */
+static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path)
+{
+       uint32_t name = 0;
+       struct repo_commit *commit = commit_pointer(revision);
+       struct repo_dirent *key;
+       struct repo_dir *dir;
+       struct repo_dirent *dent = NULL;
+
+       /*
+        * The revision can come straight from the dump file, so an
+        * out-of-range value must not be dereferenced.
+        */
+       if (commit == NULL)
+               return NULL;
+       dir = repo_commit_root_dir(commit);
+       if (dir == NULL)
+               return NULL;
+
+       /* Scratch entry used only as the search key; released below. */
+       key = dent_pointer(dent_alloc(1));
+       while (~(name = *path++)) {
+               key->name_offset = name;
+               dent = dent_search(&dir->entries, key);
+               if (dent == NULL || !repo_dirent_is_dir(dent))
+                       break;
+               dir = repo_dir_from_dirent(dent);
+       }
+       dent_free(1);
+       return dent;
+}
+
+/*
+ * Create, update or delete the entry named by `path` (a ~0-terminated
+ * sequence of string-pool keys) in the active commit.
+ *
+ * Every directory along the path is passed through repo_clone_dir(),
+ * and missing components are created as directories.  The final entry
+ * gets `mode` and `content_offset`; when `del` is nonzero it is then
+ * removed from its parent directory.  An empty path is a no-op apart
+ * from the root directory clone.
+ */
+static void repo_write_dirent(uint32_t *path, uint32_t mode,
+                             uint32_t content_offset, uint32_t del)
+{
+       uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0;
+       struct repo_dir *dir;
+       struct repo_dirent *key;
+       struct repo_dirent *dent = NULL;
+       revision = active_commit;
+       /* Make sure the active commit owns a modifiable root directory. */
+       dir = repo_commit_root_dir(commit_pointer(revision));
+       dir = repo_clone_dir(dir);
+       commit_pointer(revision)->root_dir_offset = dir_offset(dir);
+       while (~(name = *path++)) {
+               parent_dir_o = dir_offset(dir);
+
+               /* Speculatively allocate an entry to use as search key. */
+               key = dent_pointer(dent_alloc(1));
+               key->name_offset = name;
+
+               dent = dent_search(&dir->entries, key);
+               if (dent == NULL)
+                       dent = key;
+               else
+                       dent_free(1);
+
+               /* Not found: the key itself becomes a new directory entry. */
+               if (dent == key) {
+                       dent->mode = REPO_MODE_DIR;
+                       dent->content_offset = 0;
+                       dent_insert(&dir->entries, dent);
+               }
+
+               /*
+                * The entry lies below the committed mark: swap it for a
+                * new entry with the same name (presumably so that entries
+                * of already exported revisions stay untouched -- confirm).
+                */
+               if (dent_offset(dent) < dent_pool.committed) {
+                       dir_o = repo_dirent_is_dir(dent) ?
+                                       dent->content_offset : ~0;
+                       dent_remove(&dir->entries, dent);
+                       dent = dent_pointer(dent_alloc(1));
+                       dent->name_offset = name;
+                       dent->mode = REPO_MODE_DIR;
+                       dent->content_offset = dir_o;
+                       dent_insert(&dir->entries, dent);
+               }
+
+               /* Descend, cloning the child directory if necessary. */
+               dir = repo_dir_from_dirent(dent);
+               dir = repo_clone_dir(dir);
+               dent->content_offset = dir_offset(dir);
+       }
+       if (dent == NULL)
+               return;
+       /* The last component receives the requested mode and content. */
+       dent->mode = mode;
+       dent->content_offset = content_offset;
+       if (del && ~parent_dir_o)
+               dent_remove(&dir_pointer(parent_dir_o)->entries, dent);
+}
+
+/*
+ * Copy the entry at `src` in `revision` to `dst` in the active commit.
+ * Returns the mode of the source entry, or 0 if it was not found.
+ */
+uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst)
+{
+       struct repo_dirent *found = repo_read_dirent(revision, src);
+       uint32_t mode;
+
+       if (found == NULL)
+               return 0;
+       mode = found->mode;
+       repo_write_dirent(dst, mode, found->content_offset, 0);
+       return mode;
+}
+
+/* Write `path` into the active commit with the given mode and mark. */
+void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark)
+{
+       repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+/*
+ * Give the existing entry at `path` new content while keeping its
+ * mode.  Returns that mode, or 0 (writing nothing) if the entry does
+ * not exist in the active commit.
+ */
+uint32_t repo_replace(uint32_t *path, uint32_t blob_mark)
+{
+       struct repo_dirent *existing = repo_read_dirent(active_commit, path);
+       uint32_t mode;
+
+       if (existing == NULL)
+               return 0;
+       mode = existing->mode;
+       repo_write_dirent(path, mode, blob_mark, 0);
+       return mode;
+}
+
+/*
+ * Set the mode of `path` in the active commit.  A blob_mark of 0 keeps
+ * the content of an existing entry.
+ */
+void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark)
+{
+       struct repo_dirent *existing = repo_read_dirent(active_commit, path);
+
+       if (blob_mark == 0 && existing != NULL)
+               blob_mark = existing->content_offset;
+       repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+/* Remove `path` from the active commit (del flag set, mode/content 0). */
+void repo_delete(uint32_t *path)
+{
+       repo_write_dirent(path, 0, 0, 1);
+}
+
+static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir);
+
+/*
+ * Export `dent`, whose path occupies path[0..depth-1]: directories are
+ * walked recursively, anything else is emitted as a modification.
+ */
+static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent)
+{
+       if (!repo_dirent_is_dir(dent)) {
+               fast_export_modify(depth, path,
+                                  dent->mode, dent->content_offset);
+               return;
+       }
+       repo_git_add_r(depth, path, repo_dir_from_dirent(dent));
+}
+
+/* Export every entry of `dir`, storing each name at path[depth]. */
+static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir)
+{
+       struct repo_dirent *entry;
+
+       for (entry = repo_first_dirent(dir); entry;
+            entry = dent_next(&dir->entries, entry)) {
+               path[depth] = entry->name_offset;
+               repo_git_add(depth + 1, path, entry);
+       }
+}
+
+/*
+ * Emit the changes that turn directory `dir1` into `dir2`.
+ *
+ * Both entry lists are walked in parallel and compared by name_offset
+ * (assumes dent_first()/dent_next() iterate in increasing name_offset
+ * order -- confirm against trp.h).  `path` holds the components of the
+ * directory being compared in path[0..depth-1].
+ */
+static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1,
+                       struct repo_dir *dir2)
+{
+       struct repo_dirent *de1, *de2;
+       de1 = repo_first_dirent(dir1);
+       de2 = repo_first_dirent(dir2);
+
+       while (de1 && de2) {
+               /* Only in dir1: deleted. */
+               if (de1->name_offset < de2->name_offset) {
+                       path[depth] = de1->name_offset;
+                       fast_export_delete(depth + 1, path);
+                       de1 = dent_next(&dir1->entries, de1);
+                       continue;
+               }
+               /* Only in dir2: added. */
+               if (de1->name_offset > de2->name_offset) {
+                       path[depth] = de2->name_offset;
+                       repo_git_add(depth + 1, path, de2);
+                       de2 = dent_next(&dir2->entries, de2);
+                       continue;
+               }
+               /* Same name on both sides. */
+               path[depth] = de1->name_offset;
+
+               if (de1->mode == de2->mode &&
+                   de1->content_offset == de2->content_offset) {
+                       ; /* No change. */
+               } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) {
+                       /* Both directories: compare their contents. */
+                       repo_diff_r(depth + 1, path,
+                                   repo_dir_from_dirent(de1),
+                                   repo_dir_from_dirent(de2));
+               } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) {
+                       /* Both non-directories: emit the new version. */
+                       repo_git_add(depth + 1, path, de2);
+               } else {
+                       /* Directory <-> non-directory: delete, then add. */
+                       fast_export_delete(depth + 1, path);
+                       repo_git_add(depth + 1, path, de2);
+               }
+               de1 = dent_next(&dir1->entries, de1);
+               de2 = dent_next(&dir2->entries, de2);
+       }
+       /* Leftovers in dir1 were deleted ... */
+       while (de1) {
+               path[depth] = de1->name_offset;
+               fast_export_delete(depth + 1, path);
+               de1 = dent_next(&dir1->entries, de1);
+       }
+       /* ... and leftovers in dir2 were added. */
+       while (de2) {
+               path[depth] = de2->name_offset;
+               repo_git_add(depth + 1, path, de2);
+               de2 = dent_next(&dir2->entries, de2);
+       }
+}
+
+static uint32_t path_stack[REPO_MAX_PATH_DEPTH];
+
+/*
+ * Emit the differences between the trees of revisions r1 and r2,
+ * using the static path_stack as scratch space for the current path.
+ */
+void repo_diff(uint32_t r1, uint32_t r2)
+{
+       struct repo_dir *before = repo_commit_root_dir(commit_pointer(r1));
+       struct repo_dir *after = repo_commit_root_dir(commit_pointer(r2));
+
+       repo_diff_r(0, path_stack, before, after);
+}
+
+/*
+ * Export the active commit as `revision`, mark the dent and dir pools
+ * as committed, and start a new active commit that initially shares
+ * the root directory of the one just exported.
+ */
+void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
+                uint32_t url, unsigned long timestamp)
+{
+       fast_export_commit(revision, author, log, uuid, url, timestamp);
+       dent_commit();
+       dir_commit();
+       active_commit = commit_alloc(1);
+       commit_pointer(active_commit)->root_dir_offset =
+               commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+/*
+ * Set the blob mark counter to one more than the largest content
+ * offset stored in any non-directory entry (1 when there is none).
+ */
+static void mark_init(void)
+{
+       uint32_t i, highest = 0;
+
+       for (i = 0; i < dent_pool.size; i++) {
+               struct repo_dirent *dent = dent_pointer(i);
+
+               if (repo_dirent_is_dir(dent))
+                       continue;
+               if (dent->content_offset > highest)
+                       highest = dent->content_offset;
+       }
+       mark = highest + 1;
+}
+
+/*
+ * Prepare the repository state: initialise the blob mark counter,
+ * create commit 0 with an empty root directory if no commit exists
+ * yet, and allocate the active commit on top of the latest one.
+ */
+void repo_init(void)
+{
+       mark_init();
+       if (commit_pool.size == 0) {
+               /* Create empty tree for commit 0. */
+               commit_alloc(1);
+               commit_pointer(0)->root_dir_offset = dir_alloc(1);
+               /* ~0 is the "no node" value of a treap root. */
+               dir_pointer(0)->entries.trp_root = ~0;
+               dir_commit();
+       }
+       /* Preallocate next commit, ready for changes. */
+       active_commit = commit_alloc(1);
+       commit_pointer(active_commit)->root_dir_offset =
+               commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+/* Free the string pool and the commit, dir and dent pools. */
+void repo_reset(void)
+{
+       pool_reset();
+       commit_reset();
+       dir_reset();
+       dent_reset();
+}
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
new file mode 100644 (file)
index 0000000..5476175
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef REPO_TREE_H_
+#define REPO_TREE_H_
+
+#include "git-compat-util.h"
+
+/* Entry modes: directory, regular file, executable file, symlink. */
+#define REPO_MODE_DIR 0040000
+#define REPO_MODE_BLB 0100644
+#define REPO_MODE_EXE 0100755
+#define REPO_MODE_LNK 0120000
+
+#define REPO_MAX_PATH_LEN 4096
+/* Capacity of the path arrays, including the ~0 terminator. */
+#define REPO_MAX_PATH_DEPTH 1000
+
+/*
+ * Paths are ~0-terminated arrays of string-pool keys, one key per
+ * path component.
+ */
+uint32_t next_blob_mark(void);
+uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst);
+void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
+uint32_t repo_replace(uint32_t *path, uint32_t blob_mark);
+void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark);
+void repo_delete(uint32_t *path);
+void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
+                uint32_t url, long unsigned timestamp);
+void repo_diff(uint32_t r1, uint32_t r2);
+void repo_init(void);
+void repo_reset(void);
+
+#endif
diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
new file mode 100644 (file)
index 0000000..f5b1da8
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+#include "trp.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+static struct trp_root tree = { ~0 };
+
+/* One interned string: its location in the string pool plus treap links. */
+struct node {
+       /* offset of the string's first byte in the string pool */
+       uint32_t offset;
+       struct trp_node children;
+};
+
+/* Two memory pools: one for struct node, and another for strings */
+obj_pool_gen(node, struct node, 4096)
+obj_pool_gen(string, char, 4096)
+
+/* String stored for `node`, or NULL when `node` is NULL. */
+static char *node_value(struct node *node)
+{
+       if (!node)
+               return NULL;
+       return string_pointer(node->offset);
+}
+
+/* Treap comparison callback: strcmp() order of the nodes' strings. */
+static int node_cmp(struct node *a, struct node *b)
+{
+       return strcmp(node_value(a), node_value(b));
+}
+
+/* Build a Treap from the node structure (a trp_node w/ offset) */
+trp_gen(static, tree_, struct node, children, node, node_cmp);
+
+/*
+ * Return the string for key `entry`, or NULL when the key does not
+ * name a node in the pool (for example ~0).
+ */
+const char *pool_fetch(uint32_t entry)
+{
+       return node_value(node_pointer(entry));
+}
+
+/*
+ * Return the key of `key` in the string pool, adding the string if it
+ * is not there yet.  A NULL string maps to ~0.
+ */
+uint32_t pool_intern(const char *key)
+{
+       struct node *candidate, *existing;
+       uint32_t size;
+
+       if (!key)
+               return ~0;
+
+       /* Store a provisional copy so it can serve as the search key. */
+       size = strlen(key) + 1;
+       candidate = node_pointer(node_alloc(1));
+       candidate->offset = string_alloc(size);
+       strcpy(node_value(candidate), key);
+
+       existing = tree_search(&tree, candidate);
+       if (existing) {
+               /* Already interned: drop the provisional copy. */
+               node_free(1);
+               string_free(size);
+               return node_offset(existing);
+       }
+       tree_insert(&tree, candidate);
+       return node_offset(candidate);
+}
+
+/*
+ * strtok_r() wrapper that returns the string-pool key of the next
+ * token, or ~0 when there are no more tokens.
+ */
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
+{
+       char *token = strtok_r(str, delim, saveptr);
+
+       if (!token)
+               return ~0;
+       return pool_intern(token);
+}
+
+/*
+ * Print up to `len` strings named by the keys in `seq`, separated by
+ * `delim`.  A ~0 key ends the sequence early.
+ */
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
+{
+       uint32_t i;
+
+       for (i = 0; i < len; i++) {
+               if (!~seq[i])
+                       break;
+               fputs(pool_fetch(seq[i]), stream);
+               /* Separator only when another real key follows. */
+               if (i + 1 < len && ~seq[i + 1])
+                       fputc(delim, stream);
+       }
+}
+
+/*
+ * Split `str` at any of the `delim` characters and store the keys of
+ * the tokens in seq[0..sz-1], followed by a ~0 terminator.
+ *
+ * Returns the number of tokens stored, or sz when the tokens did not
+ * all fit (the last slot then holds the terminator).  Returns ~0
+ * without touching `seq` when sz is 0.  A NULL `str` yields an empty
+ * sequence.  `str` is modified by the tokenizer.
+ */
+uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
+{
+       char *context = NULL;
+       uint32_t token = ~0;
+       uint32_t length;
+
+       if (sz == 0)
+               return ~0;
+       if (str)
+               token = pool_tok_r(str, delim, &context);
+       for (length = 0; length < sz; length++) {
+               seq[length] = token;
+               if (token == ~0)
+                       return length;
+               token = pool_tok_r(NULL, delim, &context);
+       }
+       /* Ran out of room: overwrite the last key with the terminator. */
+       seq[sz - 1] = ~0;
+       return sz;
+}
+
+/*
+ * Deallocate the string pool.  All keys handed out so far become
+ * invalid; the pool can be used again afterwards.
+ */
+void pool_reset(void)
+{
+       node_reset();
+       string_reset();
+       /*
+        * The treap root is an offset into the node pool that was just
+        * freed; reset it to "empty" so a later pool_intern() does not
+        * follow a stale offset.
+        */
+       tree.trp_root = ~0;
+}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
new file mode 100644 (file)
index 0000000..222fb66
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef STRING_POOL_H_
+#define STRING_POOL_H_
+
+uint32_t pool_intern(const char *key);
+const char *pool_fetch(uint32_t entry);
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
+uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
+void pool_reset(void);
+
+#endif
diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt
new file mode 100644 (file)
index 0000000..1b41f15
--- /dev/null
@@ -0,0 +1,43 @@
+string_pool API
+===============
+
+The string_pool API provides facilities for replacing strings
+with integer keys that can be more easily compared and stored.
+The facilities are designed so that one could teach Git without
+too much trouble to store the information needed for these keys to
+remain valid over multiple executions.
+
+Functions
+---------
+
+pool_intern::
+       Include a string in the string pool and get its key.
+       If that string is already in the pool, retrieves its
+       existing key.
+
+pool_fetch::
+       Retrieve the string associated with a given key.
+
+pool_tok_r::
+       Extract the key of the next token from a string.
+       Interface mimics strtok_r.
+
+pool_print_seq::
+       Print a sequence of strings named by key to a file, using the
+       specified delimiter to separate them.
+
+       If NULL (key ~0) appears in the sequence, the sequence ends
+       early.
+
+pool_tok_seq::
+       Split a string into tokens, storing the keys of segments
+       into a caller-provided array.
+
+       Unless sz is 0, the array will always be ~0-terminated.
+       If there is not enough room for all the tokens, the
+       array holds as many tokens as fit in the entries before
+       the terminating ~0.  Return value is the index after the
+       last token, or sz if the tokens did not fit.
+
+pool_reset::
+       Deallocate storage for the string pool.
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
new file mode 100644 (file)
index 0000000..630eeb5
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * Parse and rearrange a svnadmin dump.
+ * Create the dump with:
+ * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
+ *
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "cache.h"
+#include "repo_tree.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+#define NODEACT_REPLACE 4
+#define NODEACT_DELETE 3
+#define NODEACT_ADD 2
+#define NODEACT_CHANGE 1
+#define NODEACT_UNKNOWN 0
+
+#define DUMP_CTX 0
+#define REV_CTX  1
+#define NODE_CTX 2
+
+#define LENGTH_UNKNOWN (~0)
+#define DATE_RFC2822_LEN 31
+
+/* Create memory pool for log messages */
+obj_pool_gen(log, char, 4096)
+
+/*
+ * Replace the contents of the log pool with the first `length` bytes
+ * of `log` and return a pointer to the copy.  strncpy() adds no
+ * terminator of its own, so `length` has to cover the NUL if the
+ * result is to be used as a string.
+ */
+static char* log_copy(uint32_t length, char *log)
+{
+       char *buffer;
+       /* Drop the previous message: only one is kept at a time. */
+       log_free(log_pool.size);
+       buffer = log_pointer(log_alloc(length));
+       strncpy(buffer, log, length);
+       return buffer;
+}
+
+/*
+ * State of the node record being parsed.  src and dst are
+ * ~0-terminated sequences of string-pool keys (copy source path and
+ * node path).
+ */
+static struct {
+       uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
+       uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+} node_ctx;
+
+/* State of the revision record being parsed. */
+static struct {
+       uint32_t revision, author;
+       unsigned long timestamp;
+       char *log;
+} rev_ctx;
+
+/* Dump-wide state: string-pool keys of the repository UUID and URL. */
+static struct {
+       uint32_t uuid, url;
+} dump_ctx;
+
+/*
+ * String-pool keys of the property and header names the parser
+ * recognises; filled in by init_keys().
+ */
+static struct {
+       uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
+               revision_number, node_path, node_kind, node_action,
+               node_copyfrom_path, node_copyfrom_rev, text_content_length,
+               prop_content_length, content_length;
+} keys;
+
+/*
+ * Start a fresh node record whose path is `fname` (split at '/'; a
+ * NULL name gives an empty path).  `fname` is modified in place.
+ */
+static void reset_node_ctx(char *fname)
+{
+       /* Nothing known about the node yet. */
+       node_ctx.action = NODEACT_UNKNOWN;
+       node_ctx.type = 0;
+       node_ctx.mark = 0;
+
+       /* Lengths stay "unknown" until their headers are seen. */
+       node_ctx.textLength = LENGTH_UNKNOWN;
+       node_ctx.propLength = LENGTH_UNKNOWN;
+
+       /* No copy source. */
+       node_ctx.srcRev = 0;
+       node_ctx.srcMode = 0;
+       node_ctx.src[0] = ~0;
+
+       pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+}
+
+/* Start a fresh revision record numbered `revision`. */
+static void reset_rev_ctx(uint32_t revision)
+{
+       rev_ctx.author = ~0;
+       rev_ctx.log = NULL;
+       rev_ctx.timestamp = 0;
+       rev_ctx.revision = revision;
+}
+
+/* Forget the UUID and remember `url` (a string-pool key) for the dump. */
+static void reset_dump_ctx(uint32_t url)
+{
+       dump_ctx.uuid = ~0;
+       dump_ctx.url = url;
+}
+
+/*
+ * Intern the property and header names the parser looks for, so that
+ * incoming names can be matched by comparing string-pool keys.
+ */
+static void init_keys(void)
+{
+       keys.svn_log = pool_intern("svn:log");
+       keys.svn_author = pool_intern("svn:author");
+       keys.svn_date = pool_intern("svn:date");
+       keys.svn_executable = pool_intern("svn:executable");
+       keys.svn_special = pool_intern("svn:special");
+       keys.uuid = pool_intern("UUID");
+       keys.revision_number = pool_intern("Revision-number");
+       keys.node_path = pool_intern("Node-path");
+       keys.node_kind = pool_intern("Node-kind");
+       keys.node_action = pool_intern("Node-action");
+       keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
+       keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
+       keys.text_content_length = pool_intern("Text-content-length");
+       keys.prop_content_length = pool_intern("Prop-content-length");
+       keys.content_length = pool_intern("Content-length");
+}
+
+/*
+ * Parse a property block up to the "PROPS-END" line (or end of input).
+ *
+ * "K <len>" introduces a property name and "V <len>" its value, each
+ * followed by <len> bytes of data.  Recognised properties update
+ * rev_ctx (log, author, date) or node_ctx.type (executable, special);
+ * all others are ignored.
+ *
+ * NOTE(review): buffer_read_string() can return NULL on a read error;
+ * the svn:log and svn:date branches pass that value on unchecked.
+ */
+static void read_props(void)
+{
+       uint32_t len;
+       uint32_t key = ~0;
+       char *val = NULL;
+       char *t;
+       while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) {
+               if (!strncmp(t, "K ", 2)) {
+                       len = atoi(&t[2]);
+                       key = pool_intern(buffer_read_string(len));
+                       /* presumably consumes the newline after the data */
+                       buffer_read_line();
+               } else if (!strncmp(t, "V ", 2)) {
+                       len = atoi(&t[2]);
+                       val = buffer_read_string(len);
+                       if (key == keys.svn_log) {
+                               /* Value length excludes terminating nul. */
+                               rev_ctx.log = log_copy(len + 1, val);
+                       } else if (key == keys.svn_author) {
+                               rev_ctx.author = pool_intern(val);
+                       } else if (key == keys.svn_date) {
+                               if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
+                                       fprintf(stderr, "Invalid timestamp: %s\n", val);
+                       } else if (key == keys.svn_executable) {
+                               node_ctx.type = REPO_MODE_EXE;
+                       } else if (key == keys.svn_special) {
+                               node_ctx.type = REPO_MODE_LNK;
+                       }
+                       /* A value consumes the pending key. */
+                       key = ~0;
+                       buffer_read_line();
+               }
+       }
+}
+
+/*
+ * Apply the node record collected in node_ctx to the active commit
+ * and consume its content from the input: properties are parsed, and
+ * the text is either exported as a blob or skipped.
+ */
+static void handle_node(void)
+{
+       if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength)
+               read_props();
+
+       /* A copy source was given: copy it to the destination first. */
+       if (node_ctx.srcRev)
+               node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+
+       /* Non-directories that carry text get a blob mark. */
+       if (node_ctx.textLength != LENGTH_UNKNOWN &&
+           node_ctx.type != REPO_MODE_DIR)
+               node_ctx.mark = next_blob_mark();
+
+       if (node_ctx.action == NODEACT_DELETE) {
+               repo_delete(node_ctx.dst);
+       } else if (node_ctx.action == NODEACT_CHANGE ||
+                          node_ctx.action == NODEACT_REPLACE) {
+               if (node_ctx.action == NODEACT_REPLACE &&
+                   node_ctx.type == REPO_MODE_DIR)
+                       repo_replace(node_ctx.dst, node_ctx.mark);
+               else if (node_ctx.propLength != LENGTH_UNKNOWN)
+                       repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+               else if (node_ctx.textLength != LENGTH_UNKNOWN)
+                       node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+       } else if (node_ctx.action == NODEACT_ADD) {
+               if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN)
+                       repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+               else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN)
+                       node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+               else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) ||
+                        node_ctx.textLength != LENGTH_UNKNOWN)
+                       repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
+       }
+
+       /* Without a property block, the mode of the old entry is kept. */
+       if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode)
+               node_ctx.type = node_ctx.srcMode;
+
+       /* Export the text as a blob, or skip it when no mark was taken. */
+       if (node_ctx.mark)
+               fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
+       else if (node_ctx.textLength != LENGTH_UNKNOWN)
+               buffer_skip_bytes(node_ctx.textLength);
+}
+
+/* Commit the collected revision; revision 0 is never committed. */
+static void handle_revision(void)
+{
+       if (!rev_ctx.revision)
+               return;
+       repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+               dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+}
+
+/*
+ * Read the whole dump from the line buffer and export it.
+ *
+ * url: repository URL to record for the dump, or NULL for none.
+ *
+ * Each "Key: value" header line updates the dump, revision or node
+ * context.  A "Content-length" header ends the header block: a
+ * revision's properties are then read, or the node is handled.
+ * Pending nodes and revisions are flushed when the next record starts
+ * and again at end of input.  Lines without ": " are ignored.
+ */
+void svndump_read(const char *url)
+{
+       char *val;
+       char *t;
+       uint32_t active_ctx = DUMP_CTX;
+       uint32_t len;
+       uint32_t key;
+
+       reset_dump_ctx(pool_intern(url));
+       while ((t = buffer_read_line())) {
+               val = strstr(t, ": ");
+               if (!val)
+                       continue;
+               /* Cut the line in two: t is the key, val the value. */
+               *val++ = '\0';
+               *val++ = '\0';
+               key = pool_intern(t);
+
+               if (key == keys.uuid) {
+                       dump_ctx.uuid = pool_intern(val);
+               } else if (key == keys.revision_number) {
+                       /* A new revision starts: flush what is pending. */
+                       if (active_ctx == NODE_CTX)
+                               handle_node();
+                       if (active_ctx != DUMP_CTX)
+                               handle_revision();
+                       active_ctx = REV_CTX;
+                       reset_rev_ctx(atoi(val));
+               } else if (key == keys.node_path) {
+                       /* A new node starts: flush the previous one. */
+                       if (active_ctx == NODE_CTX)
+                               handle_node();
+                       active_ctx = NODE_CTX;
+                       reset_node_ctx(val);
+               } else if (key == keys.node_kind) {
+                       if (!strcmp(val, "dir"))
+                               node_ctx.type = REPO_MODE_DIR;
+                       else if (!strcmp(val, "file"))
+                               node_ctx.type = REPO_MODE_BLB;
+                       else
+                               fprintf(stderr, "Unknown node-kind: %s\n", val);
+               } else if (key == keys.node_action) {
+                       if (!strcmp(val, "delete")) {
+                               node_ctx.action = NODEACT_DELETE;
+                       } else if (!strcmp(val, "add")) {
+                               node_ctx.action = NODEACT_ADD;
+                       } else if (!strcmp(val, "change")) {
+                               node_ctx.action = NODEACT_CHANGE;
+                       } else if (!strcmp(val, "replace")) {
+                               node_ctx.action = NODEACT_REPLACE;
+                       } else {
+                               fprintf(stderr, "Unknown node-action: %s\n", val);
+                               node_ctx.action = NODEACT_UNKNOWN;
+                       }
+               } else if (key == keys.node_copyfrom_path) {
+                       pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+               } else if (key == keys.node_copyfrom_rev) {
+                       node_ctx.srcRev = atoi(val);
+               } else if (key == keys.text_content_length) {
+                       node_ctx.textLength = atoi(val);
+               } else if (key == keys.prop_content_length) {
+                       node_ctx.propLength = atoi(val);
+               } else if (key == keys.content_length) {
+                       len = atoi(val);
+                       /* Skip the line that ends the header block. */
+                       buffer_read_line();
+                       if (active_ctx == REV_CTX) {
+                               read_props();
+                       } else if (active_ctx == NODE_CTX) {
+                               handle_node();
+                               active_ctx = REV_CTX;
+                       } else {
+                               /* len is uint32_t: print it unsigned. */
+                               fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
+                               buffer_skip_bytes(len);
+                       }
+               }
+       }
+       /* End of input: flush whatever is still pending. */
+       if (active_ctx == NODE_CTX)
+               handle_node();
+       if (active_ctx != DUMP_CTX)
+               handle_revision();
+}
+
+/*
+ * Prepare to read a dump from `filename` (stdin when NULL) and reset
+ * all parser state.  Dies if the file cannot be opened.
+ */
+void svndump_init(const char *filename)
+{
+       /*
+        * buffer_init() can only fail for a named file; carrying on
+        * would leave the line buffer without an input stream.
+        */
+       if (buffer_init(filename))
+               die_errno("cannot open dump file: %s", filename);
+       repo_init();
+       reset_dump_ctx(~0);
+       reset_rev_ctx(0);
+       reset_node_ctx(NULL);
+       init_keys();
+}
+
+/*
+ * Release the log pool and repository state, reset the parser
+ * contexts and close the input.  Input and output stream errors are
+ * reported on stderr.
+ */
+void svndump_deinit(void)
+{
+       log_reset();
+       repo_reset();
+       reset_dump_ctx(~0);
+       reset_rev_ctx(0);
+       reset_node_ctx(NULL);
+       if (buffer_deinit())
+               fprintf(stderr, "Input error\n");
+       if (ferror(stdout))
+               fprintf(stderr, "Output error\n");
+}
+
+/*
+ * Release the log pool, the line buffer's pool and the repository
+ * state, and reset the parser contexts.  The input stream is left as
+ * it is.
+ */
+void svndump_reset(void)
+{
+       log_reset();
+       buffer_reset();
+       repo_reset();
+       reset_dump_ctx(~0);
+       reset_rev_ctx(0);
+       reset_node_ctx(NULL);
+}
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
new file mode 100644 (file)
index 0000000..93c412f
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef SVNDUMP_H_
+#define SVNDUMP_H_
+
+void svndump_init(const char *filename); /* sets up input buffer (from filename), repository state, contexts and keys */
+void svndump_read(const char *url); /* processes the dump headers and content; defined in svndump.c */
+void svndump_deinit(void); /* resets state, releases the input buffer, reports input/output errors on stderr */
+void svndump_reset(void); /* resets log, buffer, repository state and parser contexts */
+
+#endif
diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
new file mode 100644 (file)
index 0000000..ee35c68
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * C macro implementation of treaps.
+ *
+ * Usage:
+ *   #include <stdint.h>
+ *   #include "trp.h"
+ *   trp_gen(...)
+ *
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#ifndef TRP_H_
+#define TRP_H_
+
+#define MAYBE_UNUSED __attribute__((__unused__))
+
+/* Node structure. */
+struct trp_node {
+       uint32_t trpn_left; /* pool offset of the left child; ~0 when there is none (see trp_node_new) */
+       uint32_t trpn_right; /* pool offset of the right child; ~0 when there is none */
+};
+
+/* Root structure. */
+struct trp_root {
+       uint32_t trp_root; /* pool offset of the top node; ~0 is treated as an empty treap by first/insert */
+};
+
+/* Pointer/Offset conversion. */
+#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) /* offset -> node pointer, delegated to <a_base>_pointer() */
+#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) /* node pointer -> offset, delegated to <a_base>_offset() */
+#define trpn_modify(a_base, a_offset) /* make the node writable: nodes below the committed mark are copied first */ \
+       do { \
+               if ((a_offset) < a_base##_pool.committed) { /* offset lies below <a_base>_pool.committed */ \
+                       uint32_t old_offset = (a_offset);\
+                       (a_offset) = a_base##_alloc(1); /* a_offset is assigned to, so callers must pass an lvalue */ \
+                       *trpn_pointer(a_base, a_offset) = /* whole-node copy from the old slot into the new one */ \
+                               *trpn_pointer(a_base, old_offset); \
+               } \
+       } while (0)
+
+/* Left accessors. */
+#define trp_left_get(a_base, a_field, a_node) /* expands to an lvalue; trp_left_set assigns through it */ \
+       (trpn_pointer(a_base, a_node)->a_field.trpn_left)
+#define trp_left_set(a_base, a_field, a_node, a_left) \
+       do { \
+               trpn_modify(a_base, a_node); /* may overwrite a_node with the offset of a fresh copy */ \
+               trp_left_get(a_base, a_field, a_node) = (a_left); \
+       } while (0)
+
+/* Right accessors. */
+#define trp_right_get(a_base, a_field, a_node) /* expands to an lvalue; trp_right_set assigns through it */ \
+       (trpn_pointer(a_base, a_node)->a_field.trpn_right)
+#define trp_right_set(a_base, a_field, a_node, a_right) \
+       do { \
+               trpn_modify(a_base, a_node); /* may overwrite a_node with the offset of a fresh copy */ \
+               trp_right_get(a_base, a_field, a_node) = (a_right); \
+       } while (0)
+
+/*
+ * Fibonacci hash function.
+ * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
+ * See Knuth §6.4: volume 3, 3rd ed, p518.
+ *
+ * The whole expansion is wrapped in parentheses so that the cast cannot
+ * bind to an operator at the use site (sizeof, postfix operators).
+ */
+#define trpn_hash(a_node) ((uint32_t) (2654435761u * (a_node)))
+
+/*
+ * Priority accessors.
+ * A node's priority is the hash of its pool offset; the insert and remove
+ * code keeps the node with the smaller priority value nearer the root.
+ */
+#define trp_prio_get(a_node) trpn_hash(a_node)
+
+/* Node initializer. */
+#define trp_node_new(a_base, a_field, a_node) /* give a node two empty child links */ \
+       do { \
+               trp_left_set(a_base, a_field, (a_node), ~0); /* ~0 is the "no child" marker tested by the traversal code */ \
+               trp_right_set(a_base, a_field, (a_node), ~0); /* the setters go through trpn_modify, so a_node may be reassigned */ \
+       } while (0)
+
+/* Internal utility macros. */
+#define trpn_first(a_base, a_field, a_root, r_node) /* r_node = leftmost node of the subtree at a_root */ \
+       do { \
+               (r_node) = (a_root); \
+               if ((r_node) == ~0) \
+                       return NULL; /* NB: this returns from the function that expands the macro */ \
+               while (~trp_left_get(a_base, a_field, (r_node))) /* keep descending while the left link is not ~0 */ \
+                       (r_node) = trp_left_get(a_base, a_field, (r_node)); \
+       } while (0)
+
+#define trpn_rotate_left(a_base, a_field, a_node, r_node) /* r_node receives the new subtree root */ \
+       do { \
+               (r_node) = trp_right_get(a_base, a_field, (a_node)); /* the right child moves up */ \
+               trp_right_set(a_base, a_field, (a_node), /* a_node adopts r_node's former left subtree */ \
+                       trp_left_get(a_base, a_field, (r_node))); \
+               trp_left_set(a_base, a_field, (r_node), (a_node)); /* a_node becomes the left child; both setters may reassign their node argument */ \
+       } while (0)
+
+#define trpn_rotate_right(a_base, a_field, a_node, r_node) /* mirror image of trpn_rotate_left */ \
+       do { \
+               (r_node) = trp_left_get(a_base, a_field, (a_node)); /* the left child moves up */ \
+               trp_left_set(a_base, a_field, (a_node), /* a_node adopts r_node's former right subtree */ \
+                       trp_right_get(a_base, a_field, (r_node))); \
+               trp_right_set(a_base, a_field, (r_node), (a_node)); /* a_node becomes the right child */ \
+       } while (0)
+
+#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
+a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) /* leftmost node of the treap */ \
+{ \
+       uint32_t ret; \
+       trpn_first(a_base, a_field, treap->trp_root, ret); /* contains a return NULL for the empty-treap case */ \
+       return trpn_pointer(a_base, ret); \
+} \
+a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) /* in-order successor of node */ \
+{ \
+       uint32_t ret; \
+       uint32_t offset = trpn_offset(a_base, node); \
+       if (~trp_right_get(a_base, a_field, offset)) { /* right subtree exists: successor is its leftmost node */ \
+               trpn_first(a_base, a_field, \
+                       trp_right_get(a_base, a_field, offset), ret); \
+       } else { /* no right subtree: walk down from the root looking for node */ \
+               uint32_t tnode = treap->trp_root; \
+               ret = ~0; /* stays ~0 if the walk never turns left */ \
+               while (1) { /* ends only on cmp == 0; there is no check for tnode reaching ~0 */ \
+                       int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
+                               trpn_pointer(a_base, tnode)); \
+                       if (cmp < 0) { \
+                               ret = tnode; /* last node at which the walk turned left is the candidate */ \
+                               tnode = trp_left_get(a_base, a_field, tnode); \
+                       } else if (cmp > 0) { \
+                               tnode = trp_right_get(a_base, a_field, tnode); \
+                       } else { \
+                               break; \
+                       } \
+               } \
+       } \
+       return trpn_pointer(a_base, ret); /* ret may be ~0 here; presumably <a_base>_pointer maps that to NULL - confirm */ \
+} \
+/* \
+ * Exact-match lookup: descend from the root, going left when key orders \
+ * before the current node and right otherwise, until the comparison is \
+ * zero or the ~0 link is reached.  The final offset (possibly ~0) is \
+ * converted with trpn_pointer and returned. \
+ */ \
+a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
+{ \
+       uint32_t cur = treap->trp_root; \
+       while (~cur) { \
+               int order = (a_cmp)(key, trpn_pointer(a_base, cur)); \
+               if (!order) \
+                       break; \
+               cur = order < 0 ? trp_left_get(a_base, a_field, cur) \
+                               : trp_right_get(a_base, a_field, cur); \
+       } \
+       return trpn_pointer(a_base, cur); \
+} \
+/* \
+ * Return the node that compares equal to key or, when key is absent, the \
+ * node that would follow key in sorted order (the smallest node that \
+ * orders after key). \
+ * \
+ * The candidate is recorded every time the walk turns left, so a walk \
+ * that later steps right into an empty link still reports that ancestor \
+ * rather than dropping it.  When no node orders after key, the lookup is \
+ * done with ~0, exactly as a_pre##search does for a missing key. \
+ */ \
+a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
+{ \
+       uint32_t ret = ~0; \
+       uint32_t tnode = treap->trp_root; \
+       while (~tnode) { \
+               int cmp = (a_cmp)(key, trpn_pointer(a_base, tnode)); \
+               if (cmp < 0) { \
+                       ret = tnode; /* tnode orders after key: best successor so far */ \
+                       tnode = trp_left_get(a_base, a_field, tnode); \
+               } else if (cmp > 0) { \
+                       tnode = trp_right_get(a_base, a_field, tnode); \
+               } else { \
+                       ret = tnode; /* exact match */ \
+                       break; \
+               } \
+       } \
+       return trpn_pointer(a_base, ret); \
+} \
+a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) /* returns the offset of the subtree root after insertion */ \
+{ \
+       if (cur_node == ~0) { /* empty subtree: ins_node becomes its root */ \
+               return ins_node; \
+       } else { \
+               uint32_t ret; \
+               int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
+                                       trpn_pointer(a_base, cur_node)); \
+               if (cmp < 0) { \
+                       uint32_t left = a_pre##insert_recurse( \
+                               trp_left_get(a_base, a_field, cur_node), ins_node); \
+                       trp_left_set(a_base, a_field, cur_node, left); /* may replace cur_node with a copy (trpn_modify) */ \
+                       if (trp_prio_get(left) < trp_prio_get(cur_node)) /* child has the smaller priority value: lift it above cur_node */ \
+                               trpn_rotate_right(a_base, a_field, cur_node, ret); \
+                       else \
+                               ret = cur_node; \
+               } else { /* cmp >= 0: a node comparing equal goes into the right subtree */ \
+                       uint32_t right = a_pre##insert_recurse( \
+                               trp_right_get(a_base, a_field, cur_node), ins_node); \
+                       trp_right_set(a_base, a_field, cur_node, right); /* may replace cur_node with a copy (trpn_modify) */ \
+                       if (trp_prio_get(right) < trp_prio_get(cur_node)) /* same priority rule, mirrored */ \
+                               trpn_rotate_left(a_base, a_field, cur_node, ret); \
+                       else \
+                               ret = cur_node; \
+               } \
+               return ret; \
+       } \
+} \
+a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) /* add node to the treap and update its root */ \
+{ \
+       uint32_t offset = trpn_offset(a_base, node); \
+       trp_node_new(a_base, a_field, offset); /* clears both child links; offset may be reassigned by trpn_modify */ \
+       treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); /* the root can change through rotations */ \
+} \
+/* \
+ * Remove the node that compares equal to rem_node from the subtree rooted \
+ * at cur_node and return the offset of the resulting subtree root (~0 when \
+ * the subtree becomes empty). \
+ * \
+ * A matching node with children is rotated downwards, always lifting the \
+ * child with the smaller priority value, until it has no children and can \
+ * be dropped.  Reaching ~0 means rem_node is not in this subtree; the \
+ * subtree is then returned unchanged instead of handing the ~0 lookup \
+ * result to the comparison function. \
+ */ \
+a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
+{ \
+       int cmp; \
+       if (cur_node == ~0) /* ran off the tree: nothing to remove here */ \
+               return ~0; \
+       /* (a_cmp) is parenthesized as in every other generated function. */ \
+       cmp = (a_cmp)(trpn_pointer(a_base, rem_node), \
+                       trpn_pointer(a_base, cur_node)); \
+       if (cmp == 0) { \
+               uint32_t ret; \
+               uint32_t left = trp_left_get(a_base, a_field, cur_node); \
+               uint32_t right = trp_right_get(a_base, a_field, cur_node); \
+               if (left == ~0) { \
+                       if (right == ~0) \
+                               return ~0; /* no children: drop the node */ \
+               } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
+                       /* Left child wins: lift it, then keep removing on its right. */ \
+                       trpn_rotate_right(a_base, a_field, cur_node, ret); \
+                       right = a_pre##remove_recurse(cur_node, rem_node); \
+                       trp_right_set(a_base, a_field, ret, right); \
+                       return ret; \
+               } \
+               /* Right child wins (or is the only child): lift it instead. */ \
+               trpn_rotate_left(a_base, a_field, cur_node, ret); \
+               left = a_pre##remove_recurse(cur_node, rem_node); \
+               trp_left_set(a_base, a_field, ret, left); \
+               return ret; \
+       } else if (cmp < 0) { \
+               uint32_t left = a_pre##remove_recurse( \
+                       trp_left_get(a_base, a_field, cur_node), rem_node); \
+               trp_left_set(a_base, a_field, cur_node, left); \
+               return cur_node; \
+       } else { \
+               uint32_t right = a_pre##remove_recurse( \
+                       trp_right_get(a_base, a_field, cur_node), rem_node); \
+               trp_right_set(a_base, a_field, cur_node, right); \
+               return cur_node; \
+       } \
+} \
+a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) /* remove node and store the new root offset */ \
+{ \
+       treap->trp_root = a_pre##remove_recurse(treap->trp_root, /* becomes ~0 when the last node is removed */ \
+               trpn_offset(a_base, node)); \
+} \
+
+#endif
diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt
new file mode 100644 (file)
index 0000000..eb4c191
--- /dev/null
@@ -0,0 +1,103 @@
+Motivation
+==========
+
+Treaps provide a memory-efficient binary search tree structure.
+Insertion/deletion/search are about as fast in the average
+case as red-black trees and the chances of worst-case behavior are
+vanishingly small, thanks to (pseudo-)randomness.  The bad worst-case
+behavior is a small price to pay, given that treaps are much simpler
+to implement.
+
+API
+===
+
+The trp API generates a data structure and functions to handle a
+large growing set of objects stored in a pool.
+
+The caller:
+
+. Specifies parameters for the generated functions with the
+  trp_gen(static, foo_, ...) macro.
+
+. Allocates a `struct trp_root` variable and sets it to {~0}.
+
+. Adds new nodes to the set using `foo_insert`.
+
+. Can find a specific item in the set using `foo_search`.
+
+. Can iterate over items in the set using `foo_first` and `foo_next`.
+
+. Can remove an item from the set using `foo_remove`.
+
+Example:
+
+----
+struct ex_node {
+       const char *s;
+       struct trp_node ex_link;
+};
+static struct trp_root ex_base = {~0};
+obj_pool_gen(ex, struct ex_node, 4096);
+static int ex_cmp(struct ex_node *a, struct ex_node *b)
+{
+       return strcmp(a->s, b->s);
+}
+trp_gen(static, ex_, struct ex_node, ex_link, ex, ex_cmp)
+struct ex_node *item;
+
+item = ex_pointer(ex_alloc(1));
+item->s = "hello";
+ex_insert(&ex_base, item);
+item = ex_pointer(ex_alloc(1));
+item->s = "goodbye";
+ex_insert(&ex_base, item);
+for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
+       printf("%s\n", item->s);
+----
+
+Functions
+---------
+
+trp_gen(attr, foo_, node_type, link_field, pool, cmp)::
+
+       Generate a type-specific treap implementation.
++
+. The storage class for generated functions will be 'attr' (e.g., `static`).
+. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
+. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
+  This type must be a struct with at least one `struct trp_node` field
+  to point to its children.
+. The field used to access child nodes will be 'link_field'.
+. All treap nodes must lie in the 'pool' object pool.
+. Treap nodes must be totally ordered by the 'cmp' relation, with the
+  following prototype:
++
+int (*cmp)(node_type \*a, node_type \*b)
++
+and returning a value less than, equal to, or greater than zero
+according to the result of comparison.
+
+void foo_insert(struct trp_root *treap, node_type \*node)::
+
+       Insert node into treap.  If inserted multiple times,
+       a node will appear in the treap multiple times.
+
+void foo_remove(struct trp_root *treap, node_type \*node)::
+
+       Remove node from treap.  Caller must ensure node is
+       present in treap before using this function.
+
+node_type *foo_search(struct trp_root \*treap, node_type \*key)::
+
+       Search for a node that matches key.  If no match is found,
+       result is NULL.
+
+node_type *foo_nsearch(struct trp_root \*treap, node_type \*key)::
+
+       Like `foo_search`, but if the key is missing return what
+       would be key's successor, were key in treap (NULL if no
+       successor).
+
+node_type *foo_first(struct trp_root \*treap)::
+
+       Find the first item from the treap, in sorted order.
+
+node_type *foo_next(struct trp_root \*treap, node_type \*node)::
+
+       Find the next item.