Teach fast-import to ignore lines starting with '#'
author Shawn O. Pearce <spearce@spearce.org>
Wed, 1 Aug 2007 04:05:15 +0000 (00:05 -0400)
committer Shawn O. Pearce <spearce@spearce.org>
Sun, 19 Aug 2007 07:38:35 +0000 (03:38 -0400)
Several frontend developers have asked that some form of stream
comments be permitted within a fast-import data stream. This way
they can include information from their own frontend program about
where specific data was taken from in the source system, or about
a decision that their frontend may have made while creating the
fast-import data stream.

This change introduces comments in the Bourne-shell/Tcl/Perl style.
Lines starting with '#' are ignored, up to and including the LF.
Unlike the three languages mentioned above, however, we do not look for
and ignore leading whitespace. This just simplifies the definition
of the comment format and the code that parses them.

To make comments work we had to stop using read_next_command() within
cmd_data() and directly invoke read_line() during the inline variant
of the function. This is necessary to retain any lines of the
input data that might otherwise look like a comment to fast-import.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Documentation/git-fast-import.txt
fast-import.c
t/t9300-fast-import.sh
index 30ee98d17f537e289c08d94d9b43728a4b147a34..a92ae6d1a36f800256a477f23c8280e2717b13c9 100644 (file)
@@ -176,6 +176,15 @@ results, such as branch names or file names with leading or trailing
 spaces in their name, or early termination of fast-import when it encounters
 unexpected input.
 
+Stream Comments
+~~~~~~~~~~~~~~~
+To aid in debugging frontends, fast-import ignores any line that
+begins with `#` (ASCII pound/hash) up to and including the line
+ending `LF`.  A comment line may contain any sequence of bytes
+that does not contain an LF and therefore may be used to include
+any detailed debugging information that might be specific to the
+frontend and useful when inspecting a fast-import data stream.
+
 Date Formats
 ~~~~~~~~~~~~
 The following date formats are supported.  A frontend should select
@@ -689,6 +698,11 @@ intended for production-quality conversions should always use the
 exact byte count format, as it is more robust and performs better.
 The delimited format is intended primarily for testing fast-import.
 
+Comment lines appearing within the `<raw>` part of `data` commands
+are always taken to be part of the body of the data and are therefore
+never ignored by fast-import.  This makes it safe to import any
+file/message content whose lines might start with `#`.
+
 Exact byte count format::
        The frontend must specify the number of bytes of data.
 +
index d7fa2b7baaf19f9db46d268ac8954bf9623c76b1..98ebe4770d239eeee26aca339166bd271adc5d8d 100644 (file)
@@ -122,6 +122,17 @@ Format of STDIN stream:
   email ::= # valid GIT author/committer email;
   ts    ::= # time since the epoch in seconds, ascii base10 notation;
   tz    ::= # GIT style timezone;
+
+     # note: comments may appear anywhere in the input, except
+     # within a data command.  Any form of the data command
+     # always escapes the related input from comment processing.
+     #
+     # In case it is not clear, the '#' that starts the comment
+     # must be the first character on the line (an lf must have
+     # preceded it).
+     #
+  comment ::= '#' not_lf* lf;
+  not_lf  ::= # Any byte that is not ASCII newline (LF);
 */
 
 #include "builtin.h"
@@ -1454,7 +1465,9 @@ static void dump_marks(void)
 
 static void read_next_command(void)
 {
-       read_line(&command_buf, stdin, '\n');
+       do {
+               read_line(&command_buf, stdin, '\n');
+       } while (!command_buf.eof && command_buf.buf[0] == '#');
 }
 
 static void cmd_mark(void)
@@ -1481,7 +1494,7 @@ static void *cmd_data (size_t *size)
                length = 0;
                buffer = xmalloc(sz);
                for (;;) {
-                       read_next_command();
+                       read_line(&command_buf, stdin, '\n');
                        if (command_buf.eof)
                                die("EOF in data (terminator '%s' not found)", term);
                        if (term_len == command_buf.len
index dac6135b22716bf8f39dfeb6ffd75cbdcf3ae22d..1f6426a49e5c42a865ca7fa2c736223b04105f46 100755 (executable)
@@ -778,4 +778,44 @@ test_expect_success \
        'git-fast-import <input &&
         test `git-rev-parse N2^{tree}` = `git-rev-parse N3^{tree}`'
 
+###
+### series O
+###
+
+cat >input <<INPUT_END
+#we will
+commit refs/heads/O1
+# -- ignore all of this text
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+# $GIT_COMMITTER_NAME has inserted here for his benefit.
+data <<COMMIT
+dirty directory copy
+COMMIT
+
+# don't forget the import blank line!
+#
+# yes, we started from our usual base of branch^0.
+# i like branch^0.
+from refs/heads/branch^0
+# and we need to reuse file2/file5 from N3 above.
+M 644 inline file2/file5
+# otherwise the tree will be different
+data <<EOF
+$file5_data
+EOF
+
+# don't forget to copy file2 to file3
+C file2 file3
+#
+# or to delete file5 from file2.
+D file2/file5
+# are we done yet?
+
+INPUT_END
+
+test_expect_success \
+       'O: comments are all skipped' \
+       'git-fast-import <input &&
+        test `git-rev-parse N3` = `git-rev-parse O1`'
+
 test_done