fast-import: let importers retrieve blobs
authorDavid Barr <david.barr@cordelta.com>
Sun, 28 Nov 2010 19:45:01 +0000 (13:45 -0600)
committerJunio C Hamano <gitster@pobox.com>
Wed, 1 Dec 2010 21:27:37 +0000 (13:27 -0800)
New objects written by fast-import are not available immediately.
Until a checkpoint has been started and finishes writing the pack
index, any new blobs will not be accessible using standard git tools.

So introduce a new way to access them: a "cat-blob" command in the
command stream requests for fast-import to print a blob to stdout or a
file descriptor specified by the argument to --cat-blob-fd. The value
for cat-blob-fd cannot be specified in the stream because that would
be a layering violation: the decision of where to direct a stream has
to be made when fast-import is started anyway, so we might as well
make the stream format is independent of that detail.

Output uses the same format as "git cat-file --batch".

Thanks to Sverre Rabbelier and Sam Vilain for guidance in designing
the protocol.

Based-on-patch-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-fast-import.txt
fast-import.c
t/t9300-fast-import.sh
index ef86763948dbebefd810032bfca1e757d2016920..5d8f60c7909e0fe3dbbdf900cc885115bc932dc8 100644 (file)
@@ -92,6 +92,11 @@ OPTIONS
        --(no-)-relative-marks= with the --(import|export)-marks=
        options.
 
        --(no-)-relative-marks= with the --(import|export)-marks=
        options.
 
+--cat-blob-fd=<fd>::
+       Specify the file descriptor that will be written to
+       when the `cat-blob` command is encountered in the stream.
+       The default behaviour is to write to `stdout`.
+
 --export-pack-edges=<file>::
        After creating a packfile, print a line of data to
        <file> listing the filename of the packfile and the last
 --export-pack-edges=<file>::
        After creating a packfile, print a line of data to
        <file> listing the filename of the packfile and the last
@@ -320,6 +325,11 @@ and control the current import process.  More detailed discussion
        standard output.  This command is optional and is not needed
        to perform an import.
 
        standard output.  This command is optional and is not needed
        to perform an import.
 
+`cat-blob`::
+       Causes fast-import to print a blob in 'cat-file --batch'
+       format to the file descriptor set with `--cat-blob-fd` or
+       `stdout` if unspecified.
+
 `feature`::
        Require that fast-import supports the specified feature, or
        abort if it does not.
 `feature`::
        Require that fast-import supports the specified feature, or
        abort if it does not.
@@ -879,6 +889,29 @@ Placing a `progress` command immediately after a `checkpoint` will
 inform the reader when the `checkpoint` has been completed and it
 can safely access the refs that fast-import updated.
 
 inform the reader when the `checkpoint` has been completed and it
 can safely access the refs that fast-import updated.
 
+`cat-blob`
+~~~~~~~~~~
+Causes fast-import to print a blob to a file descriptor previously
+arranged with the `--cat-blob-fd` argument.  The command otherwise
+has no impact on the current import; its main purpose is to
+retrieve blobs that may be in fast-import's memory but not
+accessible from the target repository.
+
+....
+       'cat-blob' SP <dataref> LF
+....
+
+The `<dataref>` can be either a mark reference (`:<idnum>`)
+set previously or a full 40-byte SHA-1 of a Git blob, preexisting or
+ready to be written.
+
+output uses the same format as `git cat-file --batch`:
+
+====
+       <sha1> SP 'blob' SP <size> LF
+       <contents> LF
+====
+
 `feature`
 ~~~~~~~~~
 Require that fast-import supports the specified feature, or abort if
 `feature`
 ~~~~~~~~~
 Require that fast-import supports the specified feature, or abort if
@@ -905,6 +938,13 @@ import-marks::
        second, an --import-marks= command-line option overrides
        any "feature import-marks" command in the stream.
 
        second, an --import-marks= command-line option overrides
        any "feature import-marks" command in the stream.
 
+cat-blob::
+       Ignored.  Versions of fast-import not supporting the
+       "cat-blob" command will exit with a message indicating so.
+       This lets the import error out early with a clear message,
+       rather than wasting time on the early part of an import
+       before the unsupported command is detected.
+
 `option`
 ~~~~~~~~
 Processes the specified option so that git fast-import behaves in a
 `option`
 ~~~~~~~~
 Processes the specified option so that git fast-import behaves in a
@@ -930,6 +970,7 @@ not be passed as option:
 * date-format
 * import-marks
 * export-marks
 * date-format
 * import-marks
 * export-marks
+* cat-blob-fd
 * force
 
 Crash Reports
 * force
 
 Crash Reports
index 4bd9bf7d08905ab9e37671734e00222fb1b917ca..dd58f517b9f0e30fcd232afffbdeca1831b1b1e2 100644 (file)
@@ -55,6 +55,8 @@ Format of STDIN stream:
     ('from' sp committish lf)?
     lf?;
 
     ('from' sp committish lf)?
     lf?;
 
+  cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf;
+
   checkpoint ::= 'checkpoint' lf
     lf?;
 
   checkpoint ::= 'checkpoint' lf
     lf?;
 
@@ -361,6 +363,9 @@ static uintmax_t next_mark;
 static struct strbuf new_data = STRBUF_INIT;
 static int seen_data_command;
 
 static struct strbuf new_data = STRBUF_INIT;
 static int seen_data_command;
 
+/* Where to write output of cat-blob commands */
+static int cat_blob_fd = STDOUT_FILENO;
+
 static void parse_argv(void);
 
 static void write_branch_report(FILE *rpt, struct branch *b)
 static void parse_argv(void);
 
 static void write_branch_report(FILE *rpt, struct branch *b)
@@ -2689,6 +2694,81 @@ static void parse_reset_branch(void)
                unread_command_buf = 1;
 }
 
                unread_command_buf = 1;
 }
 
+static void cat_blob_write(const char *buf, unsigned long size)
+{
+       if (write_in_full(cat_blob_fd, buf, size) != size)
+               die_errno("Write to frontend failed");
+}
+
+static void cat_blob(struct object_entry *oe, unsigned char sha1[20])
+{
+       struct strbuf line = STRBUF_INIT;
+       unsigned long size;
+       enum object_type type = 0;
+       char *buf;
+
+       if (!oe || oe->pack_id == MAX_PACK_ID) {
+               buf = read_sha1_file(sha1, &type, &size);
+       } else {
+               type = oe->type;
+               buf = gfi_unpack_entry(oe, &size);
+       }
+
+       /*
+        * Output based on batch_one_object() from cat-file.c.
+        */
+       if (type <= 0) {
+               strbuf_reset(&line);
+               strbuf_addf(&line, "%s missing\n", sha1_to_hex(sha1));
+               cat_blob_write(line.buf, line.len);
+               strbuf_release(&line);
+               free(buf);
+               return;
+       }
+       if (!buf)
+               die("Can't read object %s", sha1_to_hex(sha1));
+       if (type != OBJ_BLOB)
+               die("Object %s is a %s but a blob was expected.",
+                   sha1_to_hex(sha1), typename(type));
+       strbuf_reset(&line);
+       strbuf_addf(&line, "%s %s %lu\n", sha1_to_hex(sha1),
+                                               typename(type), size);
+       cat_blob_write(line.buf, line.len);
+       strbuf_release(&line);
+       cat_blob_write(buf, size);
+       cat_blob_write("\n", 1);
+       free(buf);
+}
+
+static void parse_cat_blob(void)
+{
+       const char *p;
+       struct object_entry *oe = oe;
+       unsigned char sha1[20];
+
+       /* cat-blob SP <object> LF */
+       p = command_buf.buf + strlen("cat-blob ");
+       if (*p == ':') {
+               char *x;
+               oe = find_mark(strtoumax(p + 1, &x, 10));
+               if (x == p + 1)
+                       die("Invalid mark: %s", command_buf.buf);
+               if (!oe)
+                       die("Unknown mark: %s", command_buf.buf);
+               if (*x)
+                       die("Garbage after mark: %s", command_buf.buf);
+               hashcpy(sha1, oe->idx.sha1);
+       } else {
+               if (get_sha1_hex(p, sha1))
+                       die("Invalid SHA1: %s", command_buf.buf);
+               if (p[40])
+                       die("Garbage after SHA1: %s", command_buf.buf);
+               oe = find_object(sha1);
+       }
+
+       cat_blob(oe, sha1);
+}
+
 static void parse_checkpoint(void)
 {
        if (object_count) {
 static void parse_checkpoint(void)
 {
        if (object_count) {
@@ -2773,6 +2853,14 @@ static void option_export_marks(const char *marks)
        safe_create_leading_directories_const(export_marks_file);
 }
 
        safe_create_leading_directories_const(export_marks_file);
 }
 
+static void option_cat_blob_fd(const char *fd)
+{
+       unsigned long n = ulong_arg("--cat-blob-fd", fd);
+       if (n > (unsigned long) INT_MAX)
+               die("--cat-blob-fd cannot exceed %d", INT_MAX);
+       cat_blob_fd = (int) n;
+}
+
 static void option_export_pack_edges(const char *edges)
 {
        if (pack_edges)
 static void option_export_pack_edges(const char *edges)
 {
        if (pack_edges)
@@ -2826,6 +2914,8 @@ static int parse_one_feature(const char *feature, int from_stream)
                option_import_marks(feature + 13, from_stream);
        } else if (!prefixcmp(feature, "export-marks=")) {
                option_export_marks(feature + 13);
                option_import_marks(feature + 13, from_stream);
        } else if (!prefixcmp(feature, "export-marks=")) {
                option_export_marks(feature + 13);
+       } else if (!strcmp(feature, "cat-blob")) {
+               ; /* Don't die - this feature is supported */
        } else if (!prefixcmp(feature, "relative-marks")) {
                relative_marks_paths = 1;
        } else if (!prefixcmp(feature, "no-relative-marks")) {
        } else if (!prefixcmp(feature, "relative-marks")) {
                relative_marks_paths = 1;
        } else if (!prefixcmp(feature, "no-relative-marks")) {
@@ -2920,6 +3010,11 @@ static void parse_argv(void)
                if (parse_one_feature(a + 2, 0))
                        continue;
 
                if (parse_one_feature(a + 2, 0))
                        continue;
 
+               if (!prefixcmp(a + 2, "cat-blob-fd=")) {
+                       option_cat_blob_fd(a + 2 + strlen("cat-blob-fd="));
+                       continue;
+               }
+
                die("unknown option %s", a);
        }
        if (i != global_argc)
                die("unknown option %s", a);
        }
        if (i != global_argc)
@@ -2971,6 +3066,8 @@ int main(int argc, const char **argv)
                        parse_new_tag();
                else if (!prefixcmp(command_buf.buf, "reset "))
                        parse_reset_branch();
                        parse_new_tag();
                else if (!prefixcmp(command_buf.buf, "reset "))
                        parse_reset_branch();
+               else if (!prefixcmp(command_buf.buf, "cat-blob "))
+                       parse_cat_blob();
                else if (!strcmp("checkpoint", command_buf.buf))
                        parse_checkpoint();
                else if (!prefixcmp(command_buf.buf, "progress "))
                else if (!strcmp("checkpoint", command_buf.buf))
                        parse_checkpoint();
                else if (!prefixcmp(command_buf.buf, "progress "))
index c80bb0cf10680dce5d28197416dff7d851aa553f..a0162b73d9e63659f6ef9102daede32bce816f04 100755 (executable)
@@ -23,11 +23,18 @@ file5_data='an inline file.
 file6_data='#!/bin/sh
 echo "$@"'
 
 file6_data='#!/bin/sh
 echo "$@"'
 
+>empty
+
 ###
 ### series A
 ###
 
 test_tick
 ###
 ### series A
 ###
 
 test_tick
+
+test_expect_success 'empty stream succeeds' '
+       git fast-import </dev/null
+'
+
 cat >input <<INPUT_END
 blob
 mark :2
 cat >input <<INPUT_END
 blob
 mark :2
@@ -1632,6 +1639,190 @@ test_expect_success 'R: feature no-relative-marks should be honoured' '
     test_cmp marks.new non-relative.out
 '
 
     test_cmp marks.new non-relative.out
 '
 
+test_expect_success 'R: feature cat-blob supported' '
+       echo "feature cat-blob" |
+       git fast-import
+'
+
+test_expect_success 'R: cat-blob-fd must be a nonnegative integer' '
+       test_must_fail git fast-import --cat-blob-fd=-1 </dev/null
+'
+
+test_expect_success 'R: print old blob' '
+       blob=$(echo "yes it can" | git hash-object -w --stdin) &&
+       cat >expect <<-EOF &&
+       ${blob} blob 11
+       yes it can
+
+       EOF
+       echo "cat-blob $blob" |
+       git fast-import --cat-blob-fd=6 6>actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'R: in-stream cat-blob-fd not respected' '
+       echo hello >greeting &&
+       blob=$(git hash-object -w greeting) &&
+       cat >expect <<-EOF &&
+       ${blob} blob 6
+       hello
+
+       EOF
+       git fast-import --cat-blob-fd=3 3>actual.3 >actual.1 <<-EOF &&
+       cat-blob $blob
+       EOF
+       test_cmp expect actual.3 &&
+       test_cmp empty actual.1 &&
+       git fast-import 3>actual.3 >actual.1 <<-EOF &&
+       option cat-blob-fd=3
+       cat-blob $blob
+       EOF
+       test_cmp empty actual.3 &&
+       test_cmp expect actual.1
+'
+
+test_expect_success 'R: print new blob' '
+       blob=$(echo "yep yep yep" | git hash-object --stdin) &&
+       cat >expect <<-EOF &&
+       ${blob} blob 12
+       yep yep yep
+
+       EOF
+       git fast-import --cat-blob-fd=6 6>actual <<-\EOF &&
+       blob
+       mark :1
+       data <<BLOB_END
+       yep yep yep
+       BLOB_END
+       cat-blob :1
+       EOF
+       test_cmp expect actual
+'
+
+test_expect_success 'R: print new blob by sha1' '
+       blob=$(echo "a new blob named by sha1" | git hash-object --stdin) &&
+       cat >expect <<-EOF &&
+       ${blob} blob 25
+       a new blob named by sha1
+
+       EOF
+       git fast-import --cat-blob-fd=6 6>actual <<-EOF &&
+       blob
+       data <<BLOB_END
+       a new blob named by sha1
+       BLOB_END
+       cat-blob $blob
+       EOF
+       test_cmp expect actual
+'
+
+test_expect_success 'setup: big file' '
+       (
+               echo "the quick brown fox jumps over the lazy dog" >big &&
+               for i in 1 2 3
+               do
+                       cat big big big big >bigger &&
+                       cat bigger bigger bigger bigger >big ||
+                       exit
+               done
+       )
+'
+
+test_expect_success 'R: print two blobs to stdout' '
+       blob1=$(git hash-object big) &&
+       blob1_len=$(wc -c <big) &&
+       blob2=$(echo hello | git hash-object --stdin) &&
+       {
+               echo ${blob1} blob $blob1_len &&
+               cat big &&
+               cat <<-EOF
+
+               ${blob2} blob 6
+               hello
+
+               EOF
+       } >expect &&
+       {
+               cat <<-\END_PART1 &&
+                       blob
+                       mark :1
+                       data <<data_end
+               END_PART1
+               cat big &&
+               cat <<-\EOF
+                       data_end
+                       blob
+                       mark :2
+                       data <<data_end
+                       hello
+                       data_end
+                       cat-blob :1
+                       cat-blob :2
+               EOF
+       } |
+       git fast-import >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'setup: have pipes?' '
+       rm -f frob &&
+       if mkfifo frob
+       then
+               test_set_prereq PIPE
+       fi
+'
+
+test_expect_success PIPE 'R: copy using cat-file' '
+       expect_id=$(git hash-object big) &&
+       expect_len=$(wc -c <big) &&
+       echo $expect_id blob $expect_len >expect.response &&
+
+       rm -f blobs &&
+       cat >frontend <<-\FRONTEND_END &&
+       #!/bin/sh
+       cat <<EOF &&
+       feature cat-blob
+       blob
+       mark :1
+       data <<BLOB
+       EOF
+       cat big
+       cat <<EOF
+       BLOB
+       cat-blob :1
+       EOF
+
+       read blob_id type size <&3 &&
+       echo "$blob_id $type $size" >response &&
+       dd if=/dev/stdin of=blob bs=$size count=1 <&3 &&
+       read newline <&3 &&
+
+       cat <<EOF &&
+       commit refs/heads/copied
+       committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+       data <<COMMIT
+       copy big file as file3
+       COMMIT
+       M 644 inline file3
+       data <<BLOB
+       EOF
+       cat blob &&
+       cat <<EOF
+       BLOB
+       EOF
+       FRONTEND_END
+
+       mkfifo blobs &&
+       (
+               export GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL GIT_COMMITTER_DATE &&
+               sh frontend 3<blobs |
+               git fast-import --cat-blob-fd=3 3>blobs
+       ) &&
+       git show copied:file3 >actual &&
+       test_cmp expect.response response &&
+       test_cmp big actual
+'
+
 cat >input << EOF
 option git quiet
 blob
 cat >input << EOF
 option git quiet
 blob
@@ -1640,8 +1831,6 @@ hi
 
 EOF
 
 
 EOF
 
-touch empty
-
 test_expect_success 'R: quiet option results in no stats being output' '
     cat input | git fast-import 2> output &&
     test_cmp empty output
 test_expect_success 'R: quiet option results in no stats being output' '
     cat input | git fast-import 2> output &&
     test_cmp empty output