fast-import: support 'encoding' commit header
author Elijah Newren <newren@gmail.com>
Tue, 14 May 2019 04:30:59 +0000 (21:30 -0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 14 May 2019 07:48:56 +0000 (16:48 +0900)
Since git supports commit messages with an encoding other than UTF-8,
allow fast-import to import such commits. This may be useful for folks
who do not want to reencode commit messages from an external system, and
may also be useful to achieve reversible history rewrites (e.g. sha1sum
<-> sha256sum transitions or subtree work) with git repositories that
have used specialized encodings in their commit history.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-fast-import.txt
fast-import.c
t/t9300-fast-import.sh
index d65cdb3d08fd745bd4996d0e45259077ea8eb000..7baf9e47b5e61391fbfd5fe44acbfa9943bbe397 100644 (file)
@@ -388,6 +388,7 @@ change to the project.
        original-oid?
        ('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
        'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+       ('encoding' SP <encoding> LF)?
        data
        ('from' SP <commit-ish> LF)?
        ('merge' SP <commit-ish> LF)?
@@ -455,6 +456,12 @@ that was selected by the --date-format=<fmt> command-line option.
 See ``Date Formats'' above for the set of supported formats, and
 their syntax.
 
+`encoding`
+^^^^^^^^^^
+The optional `encoding` command indicates the encoding of the commit
+message.  Most commits are UTF-8 and the encoding is omitted, but this
+allows importing commit messages into git without first reencoding them.
+
 `from`
 ^^^^^^
 The `from` command is used to specify the commit to initialize
index f38d04fa58510bb7ab35caf4c43d5b2d954cc292..76a7bd369987f7fed63c0f602efe59e0edbd01b8 100644 (file)
@@ -2585,6 +2585,7 @@ static void parse_new_commit(const char *arg)
        struct branch *b;
        char *author = NULL;
        char *committer = NULL;
+       const char *encoding = NULL;
        struct hash_list *merge_list = NULL;
        unsigned int merge_count;
        unsigned char prev_fanout, new_fanout;
@@ -2607,6 +2608,8 @@ static void parse_new_commit(const char *arg)
        }
        if (!committer)
                die("Expected committer but didn't get one");
+       if (skip_prefix(command_buf.buf, "encoding ", &encoding))
+               read_next_command();
        parse_data(&msg, 0, NULL);
        read_next_command();
        parse_from(b);
@@ -2670,9 +2673,13 @@ static void parse_new_commit(const char *arg)
        }
        strbuf_addf(&new_data,
                "author %s\n"
-               "committer %s\n"
-               "\n",
+               "committer %s\n",
                author ? author : committer, committer);
+       if (encoding)
+               strbuf_addf(&new_data,
+                       "encoding %s\n",
+                       encoding);
+       strbuf_addch(&new_data, '\n');
        strbuf_addbuf(&new_data, &msg);
        free(author);
        free(committer);
index 3668263c4046d96fdc79ea3ebe0c28bcb1f2de24..141b7fa35e74b860d91ea7cdabf48730442ed635 100755 (executable)
@@ -3299,4 +3299,24 @@ test_expect_success !MINGW 'W: get-mark & empty orphan commit with erroneous thi
        sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
 '
 
+###
+### series X (other new features)
+###
+
+test_expect_success 'X: handling encoding' '
+       test_tick &&
+       cat >input <<-INPUT_END &&
+       commit refs/heads/encoding
+       committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+       encoding iso-8859-7
+       data <<COMMIT
+       INPUT_END
+
+       printf "Pi: \360\nCOMMIT\n" >>input &&
+
+       git fast-import <input &&
+       git cat-file -p encoding | grep $(printf "\360") &&
+       git log -1 --format=%B encoding | grep $(printf "\317\200")
+'
+
 test_done