for intermediary filters (e.g. for rewriting commit messages
which refer to older commits, or for stripping blobs by id).
+--reencode=(yes|no|abort)::
+ Specify how to handle `encoding` header in commit objects. When
+ asking to 'abort' (which is the default), this program will die
+ when encountering such a commit object. With 'yes', the commit
+ message will be reencoded into UTF-8. With 'no', the original
+ encoding will be preserved.
+
--refspec::
Apply the specified refspec to each ref exported. Multiple of them can
be specified.
original-oid?
('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+ ('encoding' SP <encoding>)?
data
('from' SP <commit-ish> LF)?
('merge' SP <commit-ish> LF)?
See ``Date Formats'' above for the set of supported formats, and
their syntax.
+`encoding`
+^^^^^^^^^^
+The optional `encoding` command indicates the encoding of the commit
+message. Most commits are UTF-8 and the encoding is omitted, but this
+allows importing commit messages into git without first reencoding them.
+
`from`
^^^^^^
The `from` command is used to specify the commit to initialize
static int progress;
static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
+static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
static int fake_missing_tagger;
static int use_done_feature;
static int no_data;
return 0;
}
+static int parse_opt_reencode_mode(const struct option *opt,
+ const char *arg, int unset)
+{
+ if (unset) {
+ reencode_mode = REENCODE_ABORT;
+ return 0;
+ }
+
+ switch (git_parse_maybe_bool(arg)) {
+ case 0:
+ reencode_mode = REENCODE_NO;
+ break;
+ case 1:
+ reencode_mode = REENCODE_YES;
+ break;
+ default:
+ if (!strcasecmp(arg, "abort"))
+ reencode_mode = REENCODE_ABORT;
+ else
+ return error("Unknown reencoding mode: %s", arg);
+ }
+
+ return 0;
+}
+
static struct decoration idnums;
static uint32_t last_idnum;
bol = memmem(begin, end ? end - begin : strlen(begin),
needle, strlen(needle));
if (!bol)
- return git_commit_encoding;
+ return NULL;
bol += strlen(needle);
eol = strchrnul(bol, '\n');
*eol = '\0';
}
mark_next_object(&commit->object);
- if (anonymize)
+ if (anonymize) {
reencoded = anonymize_commit_message(message);
- else if (!is_encoding_utf8(encoding))
- reencoded = reencode_string(message, "UTF-8", encoding);
+ } else if (encoding) {
+ switch(reencode_mode) {
+ case REENCODE_YES:
+ reencoded = reencode_string(message, "UTF-8", encoding);
+ break;
+ case REENCODE_NO:
+ break;
+ case REENCODE_ABORT:
+ die("Encountered commit-specific encoding %s in commit "
+ "%s; use --reencode=[yes|no] to handle it",
+ encoding, oid_to_hex(&commit->object.oid));
+ }
+ }
if (!commit->parents)
printf("reset %s\n", refname);
printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
if (show_original_ids)
printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
- printf("%.*s\n%.*s\ndata %u\n%s",
+ printf("%.*s\n%.*s\n",
(int)(author_end - author), author,
- (int)(committer_end - committer), committer,
+ (int)(committer_end - committer), committer);
+ if (!reencoded && encoding)
+ printf("encoding %s\n", encoding);
+ printf("data %u\n%s",
(unsigned)(reencoded
? strlen(reencoded) : message
? strlen(message) : 0),
OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
N_("select handling of tags that tag filtered objects"),
parse_opt_tag_of_filtered_mode),
+ OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
+ N_("select handling of commit messages in an alternate encoding"),
+ parse_opt_reencode_mode),
OPT_STRING(0, "export-marks", &export_filename, N_("file"),
N_("Dump marks to this file")),
OPT_STRING(0, "import-marks", &import_filename, N_("file"),
struct branch *b;
char *author = NULL;
char *committer = NULL;
+ const char *encoding = NULL;
struct hash_list *merge_list = NULL;
unsigned int merge_count;
unsigned char prev_fanout, new_fanout;
}
if (!committer)
die("Expected committer but didn't get one");
+ if (skip_prefix(command_buf.buf, "encoding ", &encoding))
+ read_next_command();
parse_data(&msg, 0, NULL);
read_next_command();
parse_from(b);
}
strbuf_addf(&new_data,
"author %s\n"
- "committer %s\n"
- "\n",
+ "committer %s\n",
author ? author : committer, committer);
+ if (encoding)
+ strbuf_addf(&new_data,
+ "encoding %s\n",
+ encoding);
+ strbuf_addch(&new_data, '\n');
strbuf_addbuf(&new_data, &msg);
free(author);
free(committer);
sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
'
+###
+### series X (other new features)
+###
+
+test_expect_success 'X: handling encoding' '
+ test_tick &&
+ cat >input <<-INPUT_END &&
+ commit refs/heads/encoding
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ encoding iso-8859-7
+ data <<COMMIT
+ INPUT_END
+
+ printf "Pi: \360\nCOMMIT\n" >>input &&
+
+ git fast-import <input &&
+ git cat-file -p encoding | grep $(printf "\360") &&
+ git log -1 --format=%B encoding | grep $(printf "\317\200")
+'
+
test_done
test $MUSS = $(git rev-parse --verify refs/tags/muss)
'
-test_expect_success 'iso-8859-1' '
+test_expect_success 'reencoding iso-8859-7' '
- git config i18n.commitencoding ISO8859-1 &&
- # use author and committer name in ISO-8859-1 to match it.
- . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
+ test_when_finished "git reset --hard HEAD~1" &&
+ test_config i18n.commitencoding iso-8859-7 &&
test_tick &&
echo rosten >file &&
- git commit -s -m den file &&
- git fast-export wer^..wer >iso8859-1.fi &&
- sed "s/wer/i18n/" iso8859-1.fi |
+ git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
+ git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
+ sed "s/wer/i18n/" iso-8859-7.fi |
(cd new &&
git fast-import &&
+ # The commit object, if not re-encoded, would be 240 bytes.
+ # Removing the "encoding iso-8859-7\n" header drops 20 bytes.
+ # Re-encoding the Pi character from \xF0 (\360) in iso-8859-7
+ # to \xCF\x80 (\317\200) in UTF-8 adds a byte. Check for
+ # the expected size.
+ test 221 -eq "$(git cat-file -s i18n)" &&
+ # ...and for the expected translation of bytes.
git cat-file commit i18n >actual &&
- grep "Áéí óú" actual)
+ grep $(printf "\317\200") actual &&
+ # Also make sure the commit does not have the "encoding" header
+ ! grep ^encoding actual)
+'
+
+test_expect_success 'aborting on iso-8859-7' '
+ test_when_finished "git reset --hard HEAD~1" &&
+ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
+ test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi
'
+
+test_expect_success 'preserving iso-8859-7' '
+
+ test_when_finished "git reset --hard HEAD~1" &&
+ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
+ git fast-export --reencode=no wer^..wer >iso-8859-7.fi &&
+ sed "s/wer/i18n-no-recoding/" iso-8859-7.fi |
+ (cd new &&
+ git fast-import &&
+ # The commit object, if not re-encoded, is 240 bytes.
+ # Removing the "encoding iso-8859-7\n" header would drops 20
+ # bytes. Re-encoding the Pi character from \xF0 (\360) in
+ # iso-8859-7 to \xCF\x80 (\317\200) in UTF-8 adds a byte.
+ # Check for the expected size...
+ test 240 -eq "$(git cat-file -s i18n-no-recoding)" &&
+ # ...as well as the expected byte.
+ git cat-file commit i18n-no-recoding >actual &&
+ grep $(printf "\360") actual &&
+ # Also make sure the commit has the "encoding" header
+ grep ^encoding actual)
+'
+
+test_expect_success 'encoding preserved if reencoding fails' '
+
+ test_when_finished "git reset --hard HEAD~1" &&
+ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -F "$TEST_DIRECTORY/t9350/broken-iso-8859-7-commit-message.txt" file &&
+ git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
+ sed "s/wer/i18n-invalid/" iso-8859-7.fi |
+ (cd new &&
+ git fast-import &&
+ git cat-file commit i18n-invalid >actual &&
+ # Make sure the commit still has the encoding header
+ grep ^encoding actual &&
+ # Verify that the commit has the expected size; i.e.
+ # that no bytes were re-encoded to a different encoding.
+ test 252 -eq "$(git cat-file -s i18n-invalid)" &&
+ # ...and check for the original special bytes
+ grep $(printf "\360") actual &&
+ grep $(printf "\377") actual)
+'
+
test_expect_success 'import/export-marks' '
git checkout -b marks master &&
test_expect_success 'setup copies' '
- git config --unset i18n.commitencoding &&
git checkout -b copy rein &&
git mv file file3 &&
git commit -m move1 &&
--- /dev/null
+Pi: ð; Invalid: ÿ
\ No newline at end of file
--- /dev/null
+Pi: ð
\ No newline at end of file