git-p4: add config git-p4.pathEncoding
author: Lars Schneider <larsxschneider@gmail.com>
Thu, 3 Sep 2015 09:14:07 +0000 (11:14 +0200)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 3 Sep 2015 21:11:49 +0000 (14:11 -0700)
Perforce keeps the encoding of a path as given by the originating OS.
Git expects paths encoded as UTF-8. Add a config to tell git-p4 what
encoding Perforce had used for the paths. This encoding is used to
transcode the paths to UTF-8. As an example, Perforce on Windows often
uses “cp1252” to encode path names.

Signed-off-by: Lars Schneider <larsxschneider@gmail.com>
Acked-by: Luke Diamand <luke@diamand.org>
Signed-off-by: Lars Schneider <larsxschneider@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-p4.txt
git-p4.py
t/t9822-git-p4-path-encoding.sh [new file with mode: 0755]
index 82aa5d60736ccc75510c19ddec2da9eb4c5611a8..12a57d49f4d6b42368a487ecde02baa8473cbf6d 100644 (file)
@@ -510,6 +510,13 @@ git-p4.useClientSpec::
        option '--use-client-spec'.  See the "CLIENT SPEC" section above.
        This variable is a boolean, not the name of a p4 client.
 
+git-p4.pathEncoding::
+       Perforce keeps the encoding of a path as given by the originating OS.
+       Git expects paths encoded as UTF-8. Use this config to tell git-p4
+       what encoding Perforce had used for the paths. This encoding is used
+       to transcode the paths to UTF-8. As an example, Perforce on Windows
+       often uses “cp1252” to encode path names.
+
 Submit variables
 ~~~~~~~~~~~~~~~~
 git-p4.detectRenames::
index 073f87bbfdc1dc7ebc3fe610e78709667f597d82..b1ad86de7f2b735bcad05d32762e11bf460219fa 100755 (executable)
--- a/git-p4.py
+++ b/git-p4.py
@@ -2213,6 +2213,17 @@ def streamOneP4File(self, file, contents):
             text = regexp.sub(r'$\1$', text)
             contents = [ text ]
 
+        if gitConfig("git-p4.pathEncoding"):
+            relPath = relPath.decode(gitConfig("git-p4.pathEncoding")).encode('utf8', 'replace')
+        elif self.verbose:
+            try:
+                relPath.decode('ascii')
+            except:
+                print (
+                    "Path with Non-ASCII characters detected and no path encoding defined. "
+                    "Please check the encoding: %s" % relPath
+                )
+
         self.gitStream.write("M %s inline %s\n" % (git_mode, relPath))
 
         # total length...
diff --git a/t/t9822-git-p4-path-encoding.sh b/t/t9822-git-p4-path-encoding.sh
new file mode 100755 (executable)
index 0000000..7b83e69
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+test_description='Clone repositories with non ASCII paths'
+
+. ./lib-git-p4.sh
+
+UTF8_ESCAPED="a-\303\244_o-\303\266_u-\303\274.txt"
+ISO8859_ESCAPED="a-\344_o-\366_u-\374.txt"
+
+test_expect_success 'start p4d' '
+       start_p4d
+'
+
+test_expect_success 'Create a repo containing iso8859-1 encoded paths' '
+       (
+               cd "$cli" &&
+               ISO8859="$(printf "$ISO8859_ESCAPED")" &&
+               echo content123 >"$ISO8859" &&
+               p4 add "$ISO8859" &&
+               p4 submit -d "test commit"
+       )
+'
+
+test_expect_failure 'Clone auto-detects depot with iso8859-1 paths' '
+       git p4 clone --destination="$git" //depot &&
+       test_when_finished cleanup_git &&
+       (
+               cd "$git" &&
+               UTF8="$(printf "$UTF8_ESCAPED")" &&
+               echo "$UTF8" >expect &&
+               git -c core.quotepath=false ls-files >actual &&
+               test_cmp expect actual
+       )
+'
+
+test_expect_success 'Clone repo containing iso8859-1 encoded paths with git-p4.pathEncoding' '
+       test_when_finished cleanup_git &&
+       (
+               cd "$git" &&
+               git init . &&
+               git config git-p4.pathEncoding iso8859-1 &&
+               git p4 clone --use-client-spec --destination="$git" //depot &&
+               UTF8="$(printf "$UTF8_ESCAPED")" &&
+               echo "$UTF8" >expect &&
+               git -c core.quotepath=false ls-files >actual &&
+               test_cmp expect actual &&
+
+               echo content123 >expect &&
+               cat "$UTF8" >actual &&
+               test_cmp expect actual
+       )
+'
+
+test_expect_success 'kill p4d' '
+       kill_p4d
+'
+
+test_done