clone: do not include authentication data in guessed dir
authorPatrick Steinhardt <ps@pks.im>
Mon, 10 Aug 2015 15:48:23 +0000 (17:48 +0200)
committerJunio C Hamano <gitster@pobox.com>
Mon, 10 Aug 2015 18:01:08 +0000 (11:01 -0700)
If the URI contains authentication data and the URI's path
component is empty, we fail to guess a sensible directory name.
E.g. cloning a repository 'ssh://user:password@example.com/' we
guess a directory name 'password@example.com' where we would want
the hostname only, e.g. 'example.com'.

The naive way of just adding '@' as a path separator would break
cloning repositories like 'foo/bar@baz.git' (which would
currently become 'bar@baz' but would then become 'baz' only).
Instead fix this by first dropping the scheme and then greedily
scanning for an '@' sign until we find the first path separator.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/clone.c
t/t5603-clone-dirname.sh
index ed484cb6f488883872be7cde82c67d12cebeab4e..6aa286fd7be6cc7e6de8194c4c37c84a474be6a2 100644 (file)
@@ -146,30 +146,51 @@ static char *get_repo_path(const char *repo, int *is_bundle)
 
 static char *guess_dir_name(const char *repo, int is_bundle, int is_bare)
 {
-       const char *end = repo + strlen(repo), *start;
+       const char *end = repo + strlen(repo), *start, *ptr;
        size_t len;
        char *dir;
 
+       /*
+        * Skip scheme.
+        */
+       start = strstr(repo, "://");
+       if (start == NULL)
+               start = repo;
+       else
+               start += 3;
+
+       /*
+        * Skip authentication data. The stripping does happen
+        * greedily, such that we strip up to the last '@' inside
+        * the host part.
+        */
+       for (ptr = start; ptr < end && !is_dir_sep(*ptr); ptr++) {
+               if (*ptr == '@')
+                       start = ptr + 1;
+       }
+
        /*
         * Strip trailing spaces, slashes and /.git
         */
-       while (repo < end && (is_dir_sep(end[-1]) || isspace(end[-1])))
+       while (start < end && (is_dir_sep(end[-1]) || isspace(end[-1])))
                end--;
-       if (end - repo > 5 && is_dir_sep(end[-5]) &&
+       if (end - start > 5 && is_dir_sep(end[-5]) &&
            !strncmp(end - 4, ".git", 4)) {
                end -= 5;
-               while (repo < end && is_dir_sep(end[-1]))
+               while (start < end && is_dir_sep(end[-1]))
                        end--;
        }
 
        /*
-        * Find last component, but be prepared that repo could have
-        * the form  "remote.example.com:foo.git", i.e. no slash
-        * in the directory part.
+        * Find last component. To remain backwards compatible we
+        * also regard colons as path separators, such that
+        * cloning a repository 'foo:bar.git' would result in a
+        * directory 'bar' being guessed.
         */
-       start = end;
-       while (repo < start && !is_dir_sep(start[-1]) && start[-1] != ':')
-               start--;
+       ptr = end;
+       while (start < ptr && !is_dir_sep(ptr[-1]) && ptr[-1] != ':')
+               ptr--;
+       start = ptr;
 
        /*
         * Strip .{bundle,git}.
index 765cc434ef64daac6a2383e32aabbf5581345b78..0307462b03688e1f364e5a3949b45a4445914133 100755 (executable)
@@ -77,11 +77,11 @@ test_clone_dir host:foo/.git/// foo
 # omitting the path should default to the hostname
 test_clone_dir ssh://host/ host
 test_clone_dir ssh://host:1234/ host fail
-test_clone_dir ssh://user@host/ host fail
+test_clone_dir ssh://user@host/ host
 test_clone_dir host:/ host fail
 
 # auth materials should be redacted
-test_clone_dir ssh://user:password@host/ host fail
+test_clone_dir ssh://user:password@host/ host
 test_clone_dir ssh://user:password@host:1234/ host fail
 test_clone_dir ssh://user:passw@rd@host:1234/ host fail
 test_clone_dir user@host:/ host fail