Merge branch 'ps/urlmatch-wildcard'
author Junio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2017 21:57:12 +0000 (13:57 -0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 27 Feb 2017 21:57:12 +0000 (13:57 -0800)
The <url> part of an "http.<url>.<variable>" configuration variable
can now be spelled with '*' that serves as a wildcard.
E.g. "http.https://*.example.com.proxy" can be used to specify the
proxy used for https://a.example.com, https://b.example.com, etc.,
i.e. any host exactly one level below the example.com domain (the
wildcard matches a single host name component, so it does not cover
https://a.b.example.com).

* ps/urlmatch-wildcard:
urlmatch: allow globbing for the URL host part
urlmatch: include host in urlmatch ranking
urlmatch: split host and port fields in `struct url_info`
urlmatch: enable normalization of URLs with globs
mailmap: add Patrick Steinhardt's work address

.mailmap
Documentation/config.txt
t/t1300-repo-config.sh
urlmatch.c
urlmatch.h
index ab59b2fac613013ffcf5e041fc2c942f3f97cb1f..e06526a49300eb78e5cac3504902d6a3b1053324 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -177,6 +177,7 @@ Paolo Bonzini <bonzini@gnu.org> <paolo.bonzini@lu.unisi.ch>
 Pascal Obry <pascal@obry.net> <pascal.obry@gmail.com>
 Pascal Obry <pascal@obry.net> <pascal.obry@wanadoo.fr>
 Pat Notz <patnotz@gmail.com> <pknotz@sandia.gov>
+Patrick Steinhardt <ps@pks.im> <patrick.steinhardt@elego.de>
 Paul Mackerras <paulus@samba.org> <paulus@dorrigo.(none)>
 Paul Mackerras <paulus@samba.org> <paulus@pogo.(none)>
 Peter Baumann <waste.manager@gmx.de> <Peter.B.Baumann@stud.informatik.uni-erlangen.de>
index 015346c4173c5f29b33445280ae849f3fcfffaa3..10351a21000ed6e6dfe7eb05bdad53b9b9fba3e5 100644 (file)
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1919,7 +1919,10 @@ http.<url>.*::
   must match exactly between the config key and the URL.
 
 . Host/domain name (e.g., `example.com` in `https://example.com/`).
-  This field must match exactly between the config key and the URL.
+  This field must match between the config key and the URL. It is
+  possible to specify a `*` as part of the host name to match all subdomains
+  at this level. `https://*.example.com/` for example would match
+  `https://foo.example.com/`, but not `https://foo.bar.example.com/`.
 
 . Port number (e.g., `8080` in `http://example.com:8080/`).
   This field must match exactly between the config key and the URL.
index 923bfc5a2606588f30bebbcfd4c34788976c354c..052f120216670e8c3bb1ef258b02c9fee2cb006f 100755 (executable)
--- a/t/t1300-repo-config.sh
+++ b/t/t1300-repo-config.sh
@@ -1177,6 +1177,111 @@ test_expect_success 'urlmatch' '
        test_cmp expect actual
 '
 
+test_expect_success 'urlmatch favors more specific URLs' '
+       cat >.git/config <<-\EOF &&
+       [http "https://example.com/"]
+               cookieFile = /tmp/root.txt
+       [http "https://example.com/subdirectory"]
+               cookieFile = /tmp/subdirectory.txt
+       [http "https://user@example.com/"]
+               cookieFile = /tmp/user.txt
+       [http "https://averylonguser@example.com/"]
+               cookieFile = /tmp/averylonguser.txt
+       [http "https://preceding.example.com"]
+               cookieFile = /tmp/preceding.txt
+       [http "https://*.example.com"]
+               cookieFile = /tmp/wildcard.txt
+       [http "https://*.example.com/wildcardwithsubdomain"]
+               cookieFile = /tmp/wildcardwithsubdomain.txt
+       [http "https://trailing.example.com"]
+               cookieFile = /tmp/trailing.txt
+       [http "https://user@*.example.com/"]
+               cookieFile = /tmp/wildcardwithuser.txt
+       [http "https://sub.example.com/"]
+               cookieFile = /tmp/sub.txt
+       EOF
+
+       echo http.cookiefile /tmp/root.txt >expect &&
+       git config --get-urlmatch HTTP https://example.com >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/subdirectory.txt >expect &&
+       git config --get-urlmatch HTTP https://example.com/subdirectory >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/subdirectory.txt >expect &&
+       git config --get-urlmatch HTTP https://example.com/subdirectory/nested >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/user.txt >expect &&
+       git config --get-urlmatch HTTP https://user@example.com/ >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/subdirectory.txt >expect &&
+       git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/preceding.txt >expect &&
+       git config --get-urlmatch HTTP https://preceding.example.com >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/wildcard.txt >expect &&
+       git config --get-urlmatch HTTP https://wildcard.example.com >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/sub.txt >expect &&
+       git config --get-urlmatch HTTP https://sub.example.com/wildcardwithsubdomain >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/trailing.txt >expect &&
+       git config --get-urlmatch HTTP https://trailing.example.com >actual &&
+       test_cmp expect actual &&
+
+       echo http.cookiefile /tmp/sub.txt >expect &&
+       git config --get-urlmatch HTTP https://user@sub.example.com >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'urlmatch with wildcard' '
+       cat >.git/config <<-\EOF &&
+       [http]
+               sslVerify
+       [http "https://*.example.com"]
+               sslVerify = false
+               cookieFile = /tmp/cookie.txt
+       EOF
+
+       test_expect_code 1 git config --bool --get-urlmatch doesnt.exist https://good.example.com >actual &&
+       test_must_be_empty actual &&
+
+       echo true >expect &&
+       git config --bool --get-urlmatch http.SSLverify https://example.com >actual &&
+       test_cmp expect actual &&
+
+       echo true >expect &&
+       git config --bool --get-urlmatch http.SSLverify https://good-example.com >actual &&
+       test_cmp expect actual &&
+
+       echo true >expect &&
+       git config --bool --get-urlmatch http.sslverify https://deep.nested.example.com >actual &&
+       test_cmp expect actual &&
+
+       echo false >expect &&
+       git config --bool --get-urlmatch http.sslverify https://good.example.com >actual &&
+       test_cmp expect actual &&
+
+       {
+               echo http.cookiefile /tmp/cookie.txt &&
+               echo http.sslverify false
+       } >expect &&
+       git config --get-urlmatch HTTP https://good.example.com >actual &&
+       test_cmp expect actual &&
+
+       echo http.sslverify >expect &&
+       git config --get-urlmatch HTTP https://more.example.com.au >actual &&
+       test_cmp expect actual
+'
+
 # good section hygiene
 test_expect_failure 'unsetting the last key in a section removes header' '
        cat >.git/config <<-\EOF &&
index 132d342bc12bf790f9964f179385aa4f08e6ae00..4bbde924e8bf7e73f4f696d0319235ec56263a8f 100644 (file)
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -63,7 +63,50 @@ static int append_normalized_escapes(struct strbuf *buf,
        return 1;
 }
 
-char *url_normalize(const char *url, struct url_info *out_info)
+static const char *end_of_token(const char *s, int c, size_t n)
+{
+       const char *next = memchr(s, c, n);
+       if (!next)
+               next = s + n;
+       return next;
+}
+
+static int match_host(const struct url_info *url_info,
+                     const struct url_info *pattern_info)
+{
+       const char *url = url_info->url + url_info->host_off;
+       const char *pat = pattern_info->url + pattern_info->host_off;
+       int url_len = url_info->host_len;
+       int pat_len = pattern_info->host_len;
+
+       while (url_len && pat_len) {
+               const char *url_next = end_of_token(url, '.', url_len);
+               const char *pat_next = end_of_token(pat, '.', pat_len);
+
+               if (pat_next == pat + 1 && pat[0] == '*')
+                       /* wildcard matches anything */
+                       ;
+               else if ((pat_next - pat) == (url_next - url) &&
+                        !memcmp(url, pat, url_next - url))
+                       /* the components are the same */
+                       ;
+               else
+                       return 0; /* found an unmatch */
+
+               if (url_next < url + url_len)
+                       url_next++;
+               url_len -= url_next - url;
+               url = url_next;
+               if (pat_next < pat + pat_len)
+                       pat_next++;
+               pat_len -= pat_next - pat;
+               pat = pat_next;
+       }
+
+       return (!url_len && !pat_len);
+}
+
+static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs)
 {
        /*
         * Normalize NUL-terminated url using the following rules:
@@ -104,7 +147,7 @@ char *url_normalize(const char *url, struct url_info *out_info)
        struct strbuf norm;
        size_t spanned;
        size_t scheme_len, user_off=0, user_len=0, passwd_off=0, passwd_len=0;
-       size_t host_off=0, host_len=0, port_len=0, path_off, path_len, result_len;
+       size_t host_off=0, host_len=0, port_off=0, port_len=0, path_off, path_len, result_len;
        const char *slash_ptr, *at_ptr, *colon_ptr, *path_start;
        char *result;
 
@@ -191,7 +234,12 @@ char *url_normalize(const char *url, struct url_info *out_info)
                strbuf_release(&norm);
                return NULL;
        }
-       spanned = strspn(url, URL_HOST_CHARS);
+
+       if (allow_globs)
+               spanned = strspn(url, URL_HOST_CHARS "*");
+       else
+               spanned = strspn(url, URL_HOST_CHARS);
+
        if (spanned < colon_ptr - url) {
                /* Host name has invalid characters */
                if (out_info) {
@@ -258,6 +306,7 @@ char *url_normalize(const char *url, struct url_info *out_info)
                                return NULL;
                        }
                        strbuf_addch(&norm, ':');
+                       port_off = norm.len;
                        strbuf_add(&norm, url, slash_ptr - url);
                        port_len = slash_ptr - url;
                }
@@ -265,7 +314,7 @@ char *url_normalize(const char *url, struct url_info *out_info)
                url = slash_ptr;
        }
        if (host_off)
-               host_len = norm.len - host_off;
+               host_len = norm.len - host_off - (port_len ? port_len + 1 : 0);
 
 
        /*
@@ -373,6 +422,7 @@ char *url_normalize(const char *url, struct url_info *out_info)
                out_info->passwd_len = passwd_len;
                out_info->host_off = host_off;
                out_info->host_len = host_len;
+               out_info->port_off = port_off;
                out_info->port_len = port_len;
                out_info->path_off = path_off;
                out_info->path_len = path_len;
@@ -380,6 +430,11 @@ char *url_normalize(const char *url, struct url_info *out_info)
        return result;
 }
 
+char *url_normalize(const char *url, struct url_info *out_info)
+{
+       return url_normalize_1(url, out_info, 0);
+}
+
 static size_t url_match_prefix(const char *url,
                               const char *url_prefix,
                               size_t url_prefix_len)
@@ -414,7 +469,7 @@ static size_t url_match_prefix(const char *url,
 
 static int match_urls(const struct url_info *url,
                      const struct url_info *url_prefix,
-                     int *exactusermatch)
+                     struct urlmatch_item *match)
 {
        /*
         * url_prefix matches url if the scheme, host and port of url_prefix
@@ -433,8 +488,8 @@ static int match_urls(const struct url_info *url,
         * contained a user name or false if url_prefix did not have a
         * user name.  If there is no match *exactusermatch is left untouched.
         */
-       int usermatched = 0;
-       int pathmatchlen;
+       char usermatched = 0;
+       size_t pathmatchlen;
 
        if (!url || !url_prefix || !url->url || !url_prefix->url)
                return 0;
@@ -454,33 +509,53 @@ static int match_urls(const struct url_info *url,
                usermatched = 1;
        }
 
-       /* check the host and port */
-       if (url_prefix->host_len != url->host_len ||
-           strncmp(url->url + url->host_off,
-                   url_prefix->url + url_prefix->host_off, url->host_len))
-               return 0; /* host names and/or ports do not match */
+       /* check the host */
+       if (!match_host(url, url_prefix))
+               return 0; /* host names do not match */
+
+       /* check the port */
+       if (url_prefix->port_len != url->port_len ||
+           strncmp(url->url + url->port_off,
+                   url_prefix->url + url_prefix->port_off, url->port_len))
+               return 0; /* ports do not match */
 
        /* check the path */
        pathmatchlen = url_match_prefix(
                url->url + url->path_off,
                url_prefix->url + url_prefix->path_off,
                url_prefix->url_len - url_prefix->path_off);
+       if (!pathmatchlen)
+               return 0; /* paths do not match */
 
-       if (pathmatchlen && exactusermatch)
-               *exactusermatch = usermatched;
-       return pathmatchlen;
+       if (match) {
+               match->hostmatch_len = url_prefix->host_len;
+               match->pathmatch_len = pathmatchlen;
+               match->user_matched = usermatched;
+       }
+
+       return 1;
+}
+
+static int cmp_matches(const struct urlmatch_item *a,
+                      const struct urlmatch_item *b)
+{
+       if (a->hostmatch_len != b->hostmatch_len)
+               return a->hostmatch_len < b->hostmatch_len ? -1 : 1;
+       if (a->pathmatch_len != b->pathmatch_len)
+               return a->pathmatch_len < b->pathmatch_len ? -1 : 1;
+       if (a->user_matched != b->user_matched)
+               return b->user_matched ? -1 : 1;
+       return 0;
 }
 
 int urlmatch_config_entry(const char *var, const char *value, void *cb)
 {
        struct string_list_item *item;
        struct urlmatch_config *collect = cb;
-       struct urlmatch_item *matched;
+       struct urlmatch_item matched = {0};
        struct url_info *url = &collect->url;
        const char *key, *dot;
        struct strbuf synthkey = STRBUF_INIT;
-       size_t matched_len = 0;
-       int user_matched = 0;
        int retval;
 
        if (!skip_prefix(var, collect->section, &key) || *(key++) != '.') {
@@ -494,13 +569,13 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
                struct url_info norm_info;
 
                config_url = xmemdupz(key, dot - key);
-               norm_url = url_normalize(config_url, &norm_info);
+               norm_url = url_normalize_1(config_url, &norm_info, 1);
                free(config_url);
                if (!norm_url)
                        return 0;
-               matched_len = match_urls(url, &norm_info, &user_matched);
+               retval = match_urls(url, &norm_info, &matched);
                free(norm_url);
-               if (!matched_len)
+               if (!retval)
                        return 0;
                key = dot + 1;
        }
@@ -510,24 +585,18 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 
        item = string_list_insert(&collect->vars, key);
        if (!item->util) {
-               matched = xcalloc(1, sizeof(*matched));
-               item->util = matched;
+               item->util = xcalloc(1, sizeof(matched));
        } else {
-               matched = item->util;
-               /*
-                * Is our match shorter?  Is our match the same
-                * length, and without user while the current
-                * candidate is with user?  Then we cannot use it.
-                */
-               if (matched_len < matched->matched_len ||
-                   ((matched_len == matched->matched_len) &&
-                    (!user_matched && matched->user_matched)))
+               if (cmp_matches(&matched, item->util) < 0)
+                        /*
+                         * Our match is worse than the old one,
+                         * we cannot use it.
+                         */
                        return 0;
                /* Otherwise, replace it with this one. */
        }
 
-       matched->matched_len = matched_len;
-       matched->user_matched = user_matched;
+       memcpy(item->util, &matched, sizeof(matched));
        strbuf_addstr(&synthkey, collect->section);
        strbuf_addch(&synthkey, '.');
        strbuf_addstr(&synthkey, key);
index 528862adc55c43ed26763b3c05e1d27d558a1b74..37ee5da85e2dd3f0ba3ee12d0c515fa6def2a043 100644 (file)
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -18,11 +18,12 @@ struct url_info {
        size_t passwd_len;      /* length of passwd; if passwd_off != 0 but
                                   passwd_len == 0, an empty passwd was given */
        size_t host_off;        /* offset into url to start of host name (0 => none) */
-       size_t host_len;        /* length of host name; this INCLUDES any ':portnum';
+       size_t host_len;        /* length of host name;
                                 * file urls may have host_len == 0 */
-       size_t port_len;        /* if a portnum is present (port_len != 0), it has
-                                * this length (excluding the leading ':') at the
-                                * end of the host name (always 0 for file urls) */
+       size_t port_off;        /* offset into url to start of port number (0 => none) */
+       size_t port_len;        /* if a portnum is present (port_off != 0), it has
+                                * this length (excluding the leading ':') starting
+                                * from port_off (always 0 for file urls) */
        size_t path_off;        /* offset into url to the start of the url path;
                                 * this will always point to a '/' character
                                 * after the url has been normalized */
@@ -33,7 +34,8 @@ struct url_info {
 extern char *url_normalize(const char *, struct url_info *);
 
 struct urlmatch_item {
-       size_t matched_len;
+       size_t hostmatch_len;
+       size_t pathmatch_len;
        char user_matched;
 };